import numpy as np import pandas as pd from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split from collections import Counter import math
# 处理X_train defsummarize(self, train_data): summaries = [(self.mean(i), self.stdev(i)) for i in zip(*train_data)] return summaries
# 分类别求出数学期望和标准差 deffit(self, X, y): labels = list(set(y)) data = {label: [] for label in labels} for f, label in zip(X, y): data[label].append(f) self.model = { label: self.summarize(value) for label, value in data.items() } return'gaussianNB train done!'
# 计算概率 defcalculate_probabilities
(self, input_data): # summaries:{0.0: [(5.0, 0.37),(3.42, 0.40)], 1.0: [(5.8, 0.449),(2.7, 0.27)]} # input_data:[1.1, 2.2] probabilities = {} for label, value in self.model.items(): probabilities[label] = 1 for i in range(len(value)): mean, stdev = value[i] probabilities[label] *= self.gaussian_probability( input_data[i], mean, stdev) return probabilities