Machine Learning Algorithms: The Machine Makes Me Learn


This short post mainly collects code for Naive Bayes, support vector machines, decision trees, and ensemble learning. I can barely follow it... and the later parts even less so...

Naive Bayes


    scikit-learn provides three Naive Bayes variants: Bernoulli, multinomial, and Gaussian. The Bernoulli variant assumes binary (Bernoulli-distributed) features, the multinomial variant assumes discrete count features, and the Gaussian variant assumes continuous features, so each suits a different scenario:
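    For background (the standard Naive Bayes decision rule, not spelled out in the original post): all three variants assign a sample to the class

$$\hat{y} = \arg\max_{y} \; P(y) \prod_{i=1}^{n} P(x_i \mid y),$$

    and differ only in the distribution assumed for each likelihood P(x_i | y).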

    Bernoulli Naive Bayes: visualize the dataset and validate on held-out test points:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.naive_bayes import BernoulliNB

# For reproducibility
np.random.seed(1000)

nb_samples = 300

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    for i in range(nb_samples):
        if Y[i] == 0:
            ax.scatter(X[i, 0], X[i, 1], marker='o', color='r')
        else:
            ax.scatter(X[i, 0], X[i, 1], marker='^', color='b')
    plt.show()

if __name__ == '__main__':
    # Create dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2, n_informative=2, n_redundant=0)

    # Show dataset
    show_dataset(X, Y)

    # Split dataset
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

    # Create and train a Bernoulli Naive Bayes classifier
    # (binarize=0.0 thresholds each continuous feature at 0)
    bnb = BernoulliNB(binarize=0.0)
    bnb.fit(X_train, Y_train)
    print('Bernoulli Naive Bayes score: %.3f' % bnb.score(X_test, Y_test))

    # Compute CV score
    bnb_scores = cross_val_score(bnb, X, Y, scoring='accuracy', cv=10)
    print('Bernoulli Naive Bayes CV average score: %.3f' % bnb_scores.mean())

    # Predict some values
    data = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
    Yp = bnb.predict(data)
    print(Yp)


    Multinomial Naive Bayes: predict city vs. countryside from word-count data:

from __future__ import print_function
import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.naive_bayes import MultinomialNB

# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Prepare a dummy dataset (term counts; label 1 = city, 0 = countryside)
    data = [
        {'house': 100, 'street': 50, 'shop': 25, 'car': 100, 'tree': 20},
        {'house': 5, 'street': 5, 'shop': 0, 'car': 10, 'tree': 500, 'river': 1}
    ]

    # Create and train a dictionary vectorizer
    dv = DictVectorizer(sparse=False)
    X = dv.fit_transform(data)
    Y = np.array([1, 0])
    print(X)

    # Create and train a Multinomial Naive Bayes classifier
    mnb = MultinomialNB()
    mnb.fit(X, Y)

    # Create dummy test data
    test_data = [
        {'house': 80, 'street': 20, 'shop': 15, 'car': 50, 'tree': 20, 'river': 1},
        {'house': 10, 'street': 5, 'shop': 1, 'car': 8, 'tree': 300, 'river': 0}
    ]

    # Predict city vs. countryside (use transform, not fit_transform,
    # so the vocabulary learned on the training data is reused)
    Yp = mnb.predict(dv.transform(test_data))
    print(Yp)


    Gaussian Naive Bayes: validate on test points and compare against logistic regression with ROC curves:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc

# For reproducibility
np.random.seed(1000)

nb_samples = 300

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    for i in range(nb_samples):
        if Y[i] == 0:
            ax.scatter(X[i, 0], X[i, 1], marker='o', color='r')
        else:
            ax.scatter(X[i, 0], X[i, 1], marker='^', color='b')
    plt.show()

if __name__ == '__main__':
    # Create dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2, n_informative=2, n_redundant=0)

    # Show dataset
    show_dataset(X, Y)

    # Split dataset
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

    # Create and train a Gaussian Naive Bayes classifier
    gnb = GaussianNB()
    gnb.fit(X_train, Y_train)

    # Create and train a logistic regressor (for comparison)
    lr = LogisticRegression()
    lr.fit(X_train, Y_train)

    # Compute ROC curves (class probabilities for GNB, decision scores for LR)
    Y_gnb_score = gnb.predict_proba(X_test)
    Y_lr_score = lr.decision_function(X_test)

    fpr_gnb, tpr_gnb, thresholds_gnb = roc_curve(Y_test, Y_gnb_score[:, 1])
    fpr_lr, tpr_lr, thresholds_lr = roc_curve(Y_test, Y_lr_score)

    # Plot ROC curves
    plt.figure(figsize=(10, 8))
    plt.plot(fpr_gnb, tpr_gnb, color='red', label='Naive Bayes (AUC: %.2f)' % auc(fpr_gnb, tpr_gnb))
    plt.plot(fpr_lr, tpr_lr, color='green', label='Logistic Regression (AUC: %.2f)' % auc(fpr_lr, tpr_lr))
    plt.plot([0, 1], [0, 1], color='blue', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.01])
    plt.title('ROC Curve')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.legend(loc="lower right")
    plt.show()


    Compare the performance of Gaussian and multinomial Naive Bayes on the scikit-learn digits dataset (the original post says MNIST, but load_digits is the smaller 8x8 digits set):

from sklearn.datasets import load_digits
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB, MultinomialNB

digits = load_digits()

gnb = GaussianNB()
mnb = MultinomialNB()

print(cross_val_score(gnb, digits.data, digits.target, scoring='accuracy', cv=10).mean())
print(cross_val_score(mnb, digits.data, digits.target, scoring='accuracy', cv=10).mean())


  Support Vector Machines


    Linear classification
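    As background (the standard soft-margin formulation, not shown in the original post), a linear SVM solves

$$\min_{w,\,b,\,\xi} \; \frac{1}{2}\|w\|^2 + C \sum_{i=1}^{n} \xi_i \quad \text{s.t.} \quad y_i (w^\top x_i + b) \ge 1 - \xi_i, \;\; \xi_i \ge 0,$$

    where C, the parameter searched over in the grids later on, trades margin width against training errors.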

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_samples = 500

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    for i in range(nb_samples):
        if Y[i] == 0:
            ax.scatter(X[i, 0], X[i, 1], marker='o', color='r')
        else:
            ax.scatter(X[i, 0], X[i, 1], marker='^', color='b')
    plt.show()

if __name__ == '__main__':
    # Create dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2, n_informative=2, n_redundant=0,
                               n_clusters_per_class=1)

    # Show dataset
    show_dataset(X, Y)

    # Create an SVM with a linear kernel
    svc = SVC(kernel='linear')

    # Compute CV score
    svc_scores = cross_val_score(svc, X, Y, scoring='accuracy', cv=10)
    print('Linear SVM CV average score: %.3f' % svc_scores.mean())


    Kernel-based classification: radial basis function (RBF) kernels, polynomial kernels, sigmoid kernels, and custom kernels (a custom-kernel sketch follows).
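    scikit-learn also accepts any callable that returns the Gram matrix between two sets of samples as a custom kernel. A minimal sketch (my_kernel is a hypothetical name, and it simply reimplements the linear kernel by hand):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import SVC

# A custom kernel is any callable mapping (X, Y) to the Gram matrix K[i, j] = k(X[i], Y[j]).
# This one hand-codes the linear kernel purely for illustration.
def my_kernel(X, Y):
    return np.dot(X, Y.T)

X, Y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0)
svc = SVC(kernel=my_kernel)
svc.fit(X, Y)
print(svc.score(X, Y))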

    A nonlinear example: find the best kernel via grid search and check the resulting score:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing
from sklearn.datasets import make_circles
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# For reproducibility
np.random.seed(1000)

nb_samples = 500

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    for i in range(nb_samples):
        if Y[i] == 0:
            ax.scatter(X[i, 0], X[i, 1], marker='o', color='r')
        else:
            ax.scatter(X[i, 0], X[i, 1], marker='^', color='b')
    plt.show()

if __name__ == '__main__':
    # Create dataset (concentric circles: not linearly separable)
    X, Y = make_circles(n_samples=nb_samples, noise=0.1)

    # Show dataset
    show_dataset(X, Y)

    # Define a param grid
    param_grid = [
        {
            'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
            'C': [0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0]
        }
    ]

    # Create and train a grid search over SVM classifiers
    gs = GridSearchCV(estimator=SVC(), param_grid=param_grid,
                      scoring='accuracy', cv=10, n_jobs=multiprocessing.cpu_count())
    gs.fit(X, Y)
    print(gs.best_estimator_)
    print('Kernel SVM score: %.3f' % gs.best_score_)

    # Logistic regression accuracy, for comparison
    lr = LogisticRegression()
    print(cross_val_score(lr, X, Y, scoring='accuracy', cv=10).mean())


      Use a grid search to find the best SVM kernel on the digits dataset:

from __future__ import print_function
import numpy as np
import multiprocessing
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load dataset
    digits = load_digits()

    # Define a param grid
    param_grid = [
        {
            'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
            'C': [0.1, 0.2, 0.4, 0.5, 1.0, 1.5, 1.8, 2.0, 2.5, 3.0]
        }
    ]

    # Create and train a grid search over SVM classifiers
    gs = GridSearchCV(estimator=SVC(), param_grid=param_grid,
                      scoring='accuracy', cv=10, n_jobs=multiprocessing.cpu_count())
    gs.fit(digits.data, digits.target)
    print(gs.best_estimator_)  # the best kernel and parameters found
    print('Kernel SVM score: %.3f' % gs.best_score_)


    Controlled support vector machines (NuSVC): use a grid search to find the best nu:
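    As background (a known property of nu-SVM, not stated in the original post): nu in (0, 1] is an upper bound on the fraction of margin errors and a lower bound on the fraction of support vectors,

$$\frac{\#\{\text{margin errors}\}}{n} \;\le\; \nu \;\le\; \frac{\#\{\text{support vectors}\}}{n},$$

    which is why the support-vector counts printed below shrink as nu decreases.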

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.svm import SVC, NuSVC
from sklearn.model_selection import GridSearchCV
import multiprocessing

# For reproducibility
np.random.seed(1000)

nb_samples = 500

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    for i in range(nb_samples):
        if Y[i] == 0:
            ax.scatter(X[i, 0], X[i, 1], marker='o', color='r')
        else:
            ax.scatter(X[i, 0], X[i, 1], marker='^', color='b')
    plt.show()

if __name__ == '__main__':
    # Create dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2, n_informative=2, n_redundant=0,
                               n_clusters_per_class=1)

    # Show dataset
    show_dataset(X, Y)

    # Create and train a linear SVM
    svc = SVC(kernel='linear')
    svc.fit(X, Y)
    print(svc.support_vectors_.shape)

    # Create and train Nu-SVM classifiers (smaller nu -> fewer support vectors)
    nusvc = NuSVC(kernel='linear', nu=0.5)
    nusvc.fit(X, Y)
    print(nusvc.support_vectors_.shape)

    nusvc = NuSVC(kernel='linear', nu=0.05)
    nusvc.fit(X, Y)
    print(nusvc.support_vectors_.shape)

    # Search for the best nu (coarse grid from the original post)
    param_grid = [
        {
            'nu': np.arange(0.081, 1.0, 0.5)
        }
    ]
    gs = GridSearchCV(estimator=NuSVC(kernel='linear'), param_grid=param_grid,
                      scoring='accuracy', cv=10, n_jobs=multiprocessing.cpu_count())
    gs.fit(X, Y)
    print(gs.best_estimator_)
    print(gs.best_score_)
    print(gs.best_estimator_.support_vectors_.shape)


  Support Vector Regression
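  As background (the standard epsilon-insensitive loss, not shown in the original post): SVR fits a tube of width epsilon around the data, penalizing a point only by how far it falls outside the tube,

$$L_\epsilon(y, f(x)) = \max(0, \; |y - f(x)| - \epsilon),$$

  so epsilon=0.5 in the code below sets that zero-cost tolerance.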

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_samples = 50

def show_dataset(X, Y):
    fig, ax = plt.subplots(1, 1, figsize=(10, 8))
    ax.grid()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.scatter(X, Y)
    plt.show()

if __name__ == '__main__':
    # Create a noisy quadratic dataset
    X = np.arange(-nb_samples, nb_samples, 1)
    Y = np.zeros(shape=(2 * nb_samples,))
    for x in X:
        Y[int(x) + nb_samples] = np.power(x * 6, 2.0) / 1e4 + np.random.uniform(-2, 2)

    # Show dataset
    # show_dataset(X, Y)

    # Create and cross-validate a Support Vector regressor
    svr = SVR(kernel='poly', degree=2, C=1.5, epsilon=0.5)
    svr_scores = cross_val_score(svr, X.reshape((nb_samples * 2, 1)), Y,
                                 scoring='neg_mean_squared_error', cv=10)
    print('SVR CV average negative squared error: %.3f' % svr_scores.mean())


 

Decision Trees and Ensemble Learning

  Binary decisions

  Impurity measures: the Gini impurity index, the cross-entropy impurity index, and the misclassification impurity index (standard definitions below).
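  For a node t with class proportions p(c | t), the standard definitions (background, not derived in the original post) are

$$I_{Gini}(t) = 1 - \sum_{c} p(c \mid t)^2, \qquad I_{H}(t) = -\sum_{c} p(c \mid t) \log p(c \mid t), \qquad I_{M}(t) = 1 - \max_{c} p(c \mid t).$$

  All three vanish for a pure node and peak when the classes are evenly mixed.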

  Feature importance:

from __future__ import print_function
import numpy as np
from sklearn.datasets import make_classification
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_samples = 500

if __name__ == '__main__':
    # Create dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=3, n_informative=3, n_redundant=0,
                               n_classes=3, n_clusters_per_class=1)

    # Create a decision tree classifier
    dt = DecisionTreeClassifier()
    dt_scores = cross_val_score(dt, X, Y, scoring='accuracy', cv=10)
    print('Decision tree score: %.3f' % dt_scores.mean())

    # Fit and inspect the feature importances
    dt.fit(X, Y)
    print(dt.feature_importances_)

    # Save in Graphviz format
    with open('dt.dot', 'w') as df:
        export_graphviz(dt, out_file=df,
                        feature_names=['A', 'B', 'C'],
                        class_names=['C1', 'C2', 'C3'])


    After downloading and installing Graphviz on Windows, add the bin directory (which contains dot) to the PATH environment variable, then run dot -Tpdf dt.dot -o dt.pdf at the command prompt to convert dt.dot into dt.pdf and view the tree:
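    If installing Graphviz is inconvenient, newer scikit-learn versions can draw the tree directly with matplotlib. A minimal sketch, assuming scikit-learn >= 0.21 and the fitted dt from the block above:

import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Render the fitted tree without Graphviz (requires scikit-learn >= 0.21)
plt.figure(figsize=(12, 8))
plot_tree(dt, feature_names=['A', 'B', 'C'], class_names=['C1', 'C2', 'C3'], filled=True)
plt.show()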


from __future__ import print_function
import numpy as np
import multiprocessing
from sklearn.datasets import load_digits
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load dataset
    digits = load_digits()

    # Define a param grid
    param_grid = [
        {
            'criterion': ['gini', 'entropy'],
            'max_features': ['auto', 'log2', None],
            'min_samples_split': [2, 10, 25, 100, 200],
            'max_depth': [5, 10, 15, None]
        }
    ]

    # Create and train a grid search
    gs = GridSearchCV(estimator=DecisionTreeClassifier(), param_grid=param_grid,
                      scoring='accuracy', cv=10, n_jobs=multiprocessing.cpu_count())
    gs.fit(digits.data, digits.target)
    print(gs.best_estimator_)
    print('Decision tree score: %.3f' % gs.best_score_)


   Random Forests
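   A random forest trains each tree on a bootstrap sample (with random feature subsets at each split), so the samples a tree never saw give a free validation estimate. A minimal sketch of that out-of-bag score (oob_score is standard scikit-learn API; the parameter values here are illustrative):

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

digits = load_digits()

# Each sample is scored only by the trees whose bootstrap sample excluded it
rf = RandomForestClassifier(n_estimators=100, oob_score=True)
rf.fit(digits.data, digits.target)
print('OOB accuracy: %.3f' % rf.oob_score_)

   The block below instead measures 10-fold CV accuracy as the number of trees grows: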

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_classifications = 100

if __name__ == '__main__':
    # Load dataset
    digits = load_digits()

    # Collect CV accuracies for forests of 1..99 trees
    rf_accuracy = []
    for i in range(1, nb_classifications):
        a = cross_val_score(RandomForestClassifier(n_estimators=i), digits.data, digits.target,
                            scoring='accuracy', cv=10).mean()
        rf_accuracy.append(a)

    # Show results
    plt.figure(figsize=(10, 8))
    plt.xlabel('Number of trees')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.plot(rf_accuracy)
    plt.show()


    The same experiment with extremely randomized trees (ExtraTreesClassifier), which use random split thresholds to trade a little bias for lower variance:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_classifications = 100

if __name__ == '__main__':
    # Load dataset
    digits = load_digits()

    # Collect CV accuracies for forests of 1..99 trees
    et_accuracy = []
    for i in range(1, nb_classifications):
        a = cross_val_score(ExtraTreesClassifier(n_estimators=i), digits.data, digits.target,
                            scoring='accuracy', cv=10).mean()
        et_accuracy.append(a)

    # Show results
    plt.figure(figsize=(10, 8))
    plt.xlabel('Number of trees')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.plot(et_accuracy)
    plt.show()


    AdaBoost:
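    As background, in one common presentation of binary AdaBoost (scikit-learn implements the multi-class SAMME variants that generalize it): at round m a weak learner h_m is fit under sample weights, its weighted error epsilon_m gives it a vote

$$\alpha_m = \frac{1}{2} \ln \frac{1 - \epsilon_m}{\epsilon_m},$$

    and the weights are updated as $w_i \leftarrow w_i \, e^{-\alpha_m y_i h_m(x_i)}$ and renormalized, so misclassified samples gain weight in the next round.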

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_classifications = 100

if __name__ == '__main__':
    # Load dataset
    digits = load_digits()

    # Collect CV accuracies for ensembles of 1..99 estimators
    ab_accuracy = []
    for i in range(1, nb_classifications):
        a = cross_val_score(AdaBoostClassifier(n_estimators=i), digits.data, digits.target,
                            scoring='accuracy', cv=10).mean()
        ab_accuracy.append(a)

    # Show results
    plt.figure(figsize=(10, 8))
    plt.xlabel('Number of trees')
    plt.ylabel('Accuracy')
    plt.grid(True)
    plt.plot(ab_accuracy)
    plt.show()


    AdaBoost with 100 estimators on the Iris dataset:

from __future__ import print_function
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Load dataset
    iris = load_iris()

    # Create and cross-validate an AdaBoost classifier
    ada = AdaBoostClassifier(n_estimators=100, learning_rate=1.0)
    ada_scores = cross_val_score(ada, iris.data, iris.target, scoring='accuracy', cv=10)
    print('AdaBoost score: %.3f' % ada_scores.mean())


  Gradient Tree Boosting
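  As background (the standard additive formulation, not derived in the original post): gradient boosting builds the model stage by stage,

$$F_m(x) = F_{m-1}(x) + \eta \, h_m(x),$$

  where each tree h_m is fit to the negative gradient of the loss at the current predictions and eta is the learning rate. The code below scales learning_rate as 10/n_estimators so the total amount of shrinkage stays roughly comparable across ensemble sizes.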

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_samples = 500

if __name__ == '__main__':
    # Create the dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=4, n_informative=3, n_redundant=1,
                               n_classes=3)

    # Collect the scores for n_estimators in (1, 50)
    a = []
    max_estimators = 50
    for i in range(1, max_estimators):
        # Shrink the learning rate as the number of estimators grows
        score = cross_val_score(GradientBoostingClassifier(n_estimators=i, learning_rate=10.0 / float(i)),
                                X, Y, cv=10, scoring='accuracy').mean()
        a.append(score)

    # Plot the results
    plt.figure(figsize=(10, 8))
    plt.xlabel('Number of estimators')
    plt.ylabel('Average CV accuracy')
    plt.grid(True)
    plt.plot(a)
    plt.show()


  Voting classifiers:
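  As background (standard definitions, not spelled out in the original post): hard voting predicts the majority class, $\hat{y} = \mathrm{mode}\{h_1(x), \dots, h_k(x)\}$, while soft voting averages (optionally weighted) class probabilities, $\hat{y} = \arg\max_c \sum_j w_j \, p_j(c \mid x)$. This is why the SVC below is created with probability=True: it must expose predict_proba to take part in soft voting.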

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import cross_val_score

# For reproducibility
np.random.seed(1000)

nb_samples = 500


def compute_accuracies(lr, dt, svc, vc, X, Y):
    accuracies = []
    accuracies.append(cross_val_score(lr, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(dt, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(svc, X, Y, scoring='accuracy', cv=10).mean())
    accuracies.append(cross_val_score(vc, X, Y, scoring='accuracy', cv=10).mean())
    print('Accuracies:')
    print(np.array(accuracies))
    return accuracies


def plot_accuracies(accuracies):
    fig, ax = plt.subplots(figsize=(12, 8))
    positions = np.array([0, 1, 2, 3])
    ax.bar(positions, accuracies, 0.5)
    ax.set_ylabel('Accuracy')
    # Set the ticks before the labels so the labels are not reset
    ax.set_xticks(positions + (5.0 / 20))
    ax.set_xticklabels(('Logistic Regression', 'Decision Tree', 'SVM', 'Ensemble'))
    plt.ylim([0.80, 0.93])
    plt.show()


if __name__ == '__main__':
    # Create the dataset
    X, Y = make_classification(n_samples=nb_samples, n_features=2, n_redundant=0, n_classes=2)

    # Show the dataset
    fig, ax = plt.subplots(figsize=(12, 12))
    for i, x in enumerate(X):
        if Y[i] == 0:
            ax.scatter(x[0], x[1], marker='s', color='blue')
        else:
            ax.scatter(x[0], x[1], marker='d', color='red')
    ax.set_xlabel(r'$X_0$')
    ax.set_ylabel(r'$X_1$')
    plt.show()

    # Create the classifiers (probability=True is required for soft voting)
    lr = LogisticRegression()
    svc = SVC(kernel='poly', probability=True)
    dt = DecisionTreeClassifier()

    classifiers = [('lr', lr),
                   ('dt', dt),
                   ('svc', svc)]

    # Hard (majority) voting
    vc = VotingClassifier(estimators=classifiers, voting='hard')

    # Compute and plot accuracies
    hard_accuracies = compute_accuracies(lr, dt, svc, vc, X, Y)
    plot_accuracies(hard_accuracies)

    # Soft weighted voting
    weights = [1.5, 0.5, 0.75]
    vc = VotingClassifier(estimators=classifiers, weights=weights, voting='soft')

    # Compute and plot accuracies
    soft_accuracies = compute_accuracies(lr, dt, svc, vc, X, Y)
    plot_accuracies(soft_accuracies)


I can't follow it and can't really get into it; if there is more later, I hope to come back and look at it again...
