Sklearn之Svm支持向量机
Posted OpeSource
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Sklearn之Svm支持向量机相关的知识,希望对你有一定的参考价值。
本节主要介绍如何利用 Python 的 scikit-learn(sklearn)库进行 SVM(支持向量机)分类,案例数据采用“german_credit”信用数据集。
[cut] Read more
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder,Imputer
from sklearn.svm import SVC,LinearSVC,NuSVC
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier
# Load the German credit training data; the first row holds the column names.
df = pd.read_csv('german_credit_tra.csv', header=0)
print(df)

# Feature matrix: the predictor columns fed to the models.
feature_columns = [
    'AccountBalance',
    'Duration',
    'PaymentStatusofPreviousCredit',
    'Purpose',
    'ValueSavings',
]
sel_df = df.loc[:, feature_columns]
X = sel_df

# Target: the 'Creditability' column, encoded as integer class ids.
labels = df['Creditability']
le = LabelEncoder()
y = le.fit_transform(labels)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123)

# Missing-value handling: an imputer with default settings, used as the first
# step of the model pipelines.
# NOTE(review): `Imputer` appears to have been removed from
# sklearn.preprocessing in newer scikit-learn releases (replaced by
# sklearn.impute.SimpleImputer) - confirm against the installed version.
imputer = Imputer()
# ---- 001: SVM with a radial (Gaussian / RBF) kernel ----
# `kernel` may be "rbf", "linear", "poly" or "sigmoid"; "rbf" is the default.
svmRadial = SVC(kernel="rbf", random_state=123)
pipe = Pipeline([('imputer', imputer), ('svmRadial', svmRadial)])

# Hyper-parameter grid; keys follow the "<pipeline step>__<parameter>" form.
param_grid = {
    'svmRadial__gamma': (0.1, 0.3),
    'svmRadial__C': (0.1, 0.2),
}

# The search object must be bound to the same name that is fitted and
# queried below, otherwise the fit call raises NameError.
grid_search = GridSearchCV(pipe, param_grid, n_jobs=1,
                           verbose=1, scoring='roc_auc')
grid_search.fit(X_train, y_train)
print('最优参数')
best_pars = grid_search.best_estimator_.get_params()
print(best_pars)

# GridSearchCV tunes clones and leaves the pipeline passed to it untouched,
# so evaluate the refitted best estimator rather than retraining the
# pipeline with its default hyper-parameters.
pipe = grid_search.best_estimator_

# Map encoded class ids back to the original label values before scoring.
expected = le.inverse_transform(y_test)
svmRadialPre = pipe.predict(X_test)
predicted = le.inverse_transform(svmRadialPre)

# accuracy_score is the share of correct predictions (not ROC AUC), so it is
# reported under its real name.
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy:%s' % accuracy)
print(metrics.classification_report(expected, predicted))
# ---- 002: linear-kernel SVM (LinearSVC) ----
svmLinear = LinearSVC()
pipe2 = Pipeline([('imputer', imputer), ('svmLinear', svmLinear)])

# Hyper-parameter grid; keys follow the "<pipeline step>__<parameter>" form.
param_grid2 = {
    'svmLinear__C': (0.05, 0.1),
}
grid_search2 = GridSearchCV(pipe2, param_grid2, n_jobs=1,
                            verbose=1, scoring='roc_auc')
grid_search2.fit(X_train, y_train)
print('最优参数')
best_pars = grid_search2.best_estimator_.get_params()
print(best_pars)

# GridSearchCV tunes clones and leaves the pipeline passed to it untouched,
# so evaluate the refitted best estimator rather than retraining the
# pipeline with its default hyper-parameters.
pipe2 = grid_search2.best_estimator_

# Map encoded class ids back to the original label values before scoring.
expected = le.inverse_transform(y_test)
svmLinearPre = pipe2.predict(X_test)
predicted = le.inverse_transform(svmLinearPre)

# accuracy_score is the share of correct predictions (not ROC AUC), so it is
# reported under its real name.
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy:%s' % accuracy)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
# ---- 003: linear SVM trained with SGD (hinge loss) ----
svmSGD = SGDClassifier(loss="hinge",
                       fit_intercept=True,
                       max_iter=200)
pipe3 = Pipeline([('imputer', imputer), ('svmSGD', svmSGD)])

# Hyper-parameter grid; keys follow the "<pipeline step>__<parameter>" form.
param_grid3 = {
    'svmSGD__alpha': (0.3, 0.35),
}
grid_search3 = GridSearchCV(pipe3, param_grid3, n_jobs=1,
                            verbose=1, scoring='roc_auc')
grid_search3.fit(X_train, y_train)
print('最优参数')
best_pars = grid_search3.best_estimator_.get_params()
print(best_pars)

# GridSearchCV tunes clones and leaves the pipeline passed to it untouched,
# so evaluate the refitted best estimator rather than retraining the
# pipeline with its default hyper-parameters.
pipe3 = grid_search3.best_estimator_

# Map encoded class ids back to the original label values before scoring.
# The predictions get their own name so they do not overwrite the LinearSVC
# predictions.
expected = le.inverse_transform(y_test)
svmSGDPre = pipe3.predict(X_test)
predicted = le.inverse_transform(svmSGDPre)

# accuracy_score is the share of correct predictions (not ROC AUC), so it is
# reported under its real name.
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy:%s' % accuracy)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
# Confusion-matrix plotting helper
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it on the current matplotlib figure.

    Args:
        cm: 2-D NumPy array; row ``i`` / column ``j`` is drawn at tick
            position ``i`` on the y axis and ``j`` on the x axis.
        classes: Tick labels for both axes, expected in the same order as
            the rows/columns of ``cm``.
        normalize: When true, each row is divided by its sum before printing
            and plotting, and cells are formatted with two decimals;
            otherwise cells are formatted as integers.
        title: Title placed above the plot.
        cmap: Colormap passed to ``plt.imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # Rows with no samples stay at 0 instead of turning into NaN via a
        # division by zero (NaN would also break the text-colour threshold).
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text, the rest black, so the
    # number stays readable against the cell colour.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# A set has no defined order, so sort the distinct labels and hand the same
# list to confusion_matrix: the tick labels are then guaranteed to line up
# with the matrix rows/columns.
class_names = sorted(set(labels))
# `expected` / `predicted` hold the values most recently assigned by the
# evaluation code that ran before this point.
cnf_matrix = metrics.confusion_matrix(expected, predicted, labels=class_names)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')

plt.show()
以上是关于Sklearn之Svm支持向量机的主要内容,如果未能解决你的问题,请参考以下文章