Sklearn SVM (Support Vector Machines)




This section shows how to do SVM (support vector machine) classification with Python's scikit-learn, using the "german_credit" data as the example dataset.



import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder, Imputer
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.linear_model import SGDClassifier

# Load the German credit data and keep a subset of the feature columns
df = pd.read_csv('german_credit_tra.csv', header=0)
print(df)
sel_df = df.loc[:, ['AccountBalance', 'Duration',
                    'PaymentStatusofPreviousCredit',
                    'Purpose', 'ValueSavings']]
X = sel_df

# Target label: 'Creditability'
labels = df.Creditability
le = LabelEncoder()
y = le.fit_transform(labels)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=123)
# print(X_train)
print(y_train)
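Because credit data like this tends to be class-imbalanced, it may also be worth splitting with stratification so that both the training and test sets keep the original class ratio. A minimal variant of the split above (not in the original article):

# stratify=y keeps the class proportions the same in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=123, stratify=y)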


# Missing-value handling

imputer = Imputer()
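Note that Imputer comes from older scikit-learn releases (it was deprecated around 0.20 and later removed). On a current install, the equivalent step would be SimpleImputer, roughly:

from sklearn.impute import SimpleImputer

# mean imputation, matching Imputer()'s default strategy
imputer = SimpleImputer(strategy='mean')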
###### 001 SVM radial model (RBF / Gaussian kernel)

# kernel: options are 'rbf', 'linear', 'poly', 'sigmoid'; the default is 'rbf'
svmRadial = SVC(kernel="rbf", random_state=123)
pipe = Pipeline([('imputer', imputer), ('svmRadial', svmRadial)])

#########

param_grid = {
    'svmRadial__gamma': (0.1, 0.3),
    'svmRadial__C': (0.1, 0.2)
}

grid_search = GridSearchCV(pipe, param_grid, n_jobs=1,
                           verbose=1, scoring='roc_auc')
grid_search.fit(X_train, y_train)
print('Best parameters')
best_pars = grid_search.best_estimator_.get_params()
print(best_pars)

pipe.fit(X_train, y_train)
expected = le.inverse_transform(y_test)
svmRadialPre = pipe.predict(X_test)
predicted = le.inverse_transform(svmRadialPre)
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy: %s' % accuracy)
print(metrics.classification_report(expected, predicted))
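One thing worth noting about the block above: pipe.fit(X_train, y_train) refits the pipeline with its default hyper-parameters, not with the values found by the grid search. Since GridSearchCV refits the best configuration on the full training set by default, the tuned pipeline can be evaluated directly, roughly like this:

best_pipe = grid_search.best_estimator_          # already refit on all of X_train
print('best CV ROC AUC: %s' % grid_search.best_score_)
predicted_best = le.inverse_transform(best_pipe.predict(X_test))
print(metrics.classification_report(le.inverse_transform(y_test), predicted_best))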

##### 002 SVM linear model (linear kernel)

svmLinear = LinearSVC()
pipe2 = Pipeline([('imputer', imputer), ('svmLinear', svmLinear)])
param_grid2 = {
    'svmLinear__C': (0.05, 0.1),
}

grid_search2 = GridSearchCV(pipe2, param_grid2, n_jobs=1,
                            verbose=1, scoring='roc_auc')
grid_search2.fit(X_train, y_train)
print('Best parameters')
best_pars = grid_search2.best_estimator_.get_params()
print(best_pars)

pipe2.fit(X_train, y_train)
expected = le.inverse_transform(y_test)
svmLinearPre = pipe2.predict(X_test)
predicted = le.inverse_transform(svmLinearPre)
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy: %s' % accuracy)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
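Both LinearSVC and the RBF SVC above are sensitive to the scale of the input features, so adding a standardization step to the pipeline often improves results. A minimal sketch of such a variant of pipe2 (not part of the original article):

from sklearn.preprocessing import StandardScaler

# same steps as pipe2, plus standardization between imputation and the SVM
pipe2_scaled = Pipeline([('imputer', Imputer()),
                         ('scaler', StandardScaler()),
                         ('svmLinear', LinearSVC())])
pipe2_scaled.fit(X_train, y_train)
print(metrics.accuracy_score(y_test, pipe2_scaled.predict(X_test)))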

##### 003 SGD SVM linear model (linear kernel, trained with SGD)

svmSGD = SGDClassifier(loss="hinge",
                       fit_intercept=True,
                       max_iter=200)
pipe3 = Pipeline([('imputer', imputer), ('svmSGD', svmSGD)])
param_grid3 = {'svmSGD__alpha': (0.3, 0.35)}

grid_search3 = GridSearchCV(pipe3, param_grid3, n_jobs=1,
                            verbose=1, scoring='roc_auc')
grid_search3.fit(X_train, y_train)
print('Best parameters')
best_pars = grid_search3.best_estimator_.get_params()
print(best_pars)

pipe3.fit(X_train, y_train)
expected = le.inverse_transform(y_test)
svmSGDPre = pipe3.predict(X_test)
predicted = le.inverse_transform(svmSGDPre)
accuracy = metrics.accuracy_score(expected, predicted)
print('accuracy: %s' % accuracy)
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))
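For context (not from the original article): with loss="hinge", SGDClassifier optimizes the same objective as a linear SVM, just trained by stochastic gradient descent, and its alpha plays the role of the inverse of C. The scikit-learn documentation gives the rough correspondence alpha ≈ 1 / (C * n_samples), which can help pick a comparable search range:

# rough mapping between the SVM's C and SGD's alpha (per the scikit-learn docs)
n_samples = X_train.shape[0]
for C in (0.05, 0.1):
    print('C=%s -> alpha ~ %s' % (C, 1.0 / (C * n_samples)))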

# Confusion matrix plot

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


class_names = le.classes_  # ordered class labels (a plain set would have no guaranteed order)
cnf_matrix = metrics.confusion_matrix(expected, predicted)
np.set_printoptions(precision=2)

# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names,
                      title='Confusion matrix, without normalization')

# Plot normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                      title='Normalized confusion matrix')
plt.show()
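As a side note, recent scikit-learn releases (1.0 and later) ship a built-in confusion-matrix plot, so on a current install the custom helper above could be replaced by something like:

from sklearn.metrics import ConfusionMatrixDisplay

# one call draws the (optionally row-normalized) confusion matrix
ConfusionMatrixDisplay.from_predictions(expected, predicted,
                                        normalize='true', cmap=plt.cm.Blues)
plt.show()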

