2021 年 Mathorcup B题预测模型搭建

Posted Gendan

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了2021 年 Mathorcup B题预测模型搭建相关的知识,希望对你有一定的参考价值。

machine_learning.py
这部分代码对应文章的机器学习部分。

# -*- coding: utf-8 -*-

import os
import warnings
import numpy as np
from sklearn import preprocessing
import pickle

# 用于机器学习的第三方库导入

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import MultinomialNB
from read_data import read_data_from_path
from read_data import plot_cluster
from read_data import plot_surface
warnings.filterwarnings("ignore")  # suppress all warnings
def select_knn(X, Y):
    """Grid-search the best ``n_neighbors`` (k) for a k-NN classifier.

    Every odd k from 3 to 27 is evaluated with 5-fold cross-validation,
    scored by accuracy. The winning parameters are printed and returned.

    Args:
        X: Training features, passed unchanged to ``GridSearchCV.fit``.
        Y: Training labels, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        dict: ``best_params_`` of the fitted search, e.g.
        ``{'n_neighbors': 25}``.
    """
    # Odd values only: 3, 5, ..., 27 (same candidates as the original list).
    grid = {'n_neighbors': list(range(3, 28, 2))}
    grid_search = GridSearchCV(
        KNeighborsClassifier(),
        param_grid=grid,
        cv=5,
        scoring='accuracy',
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    return grid_search.best_params_

def select_svc(X, Y):
    """Grid-search ``C`` and ``kernel`` for a support-vector classifier.

    All combinations of the candidate ``C`` values and the ``linear``,
    ``rbf`` and ``poly`` kernels are evaluated with 5-fold cross-validation,
    scored by accuracy. The winning parameters are printed and returned.

    Args:
        X: Training features, passed unchanged to ``GridSearchCV.fit``.
        Y: Training labels, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        dict: ``best_params_`` of the fitted search, with keys ``'C'`` and
        ``'kernel'``.
    """
    grid = {
        'C': [0.1, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75],
        'kernel': ['linear', 'rbf', 'poly'],
    }
    grid_search = GridSearchCV(
        SVC(),
        param_grid=grid,
        cv=5,
        scoring='accuracy',
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    return grid_search.best_params_

def select_dtc(X, Y):
    """Grid-search ``max_depth`` and ``ccp_alpha`` for a decision tree.

    All combinations of the candidate depths and cost-complexity pruning
    strengths are evaluated with 5-fold cross-validation, scored by
    accuracy. The winning parameters are printed and returned.

    Args:
        X: Training features, passed unchanged to ``GridSearchCV.fit``.
        Y: Training labels, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        dict: ``best_params_`` of the fitted search, with keys
        ``'max_depth'`` and ``'ccp_alpha'``.
    """
    grid = {
        'max_depth': [19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69, 74, 79],
        'ccp_alpha': [0, 0.00025, 0.0005, 0.001, 0.00125, 0.0015,
                      0.002, 0.005, 0.01, 0.05, 0.1],
    }
    grid_search = GridSearchCV(
        DecisionTreeClassifier(),
        param_grid=grid,
        cv=5,
        scoring='accuracy',
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    return grid_search.best_params_

def select_rf(X, Y):
    """Grid-search ``n_estimators`` for a random-forest classifier.

    The forest is fixed at ``max_samples=0.67``, ``max_features=0.33`` and
    ``max_depth=5``; only the number of trees is searched. Each candidate is
    evaluated with 5-fold cross-validation, scored by accuracy. The winning
    parameters are printed and returned.

    Args:
        X: Training features, passed unchanged to ``GridSearchCV.fit``.
        Y: Training labels, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        dict: ``best_params_`` of the fitted search, e.g.
        ``{'n_estimators': 75}``.
    """
    grid = {'n_estimators': [15, 25, 35, 45, 50, 65, 75, 85, 95]}
    forest = RandomForestClassifier(
        max_samples=0.67,
        max_features=0.33,
        max_depth=5,
    )
    grid_search = GridSearchCV(
        forest,
        param_grid=grid,
        cv=5,
        scoring='accuracy',
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    return grid_search.best_params_

def select_ada(X, Y):
    """Grid-search ``n_estimators`` for AdaBoost over logistic regression.

    Each candidate is evaluated with 5-fold cross-validation. The winning
    parameters are printed and returned.

    Args:
        X: Training features, passed unchanged to ``GridSearchCV.fit``.
        Y: Training labels, passed unchanged to ``GridSearchCV.fit``.

    Returns:
        dict: ``best_params_`` of the fitted search, e.g.
        ``{'n_estimators': 15}``.
    """
    grid = {'n_estimators': [15, 25, 35, 45, 50, 65, 75, 85, 95]}

    # scikit-learn 1.2 renamed ``base_estimator`` to ``estimator`` and 1.4
    # removed the old name; try the new keyword first, then fall back so the
    # function works on both old and new releases.
    try:
        ada = AdaBoostClassifier(estimator=LogisticRegression())
    except TypeError:
        ada = AdaBoostClassifier(base_estimator=LogisticRegression())

    grid_search = GridSearchCV(
        ada,
        param_grid=grid,
        cv=5,
        # Was 'r2', a regression metric. The other select_* functions in this
        # file and cv_score all rank classifiers by accuracy, so tune on the
        # same metric that is later reported.
        scoring='accuracy',
    )
    grid_search.fit(X, Y)
    print(grid_search.best_params_)
    return grid_search.best_params_

def select_model(X, Y):
    """Run every hyper-parameter search in this file on the same data.

    Calls ``select_knn``, ``select_svc``, ``select_dtc``, ``select_rf`` and
    ``select_ada`` in that order; each one prints its own best parameters.

    Args:
        X: Training features, forwarded unchanged to each search.
        Y: Training labels, forwarded unchanged to each search.

    Returns:
        tuple: ``(knn_param, svc_param, dtc_param, rf_param, ada_param)``,
        the best-parameter dict returned by each search.
    """
    knn_param = select_knn(X, Y)
    svc_param = select_svc(X, Y)
    dtc_param = select_dtc(X, Y)
    rf_param = select_rf(X, Y)
    ada_param = select_ada(X, Y)
    return knn_param, svc_param, dtc_param, rf_param, ada_param

def cv_score(X, Y,
             knn_param=None,
             svc_param=None,
             dtc_param=None,
             rf_param=None,
             ada_param=None):
    """Build seven classifiers and report their 5-fold CV accuracy.

    The models are logistic regression, k-NN, SVC, decision tree, random
    forest, AdaBoost (over logistic regression) and multinomial naive Bayes.
    The mean accuracy of each model is printed, and the per-fold scores are
    returned.

    Args:
        X: Features, passed unchanged to ``cross_val_score``.
        Y: Labels, passed unchanged to ``cross_val_score``.
        knn_param: Dict with key ``'n_neighbors'``.
            Defaults to ``{'n_neighbors': 25}``.
        svc_param: Dict with keys ``'C'`` and ``'kernel'``.
            Defaults to ``{'C': 0.1, 'kernel': 'rbf'}``.
        dtc_param: Dict with keys ``'ccp_alpha'`` and ``'max_depth'``.
            Defaults to ``{'ccp_alpha': 0.01, 'max_depth': 19}``.
        rf_param: Dict with key ``'n_estimators'``.
            Defaults to ``{'n_estimators': 75}``.
        ada_param: Dict with key ``'n_estimators'``.
            Defaults to ``{'n_estimators': 15}``.

    Returns:
        tuple: ``(S_lg_i, S_knn_i, S_svc_i, S_dtc_i, S_rf_i, S_ada_i,
        S_NB_i)``, the ``cross_val_score`` result for each model.
    """
    # Defaults are resolved here instead of in the signature so that no
    # mutable dict is shared between calls.
    if knn_param is None:
        knn_param = {'n_neighbors': 25}
    if svc_param is None:
        svc_param = {'C': 0.1, 'kernel': 'rbf'}
    if dtc_param is None:
        dtc_param = {'ccp_alpha': 0.01, 'max_depth': 19}
    if rf_param is None:
        rf_param = {'n_estimators': 75}
    if ada_param is None:
        ada_param = {'n_estimators': 15}

    # Build the models from the supplied (or default) parameters.
    lg = LogisticRegression()
    knn = KNeighborsClassifier(n_neighbors=knn_param['n_neighbors'])
    svc = SVC(C=svc_param['C'], kernel=svc_param['kernel'])
    dtc = DecisionTreeClassifier(max_depth=dtc_param['max_depth'],
                                 ccp_alpha=dtc_param['ccp_alpha'])
    rf = RandomForestClassifier(n_estimators=rf_param['n_estimators'],
                                max_samples=0.67,
                                max_features=0.33,
                                max_depth=5)
    # scikit-learn 1.2 renamed ``base_estimator`` to ``estimator`` and 1.4
    # removed the old name; try the new keyword first, then fall back.
    try:
        ada = AdaBoostClassifier(estimator=lg,
                                 n_estimators=ada_param['n_estimators'])
    except TypeError:
        ada = AdaBoostClassifier(base_estimator=lg,
                                 n_estimators=ada_param['n_estimators'])
    NB = MultinomialNB(alpha=1)

    # (printed prefix, estimator) in the order of the returned tuple. The
    # prefixes are kept exactly as originally printed, including 'dtc :'
    # without a trailing space.
    models = [
        ('lg : ', lg),
        ('knn : ', knn),
        ('svc : ', svc),
        ('dtc :', dtc),
        ('rf : ', rf),
        ('ada : ', ada),
        ('NB : ', NB),
    ]

    # 5-fold cross-validated accuracy for every model.
    fold_scores = [
        cross_val_score(model, X, Y, scoring='accuracy', cv=5)
        for _, model in models
    ]

    # Report only after all models are scored, as the original did.
    for (prefix, _), scores in zip(models, fold_scores):
        print(f'{prefix}{np.mean(scores)}')

    return tuple(fold_scores)

if __name__ == '__main__':
    # Script entry point: load the pickled features and labels, tune every
    # model, then report cross-validated accuracy with the tuned parameters.

    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file. Only load .pkl files that were produced by this project.
    # os.path.join yields '.\model_and_data\...' on Windows (same path as
    # before) and a valid path on other platforms.
    data_dir = os.path.join('.', 'model_and_data')
    with open(os.path.join(data_dir, 'data_after_clu.pkl'), 'rb') as fh:
        data_after_clu = pickle.load(fh)
    with open(os.path.join(data_dir, 'ener_div.pkl'), 'rb') as fh:
        ener_div = pickle.load(fh)

    print(data_after_clu)
    print(ener_div)

    knn_param, svc_param, dtc_param, rf_param, ada_param = select_model(
        data_after_clu, ener_div)

    # Pass the tuned parameters on. Previously they were computed and then
    # ignored, so cv_score always ran with its hard-coded defaults.
    S_lg_i, S_knn_i, S_svc_i, S_dtc_i, S_rf_i, S_ada_i, S_NB_i = cv_score(
        data_after_clu, ener_div,
        knn_param=knn_param,
        svc_param=svc_param,
        dtc_param=dtc_param,
        rf_param=rf_param,
        ada_param=ada_param,
    )

以上是关于2021 年 Mathorcup B题预测模型搭建的主要内容,如果未能解决你的问题,请参考以下文章

2021 年 五一数学建模比赛 B 题(第一问至第三问)

数学建模大赛赛题解析:Mathorcup高校数学建模挑战赛-基于收得率预测模型的转炉炼钢的成本优化

数学建模大赛赛题解析:Mathorcup高校数学建模挑战赛-基于收得率预测模型的转炉炼钢的成本优化

数学建模MATLAB应用实战系列(138)-2021年MathorCup高校数学建模挑战赛A题思路解析(附代码)

2021年MathorCup数学建模A题自动驾驶中的车辆调头问题全过程解题论文及程序

2021年MathorCup高校数学建模挑战赛——大数据竞赛A题