堆叠分类器无法识别 Keras

Posted

技术标签:

【中文标题】堆叠分类器无法识别 Keras【英文标题】:Stacking Classifier doesn't recognize Keras 【发布时间】:2020-08-28 17:20:36 【问题描述】:

我在 5 个 scikit-learn 分类器和一个 Keras 分类器上使用 StackingClassifier。然而,它似乎没有将 Keras 识别为分类器。

相关代码:

from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
def create_model ():
    """Build and compile the Sequential network that is passed to KerasClassifier.

    Takes no arguments: the layer width, initializer, weight constraint,
    dropout rate and learning rate are read from module-level ``best_*``
    names, and the input width from ``X_train.shape[1]``. None of those
    names are defined in this snippet.

    Returns:
        The compiled model (binary cross-entropy loss, RMSprop optimizer,
        AUC and accuracy metrics).
    """
    # create model
    model = Sequential()
    # Hidden layer; its incoming weights are constrained with maxnorm.
    model.add(Dense(best_neurons, input_shape=(X_train.shape[1],), kernel_initializer=best_init_mode, activation='relu', 
                   kernel_constraint=maxnorm(best_weight_constraint)))
    model.add(Dropout(best_dropout_rate))
    model.add(Flatten())
    # NOTE(review): `tf` is not imported in the visible snippet (only `keras`
    # is) - confirm it is imported elsewhere in the asker's notebook.
    optimizer= tf.keras.optimizers.RMSprop(lr=best_learn_rate)
    # Single sigmoid output unit, paired with binary cross-entropy below.
    model.add(Dense(units = 1, kernel_initializer=best_init_mode, activation = 'sigmoid'))  # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model


# Base learners for the stack. Every `best_*` hyperparameter is read from
# module scope and is not defined in this snippet (presumably the result of
# an earlier search - confirm against the full notebook).
# Keras network wrapped in the scikit-learn compatible KerasClassifier.
NN_clf=KerasClassifier(build_fn=create_model, epochs=best_epochs, batch_size= best_batch_size)
RF_clf =RandomForestClassifier(max_depth=best_max_depth_rf, n_estimators=best_n_estimators_rf, 
                               min_samples_leaf=best_min_samples_leaf_rf, max_features=best_max_features_rf,
                               class_weight=best_class_weight_rf, max_samples=best_max_samples_rf,
                               random_state=42, oob_score=True)
KN_clf =KNeighborsClassifier(n_neighbors=best_n_neighbors,  p=best_p, leaf_size=best_leaf_size )
#DT_clf = DecisionTreeClassifier(max_depth=best_max_depth_dt, min_samples_leaf=best_min_samples_leaf_dt)
SV_clf =  SVC(gamma=best_gamma_sv, C=best_c_sv, kernel=best_kernel_sv, random_state=42, probability=True)
GBC_clf =  xgb.XGBClassifier(learning_rate=best_learning_rate_gbc, random_state=42, colsample_bytree=best_colsample_bytree_gbc,
                             max_depth=best_max_depth_gbc, n_estimators=best_n_estimators_gbc,
                            gamma=best_gamma_gbc, subsample=best_subsample_gbc)
EX_clf= ExtraTreesClassifier(max_depth=best_max_depth_ex, n_estimators=best_n_estimators_ex, 
                             min_samples_leaf=best_min_samples_leaf_ex, max_features=best_max_features_ex,
                             warm_start=False, oob_score=True, bootstrap=True, random_state=42)
LR_clf=LogisticRegression(random_state=42, solver=best_solver, penalty=best_penalty, class_weight=best_class_weight, C=best_log_C)

# (name, estimator) pairs consumed by StackingClassifier; the Keras wrapper
# is the last entry.
estimators= [('RF', RF_clf), ('GBC', GBC_clf),  ('EX', EX_clf), ('LR',LR_clf), ('KN', KN_clf),
            ('SV', SV_clf), ('NN', NN_clf) ]
# A logistic regression is the final estimator on top of the base learners.
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
# This fit() call is where the reported
# "ValueError: The estimator KerasClassifier should be a classifier." is raised.
clf.fit(X_train, y_train.values.ravel())
print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-41-272df6aa838e> in <module>
      2             ('SV', SV_clf), ('NN', NN_clf) ]
      3 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
----> 4 clf.fit(X_train, y_train.values.ravel())
      5 print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    411         self._le = LabelEncoder().fit(y)
    412         self.classes_ = self._le.classes_
--> 413         return super().fit(X, self._le.transform(y), sample_weight)
    414 
    415     @if_delegate_has_method(delegate='final_estimator_')

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    129         # all_estimators contains all estimators, the one to be fitted and the
    130         # 'drop' string.
--> 131         names, all_estimators = self._validate_estimators()
    132         self._validate_final_estimator()
    133 

~\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py in _validate_estimators(self)
    247                 raise ValueError(
    248                     "The estimator {} should be a {}.".format(
--> 249                         est.__class__.__name__, is_estimator_type.__name__[3:]
    250                     )
    251                 )

ValueError: The estimator KerasClassifier should be a classifier.

我正在使用 scikit-learn 0.22 版本和 TF 2.x 版本。我在 here 看到了类似的错误,但不想重写我的代码并改用 MLextend 库。

【问题讨论】:

【参考方案1】:

这个问题的原因与 here 针对 VotingClassifier 报告的类似问题相同。

解决方案就是给 KerasClassifier 实例添加属性 _estimator_type = 'classifier'。

注意:请仅提供重现问题的最少代码。

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier

def create_model():
    """Build and compile the small binary classifier wrapped by KerasClassifier.

    The network takes 20 input features and has one 20-unit ReLU hidden
    layer, dropout of 0.2, a Flatten layer and a single sigmoid output unit.

    Returns:
        The compiled Sequential model (binary cross-entropy loss, RMSprop
        optimizer with learning rate 0.001, AUC and accuracy metrics).
    """
    # create model
    model = Sequential()
    model.add(Dense(20, input_dim=20, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(units=1, activation='sigmoid'))

    # `learning_rate` is the supported argument name in TF 2.x; `lr` is a
    # deprecated alias.
    optimizer = keras.optimizers.RMSprop(learning_rate=0.001)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
    return model


# SVC is used below but is not imported in the listing above; without this
# import the example stops with a NameError before reaching the stacking step.
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

NN_clf = KerasClassifier(build_fn=create_model, epochs=15, batch_size=32)
# The actual fix: StackingClassifier validates each estimator's type, and the
# KerasClassifier wrapper is not recognised as a classifier until it carries
# this attribute.
NN_clf._estimator_type = "classifier"

# scikit-learn base learners, left at (mostly) default settings to keep the
# reproduction minimal.
RF_clf = RandomForestClassifier(random_state=42, oob_score=True)
KN_clf = KNeighborsClassifier()
SV_clf = SVC(random_state=42, probability=True)
EX_clf = ExtraTreesClassifier(random_state=42)
LR_clf = LogisticRegression(random_state=42,)

# (name, estimator) pairs for the stack; the Keras wrapper is the last entry.
estimators = [('RF', RF_clf), ('EX', EX_clf), ('LR', LR_clf), ('KN', KN_clf),
              ('SV', SV_clf), ('NN', NN_clf)]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

# Synthetic data from make_classification with its default arguments.
X, y = make_classification()

# Hold out 30% of the samples for scoring.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


clf.fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_test, y_test))

# Stacking model score: 0.967

【讨论】:

以上是关于堆叠分类器无法识别 Keras的主要内容,如果未能解决你的问题,请参考以下文章

无法将 Keras Generator 图像传递给人脸识别

C++使用opencv调用级联分类器来识别目标物体

以下方法无法识别 UIButton 选择器

[Python图像识别] 五十.Keras构建AlexNet和CNN实现自定义数据集分类详解

keras入门实战:手写数字识别

Angularjs验证 - 无法识别引导日期选择器输入