使用 KerasClassifier 和 RandomizedSearchCV 进行超参数优化时出现 TypeError: 'list' 对象不能被解释为整数

Posted

技术标签:

【中文标题】使用 KerasClassifier 和 RandomizedSearchCV 进行超参数优化时出现 TypeError: 'list' 对象不能被解释为整数 【英文标题】:Hyperparameter Optimization using KerasClassifier randomizedsearchcv, TypeError: 'list' object cannot be interpreted as an integer 【发布时间】:2019-11-13 21:14:42 【问题描述】:

我正在尝试使用 RandomizedSearchCV 优化 ANN 的超参数,例如每层的神经元数、层数、learning_rate、epochs、batch_size 和 dropout,代码如下:

def neural_network(num_neurons=64,num_layers=4,input_dim=8,
                   output_dim=2,learning_rate=1.0e-05,act='relu',
                   dropout=0.3):
    """Build and compile a fully connected softmax classifier.

    Args:
        num_neurons: Number of units in every hidden ``Dense`` layer.
        num_layers: Number of hidden ``Dense`` layers, counting the first
            (input) layer. The first layer is always created, so values
            below 1 still yield one hidden layer.
        input_dim: Number of input features expected by the first layer.
        output_dim: Number of units in the softmax output layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        act: Activation used by every hidden layer.
        dropout: Rate of the single ``Dropout`` layer placed before the
            output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # The first hidden layer also declares the input size. It now honours
    # `act` like the other hidden layers instead of hard-coding 'relu';
    # with the default act='relu' the resulting network is unchanged.
    model.add(Dense(num_neurons, activation=act, input_dim=input_dim))

    # Remaining hidden layers (the first one was added above).
    for _ in range(num_layers - 1):
        model.add(Dense(num_neurons, activation=act))

    # One dropout layer between the hidden stack and the output layer.
    model.add(Dropout(dropout))
    model.add(Dense(output_dim, activation='softmax'))

    # NOTE(review): `lr` is the legacy keyword; newer Keras releases spell it
    # `learning_rate` -- confirm against the installed version before changing.
    adam = optimizers.Adam(lr=learning_rate)

    model.compile(adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
# Wrap the model factory so scikit-learn can treat it as an estimator.
create_model = neural_network
model = KerasClassifier(build_fn=create_model,verbose=0)

# Candidate values for each hyperparameter; the search samples from these.
batch_size = [16,32,64]
epochs = [200,300,500]
num_neurons = [64,128,256]
num_layers= [2,4,6]
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
dropout = [0.1,0.3,0.5]
param_grid = dict(batch_size=batch_size,epochs=epochs,
                      num_neurons=num_neurons,
                      num_layers=num_layers,
                      learning_rate=learning_rate,
                      dropout=dropout
                     )
grid = RandomizedSearchCV(estimator=model,param_distributions=param_grid,cv=3,n_iter=3)
# NOTE: this call is what raises the TypeError. `epochs` and `batch_size`
# are still the candidate *lists* defined above; extra keyword arguments to
# fit() are forwarded unchanged to the Keras model's fit(), which then
# evaluates range(initial_epoch, epochs) with a list.
grid_result = grid.fit(x,y,epochs=epochs,batch_size=batch_size)

我得到的错误是:TypeError: 'list' object cannot be interpreted as an integer

谁能帮我找出代码中的问题?谢谢!

   ---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-159-aa51f2b7cc03> in <module>
     13                      )
     14 grid = RandomizedSearchCV(estimator=model,param_distributions=param_grid,cv=3,n_iter=3)
---> 15 grid_result = grid.fit(x,y,epochs=epochs,batch_size=batch_size)

D:\Anaconda\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
    720                 return results_container[0]
    721 
--> 722             self._run_search(evaluate_candidates)
    723 
    724         results = results_container[0]

D:\Anaconda\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
   1513         evaluate_candidates(ParameterSampler(
   1514             self.param_distributions, self.n_iter,
-> 1515             random_state=self.random_state))

D:\Anaconda\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
    709                                for parameters, (train, test)
    710                                in product(candidate_params,
--> 711                                           cv.split(X, y, groups)))
    712 
    713                 all_candidate_params.extend(candidate_params)

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self, iterable)
    915             # remaining jobs.
    916             self._iterating = False
--> 917             if self.dispatch_one_batch(iterator):
    918                 self._iterating = self._original_iterator is not None
    919 

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self, iterator)
    757                 return False
    758             else:
--> 759                 self._dispatch(tasks)
    760                 return True
    761 

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self, batch)
    714         with self._lock:
    715             job_idx = len(self._jobs)
--> 716             job = self._backend.apply_async(batch, callback=cb)
    717             # A job can complete so quickly than its callback is
    718             # called before we get here, causing self._jobs to

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self, func, callback)
    180     def apply_async(self, func, callback=None):
    181         """Schedule a func to be run"""
--> 182         result = ImmediateResult(func)
    183         if callback:
    184             callback(result)

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self, batch)
    547         # Don't delay the application, to avoid keeping the input
    548         # arguments in memory
--> 549         self.results = batch()
    550 
    551     def get(self):

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

D:\Anaconda\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0)
    223         with parallel_backend(self._backend, n_jobs=self._n_jobs):
    224             return [func(*args, **kwargs)
--> 225                     for func, args, kwargs in self.items]
    226 
    227     def __len__(self):

D:\Anaconda\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
    526             estimator.fit(X_train, **fit_params)
    527         else:
--> 528             estimator.fit(X_train, y_train, **fit_params)
    529 
    530     except Exception as e:

D:\Anaconda\lib\site-packages\keras\wrappers\scikit_learn.py in fit(self, x, y, sample_weight, **kwargs)
    208         if sample_weight is not None:
    209             kwargs['sample_weight'] = sample_weight
--> 210         return super(KerasClassifier, self).fit(x, y, **kwargs)
    211 
    212     def predict(self, x, **kwargs):

D:\Anaconda\lib\site-packages\keras\wrappers\scikit_learn.py in fit(self, x, y, **kwargs)
    150         fit_args.update(kwargs)
    151 
--> 152         history = self.model.fit(x, y, **fit_args)
    153 
    154         return history

D:\Anaconda\lib\site-packages\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
   1037                                         initial_epoch=initial_epoch,
   1038                                         steps_per_epoch=steps_per_epoch,
-> 1039                                         validation_steps=validation_steps)
   1040 
   1041     def evaluate(self, x=None, y=None,

D:\Anaconda\lib\site-packages\keras\engine\training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
    140             indices_for_conversion_to_dense.append(i)
    141 
--> 142     for epoch in range(initial_epoch, epochs):
    143         # Reset stateful metrics
    144         for m in model.stateful_metric_functions:

TypeError: 'list' object cannot be interpreted as an integer

【问题讨论】:

你能发布完整的错误回溯(traceback)吗?

我已经编辑了问题,并附上了完整的错误回溯。

【参考方案1】:

问题在于下面这一行又单独把 epochs 和 batch_size 传给了 fit:

# Failing call: `epochs` and `batch_size` are the candidate lists here, and
# fit() forwards them as-is to the underlying Keras model.
grid_result = grid.fit(x,y,epochs=epochs,batch_size=batch_size)

请改成下面这样。我们已经在 param_grid 中设置了 epochs 和 batch_size 的候选值,无需再传给 fit。

# No epochs/batch_size here: param_grid already lists them, so the search
# supplies one value of each per candidate.
grid_result = grid.fit(X, y) 

可重现的例子:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import  RandomizedSearchCV
from tensorflow.keras import Sequential
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras import optimizers



# Synthetic two-class dataset with a 70/30 class weighting; the fixed seed
# keeps the generated data identical between runs.
X, y = make_classification(
    n_samples=1000,
    n_classes=2,
    n_informative=4,
    weights=[0.7, 0.3],
    random_state=0,
)

def neural_network(num_neurons=64,num_layers=4,input_dim=20,
                   output_dim=2,learning_rate=1.0e-05,act='relu',
                   dropout=0.3):
    """Build and compile a fully connected softmax classifier.

    Args:
        num_neurons: Number of units in every hidden ``Dense`` layer.
        num_layers: Number of hidden ``Dense`` layers, counting the first
            (input) layer. The first layer is always created, so values
            below 1 still yield one hidden layer.
        input_dim: Number of input features expected by the first layer.
        output_dim: Number of units in the softmax output layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        act: Activation used by every hidden layer.
        dropout: Rate of the single ``Dropout`` layer placed before the
            output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    # The first hidden layer also declares the input size. It now honours
    # `act` like the other hidden layers instead of hard-coding 'relu';
    # with the default act='relu' the resulting network is unchanged.
    model.add(Dense(num_neurons, activation=act, input_dim=input_dim))

    # Remaining hidden layers (the first one was added above).
    for _ in range(num_layers - 1):
        model.add(Dense(num_neurons, activation=act))

    # One dropout layer between the hidden stack and the output layer.
    model.add(Dropout(dropout))
    model.add(Dense(output_dim, activation='softmax'))

    # NOTE(review): `lr` is the legacy keyword; newer Keras releases spell it
    # `learning_rate` -- confirm against the installed version before changing.
    adam = optimizers.Adam(lr=learning_rate)

    model.compile(adam,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# scikit-learn compatible wrapper around the Keras model factory.
model = KerasClassifier(build_fn=neural_network, verbose=0)

# Deliberately tiny candidate sets so the example finishes quickly.
batch_size = [16, 32, 64]
epochs = [2, 3]
num_neurons = [6, 1, 2]
num_layers = [1, 2]
learning_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
dropout = [0.1, 0.3, 0.5]

# Search space: each key is either a fit() argument (batch_size, epochs)
# or a keyword argument of neural_network().
param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'num_neurons': num_neurons,
    'num_layers': num_layers,
    'learning_rate': learning_rate,
    'dropout': dropout,
}

grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=1,
    cv=2,
)

# epochs and batch_size come from param_grid, so fit() only gets the data.
grid_result = grid.fit(X, y)

grid_result.best_params_

输出:

{'batch_size': 64,
 'dropout': 0.1,
 'epochs': 2,
 'learning_rate': 0.01,
 'num_layers': 1,
 'num_neurons': 6}

【讨论】:

以上是关于使用 KerasClassifier 随机搜索cv 进行超参数优化,TypeError: 'list' 对象不能被解释为整数的主要内容,如果未能解决你的问题,请参考以下文章

使用超网格搜索和 10 倍 CV 调整参数后,随机森林模型的 AUC 较低

如何将 X_train + X_eval 传递给随机搜索 CV 拟合方法

sklearn 网格搜索与分组 K 折 cv 生成器

尝试使用管道和网格搜索运行随机森林分类器时出错

尽管一切正常,但 KerasClassifier 无法拟合模型

KerasClassifier 对象没有属性模型