将 gridsearchCV 与 Keras RNN-LSTM 一起使用时出现尺寸错误
Posted
技术标签:
【中文标题】将 gridsearchCV 与 Keras RNN-LSTM 一起使用时出现尺寸错误【英文标题】:Dimension error when using gridsearchCV with Keras RNN-LSTM 【发布时间】:2019-12-12 13:08:34 【问题描述】:目标是通过 gridsearchCV 优化 Keras LSTM 的超参数。第一次训练后会产生维度错误。 RNN-LSTM 需要 3D 输入。 gridsearchCV 是否存在维度冲突?代码如下:
def createLSTMModel(dropout_rate=0.1, optimizer='Nadam', learning_rate=0.015,
                    activation='relu', loss='mae', n_jobs=1):
    """Build and compile a stacked bidirectional LSTM model.

    Passed as ``build_fn`` to ``KerasRegressor`` so that GridSearchCV can
    rebuild the network for every hyper-parameter candidate.

    Args:
        dropout_rate: Rate given to each of the three ``Dropout`` layers.
        optimizer: Optimizer identifier handed to ``model.compile``.
        learning_rate: Accepted so it can appear in the parameter grid.
            NOTE(review): never read in this body, so varying it does not
            change the compiled model.
        activation: Accepted so it can appear in the parameter grid.
            NOTE(review): never read in this body either.
        loss: Loss identifier handed to ``model.compile``.
        n_jobs: Unused in this body.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Drop the previous Keras graph/session before building a new model;
    # the post states this is needed to avoid OOM across grid candidates.
    K.clear_session()

    model = Sequential()
    model.add(Bidirectional(LSTM(250, return_sequences=True),
                            input_shape=(Train_Num, 1)))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(170, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    model.add(Bidirectional(LSTM(25, return_sequences=True)))
    model.add(Dropout(dropout_rate))
    # Every LSTM keeps return_sequences=True and Flatten stays disabled, so
    # the Dense layer below presumably emits one value per time step
    # (a 3-D prediction) -- TODO confirm against model.summary().
    #model.add(Flatten())
    model.add(Dense(1))
    model.compile(optimizer=optimizer, loss=loss)
    return model
# Hyper-parameter grid searched by GridSearchCV.
# The full Cartesian product is 4 * 2 * 4 * 3 * 5 * 5 * 4 = 9600 candidates.
# 'batch_size' and 'epochs' are presumably consumed by the Keras wrapper's
# fit(); the remaining keys match createLSTMModel's keyword arguments.
grid_param_LSTM = {
    'batch_size': [348, 256, 200, 128],
    'epochs': [15, 30],
    'learning_rate': [0.001, 0.01, 0.1, 0.0001],
    'optimizer': ['Nadam', 'Adam', 'RMSProp'],
    'loss': ['logcosh', 'mae', 'mse', 'hinge', 'squared_hinge'],
    'activation': ['relu', 'linear', 'sigmoid', 'hard_sigmoid', 'tanh'],
    'dropout_rate': [0.1, 0.2, 0.4, 0.6],
}
# Wrap the Keras model builder so scikit-learn can drive it as an estimator.
model_LSTM = KerasRegressor(build_fn=createLSTMModel)

# Multi-metric search: every listed scorer is evaluated on each fold, and the
# final model is refit using the metric named in `refit`.
GridLSTM = GridSearchCV(
    estimator=model_LSTM,
    param_grid=grid_param_LSTM,
    scoring=[
        'neg_mean_squared_error',
        'r2',
        'explained_variance',
        'max_error',
        'neg_mean_absolute_error',
        'neg_median_absolute_error',
    ],
    refit='neg_median_absolute_error',
    cv=2,
)

# Reshape to the 3-D layout the LSTM input layer expects:
# 4 sequences of Train_Num steps with 1 feature each.
X_train_R_Grid = X_train_smallLSTM.reshape(4, Train_Num, 1)
y_train_R_Grid = y_train_smallLSTM.reshape(4, Train_Num, 1)

# This is the call that raises the ValueError shown in the traceback:
# training completes, then the built-in scorers reject the prediction shape.
GridLSTM.fit(X_train_R_Grid, y_train_R_Grid)
产生以下错误:
Epoch 1/15
1/1 [==============================] - 17s 17s/step - loss: 1.4543
Epoch 2/15
1/1 [==============================] - 15s 15s/step - loss: 1.4568
Epoch 3/15
1/1 [==============================] - 13s 13s/step - loss: 1.3417
Epoch 4/15
1/1 [==============================] - 13s 13s/step - loss: 1.2683
Epoch 5/15
1/1 [==============================] - 14s 14s/step - loss: 1.2689
Epoch 6/15
1/1 [==============================] - 13s 13s/step - loss: 1.2692
Epoch 7/15
1/1 [==============================] - 12s 12s/step - loss: 1.2667
Epoch 8/15
1/1 [==============================] - 12s 12s/step - loss: 1.2663
Epoch 9/15
1/1 [==============================] - 12s 12s/step - loss: 1.2685
Epoch 10/15
1/1 [==============================] - 12s 12s/step - loss: 1.2660
Epoch 11/15
1/1 [==============================] - 12s 12s/step - loss: 1.2628
Epoch 12/15
1/1 [==============================] - 14s 14s/step - loss: 1.2666
Epoch 13/15
1/1 [==============================] - 13s 13s/step - loss: 1.2662
Epoch 14/15
1/1 [==============================] - 13s 13s/step - loss: 1.2656
Epoch 15/15
1/1 [==============================] - 13s 13s/step - loss: 1.2644
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-251-ec253ff8a2db> in <module>
----> 1 GridLSTM.fit(X_train_R_Grid, y_train_R_Grid)
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_search.py in fit(self, X, y, groups, **fit_params)
685 return results
686
--> 687 self._run_search(evaluate_candidates)
688
689 # For multi-metric evaluation, store the best_index_, best_params_ and
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_search.py in _run_search(self, evaluate_candidates)
1146 def _run_search(self, evaluate_candidates):
1147 """Search all candidates in param_grid"""
-> 1148 evaluate_candidates(ParameterGrid(self.param_grid))
1149
1150
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_search.py in evaluate_candidates(candidate_params)
664 for parameters, (train, test)
665 in product(candidate_params,
--> 666 cv.split(X, y, groups)))
667
668 if len(out) < 1:
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\Anaconda3\envs\Tensorflow\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, error_score)
552 fit_time = time.time() - start_time
553 # _score will return dict if is_multimetric is True
--> 554 test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
555 score_time = time.time() - start_time - fit_time
556 if return_train_score:
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_validation.py in _score(estimator, X_test, y_test, scorer, is_multimetric)
595 """
596 if is_multimetric:
--> 597 return _multimetric_score(estimator, X_test, y_test, scorer)
598 else:
599 if y_test is None:
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\model_selection\_validation.py in _multimetric_score(estimator, X_test, y_test, scorers)
625 score = scorer(estimator, X_test)
626 else:
--> 627 score = scorer(estimator, X_test, y_test)
628
629 if hasattr(score, 'item'):
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\metrics\scorer.py in __call__(self, estimator, X, y_true, sample_weight)
95 else:
96 return self._sign * self._score_func(y_true, y_pred,
---> 97 **self._kwargs)
98
99
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\metrics\regression.py in explained_variance_score(y_true, y_pred, sample_weight, multioutput)
413 """
414 y_type, y_true, y_pred, multioutput = _check_reg_targets(
--> 415 y_true, y_pred, multioutput)
416 check_consistent_length(y_true, y_pred, sample_weight)
417
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\metrics\regression.py in _check_reg_targets(y_true, y_pred, multioutput)
75
76 """
---> 77 check_consistent_length(y_true, y_pred)
78 y_true = check_array(y_true, ensure_2d=False)
79 y_pred = check_array(y_pred, ensure_2d=False)
~\Anaconda3\envs\Tensorflow\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
203 if len(uniques) > 1:
204 raise ValueError("Found input variables with inconsistent numbers of"
--> 205 " samples: %r" % [int(l) for l in lengths])
206
207
ValueError: Found input variables with inconsistent numbers of samples: [1, 1396]
软件包版本
System:
python: 3.6.9 |Anaconda, Inc.| (default, Jul 30 2019, 14:00:49) [MSC v.1915 64 bit (AMD64)]
executable: C:\Users\alias\Anaconda3\envs\Tensorflow\python.exe
machine: Windows-10-10.0.18362-SP0
BLAS:
macros:
lib_dirs:
cblas_libs: cblas
Python deps:
pip: 19.1.1
setuptools: 39.1.0
sklearn: 0.21.2
numpy: 1.15.0
scipy: 1.2.1
Cython: 0.28.4
pandas: 0.24.2
keras: 2.1.6
tensorflow:1.9.0
特意选用上述 keras、numpy 和 tensorflow 版本,是为了能够使用 K.clear_session();在最新版本上,与 gridsearchCV 结合使用时会出现 OOM(内存不足)问题。
gridsearchCV 不适合 RNN 使用还是有解决方法?
提前非常感谢。
最好的问候, JDS
【问题讨论】:
github.com/cerlymarco/keras-hypetune 【参考方案1】:是的。 GridSearch
需要二维数组作为输入,所以在你的情况下会失败。
另请参阅:GridSearchCV/RandomizedSearchCV with LSTM
下面是一个可运行的示例,演示 Keras 与 GridSearch 配合使用二维输入的情况。grid.fit(X, Y) 中传入的 X 必须是一个二维数组。
数据集:https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv
注意:下载后请保存为 pima-indians-diabetes.csv,以便与下方代码中 numpy.loadtxt 读取的文件名一致。
# Use scikit-learn to grid search the batch size and epochs
import numpy
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
# Function to create model, required for KerasClassifier
def create_model():
    """Build and compile the small dense binary classifier.

    Returns:
        A compiled ``Sequential`` model: one hidden ``Dense`` layer of 12
        ReLU units over 8 input features, then a single sigmoid output unit,
        compiled with binary cross-entropy loss, the Adam optimizer and the
        accuracy metric.
    """
    # create model
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load dataset
dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")

# split into input (X) and output (Y) variables:
# the first 8 columns are the features, column 8 is the label
X = dataset[:,0:8]
Y = dataset[:,8]

# create model
model = KerasClassifier(build_fn=create_model, verbose=0)

# define the grid search parameters
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = dict(batch_size=batch_size, epochs=epochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X, Y)

# summarize results: best candidate first, then every candidate's
# mean/std test score alongside its parameters
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))
【讨论】:
您好 Serafeim,感谢您的贡献。您从 Brownlee 发布的代码是 ANN,所以不幸的是,这没有帮助。您的链接可以,但是当我想将model.add(Reshape)
实现为输入层时,会发生奇怪的事情。对于model.add(Reshape((2,Train_Num),input_shape=(2*Train_Num,)))
,会弹出以下错误:expected reshape_1_input to have shape (2792,) but got array with shape (1,)
。但是,实现model.add(Reshape((2,Train_Num,),input_shape=(2*Train_Num,1)))
会得到expected reshape_1_input to have 3 dimensions, but got array with shape (2792, 1)
另外,管道(Pipeline)方案对我来说很不透明,mea culpa^^' 你有可运行的示例吗?我一直遇到一些问题,例如 All intermediate steps should be transformers and implement fit and transform or be the string 'passthrough' '(array([[[-1.61538462], [-1.54076656], [-1.51940169],
for ('reshaper',CustomReshaper(X_train_smallLSTM,y_train_smallLSTM)), ----> 5 ('clf',model_LSTM) 6 ])
。我不太清楚如何克服这个问题......所以带有 LSTM 的 gridsearchCV 对我来说仍然是一个挑战......
您能分享一个可运行的 LSTM GridSearch(或 Cross_Val_Pred)示例吗?
@JDS 你找到解决方案了吗?【参考方案2】:
问题出在评分(scoring)环节。使用带有维度转换的自定义评分器(scorer)即可解决,就这么简单:
def CustomMAD(y_true, y_pred):
    """Median absolute error computed on flattened targets and predictions.

    Intended for ``make_scorer(CustomMAD, greater_is_better=False)``: the
    value returned here is an error, so smaller is better.

    Args:
        y_true: Ground-truth targets of any shape.
        y_pred: Model predictions holding the same number of elements.

    Returns:
        The median absolute error, or a large positive penalty when the
        predictions contain NaN.
    """
    # The penalty must be POSITIVE. With greater_is_better=False the scorer
    # negates this value, so a negative sentinel would turn into the highest
    # possible score and diverged models would be selected as the best.
    nan_penalty = 1000000

    y_pred = np.asarray(y_pred)
    if np.isnan(y_pred).any():
        return nan_penalty

    # Collapse both arrays to column vectors so their sample counts agree.
    # Using -1 instead of a fixed length also supports folds holding more
    # than one sequence.
    y_true = np.asarray(y_true).reshape(-1, 1)
    y_pred = y_pred.reshape(-1, 1)
    return median_absolute_error(y_true, y_pred)
# Wrap the custom metric as a scorer. greater_is_better=False marks it as an
# error metric, so the search treats lower CustomMAD values as better.
scorer = make_scorer(CustomMAD, greater_is_better=False)

# Single-metric grid search over the LSTM wrapper with 3-fold cross-validation.
GridLSTM = GridSearchCV(
    estimator=model_LSTM,
    param_grid=grid_param_LSTM,
    scoring=scorer,
    cv=3,
)
【讨论】:
以上是关于将 gridsearchCV 与 Keras RNN-LSTM 一起使用时出现尺寸错误的主要内容,如果未能解决你的问题,请参考以下文章
使用 keras 在虹膜上的 GridSearchCV 结果不佳
使用 Keras 和 sklearn GridSearchCV 交叉验证提前停止
GridSearchCV中“n_jobs == 1”的含义与使用多个GPU