使用 Keras 进行分类:预测和多类

Posted

技术标签:

【中文标题】使用 Keras 进行分类:预测和多类【英文标题】:Classification with Keras: prediction and multiclass 【发布时间】:2018-02-17 12:37:18 【问题描述】:

我用 Keras 实现了一个多类分类器。我现在的问题出在做预测这一步:运行时会报错。我认为这与代码中的预测部分有关。

代码如下:

import numpy as np  
from keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img  
from keras.models import Sequential  
from keras.layers import Dropout, Flatten, Dense  
from keras import applications  
from keras.utils.np_utils import to_categorical 

from PIL import Image

import matplotlib.pyplot as plt  
import math

%matplotlib inline

# Dimensions of our images: used as target_size when images are loaded
# by flow_from_directory and load_img.
img_width, img_height = 150, 150  

# File the trained top-model weights are written to / read from.
top_model_weights_path = 'bottleneck_fc_model.h5'  
# Root directories handed to flow_from_directory for each data split.
train_data_dir = 'data/train'  
validation_data_dir = 'data/validation'  

# Training settings: epochs is passed to model.fit; batch_size is shared by
# the directory generators, model.fit and model.evaluate.
epochs = 30  
batch_size = 16

def save_bottleneck_features():
    """Run the train and validation images through VGG16 and save the outputs.

    Builds VGG16 without its top layers (ImageNet weights), feeds it every
    image found under ``train_data_dir`` and ``validation_data_dir`` (rescaled
    by 1/255, resized to ``(img_width, img_height)``) and stores the resulting
    "bottleneck" features in ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``.

    The generators use ``shuffle=False`` so the saved features keep the
    directory order of the files.
    """
    model = applications.VGG16(include_top=False, weights='imagenet')
    datagen = ImageDataGenerator(rescale=1. / 255)

    # --- training split ---------------------------------------------------
    generator = datagen.flow_from_directory(
        train_data_dir, target_size=(img_width, img_height),
        batch_size=batch_size, class_mode=None, shuffle=False)

    n_train_samples = len(generator.filenames)
    n_classes = len(generator.class_indices)
    # The "{}" placeholders are required; without them format() drops the value.
    print("Number of train files = {}".format(n_train_samples))
    print("Number of classes = {}".format(n_classes))

    # Round up so the final, possibly partial, batch is also predicted.
    predict_size_train = int(math.ceil(n_train_samples / batch_size))

    bottleneck_features_train = model.predict_generator(
        generator, predict_size_train)

    np.save('bottleneck_features_train.npy', bottleneck_features_train)

    # --- validation split -------------------------------------------------
    generator = datagen.flow_from_directory(
        validation_data_dir, target_size=(img_width, img_height),
        batch_size=batch_size, class_mode=None, shuffle=False)

    n_validation_samples = len(generator.filenames)

    predict_size_validation = int(
        math.ceil(n_validation_samples / batch_size))

    bottleneck_features_validation = model.predict_generator(
        generator, predict_size_validation)

    np.save('bottleneck_features_validation.npy',
            bottleneck_features_validation)


def train_top_model():
    """Train a small dense classifier on the saved bottleneck features.

    Loads ``bottleneck_features_train.npy`` and
    ``bottleneck_features_validation.npy``, takes the integer class labels
    from directory generators over ``train_data_dir`` and
    ``validation_data_dir``, one-hot encodes them and fits a
    Flatten -> Dense(256, relu) -> Dropout(0.5) -> Dense(n_classes, softmax)
    model. The weights are saved to ``top_model_weights_path`` and the
    validation accuracy and loss are printed.

    Returns:
        The trained ``Sequential`` top model. Its input is bottleneck
        features (``train_data.shape[1:]``), not raw images.
    """
    datagen_top = ImageDataGenerator(rescale=1. / 255)

    # The generators are only used to read class indices; shuffle=False keeps
    # the labels in directory order, the same order used (shuffle=False) when
    # the bottleneck features were generated.
    generator_top = datagen_top.flow_from_directory(
        train_data_dir, target_size=(img_width, img_height),
        batch_size=batch_size, class_mode='categorical', shuffle=False)

    n_classes = len(generator_top.class_indices)

    # load the bottleneck features saved earlier
    train_data = np.load('bottleneck_features_train.npy')

    # get the class labels for the training data, in the original order,
    # and convert them to categorical (one-hot) vectors
    train_labels = to_categorical(generator_top.classes,
                                  num_classes=n_classes)

    generator_top = datagen_top.flow_from_directory(
        validation_data_dir, target_size=(img_width, img_height),
        batch_size=batch_size, class_mode=None, shuffle=False)

    validation_data = np.load('bottleneck_features_validation.npy')

    validation_labels = to_categorical(generator_top.classes,
                                       num_classes=n_classes)

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    # softmax (not sigmoid): one class per image with categorical
    # cross-entropy, and the same activation the prediction code uses when it
    # rebuilds this layer before loading the saved weights.
    model.add(Dense(n_classes, activation='softmax'))

    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy', metrics=['accuracy'])

    # Return value (training history) is intentionally not kept.
    model.fit(train_data, train_labels, epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))

    model.save_weights(top_model_weights_path)

    (eval_loss, eval_accuracy) = model.evaluate(
        validation_data, validation_labels,
        batch_size=batch_size, verbose=1)

    # "{}" placeholders restored so the values are actually printed.
    print("[INFO] accuracy: {:.2f}%".format(eval_accuracy * 100))
    print("[INFO] Loss: {}".format(eval_loss))
    return model

要执行我们所做的程序:

# Step 1: write the VGG16 bottleneck features for both splits to .npy files.
save_bottleneck_features()
# Step 2: train the dense top model on those features and keep it as `model`.
model = train_top_model()

当我尝试进行预测时,使用以下代码:

img_path = 'image_test/bird.jpg'  

# predicting images
# NOTE: this is the failing attempt from the question. `model` is the top
# model returned by train_top_model(), whose first layer was built with
# input_shape=train_data.shape[1:] (bottleneck features). Feeding it a raw
# image batch raises the ValueError shown below: expected (None, 7, 7, 512)
# but got (1, 150, 150, 3).
img = load_img(img_path, target_size=(img_width, img_height))
x = img_to_array(img)
# add a leading batch axis
x = np.expand_dims(x, axis=0)

# stack the (single) batch into one array
images = np.vstack([x])
classes = model.predict_classes(images, batch_size=10)
print (classes)

它给了我以下错误:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-44-c3652addeabc> in <module>()
      8 
      9 images = np.vstack([x])
---> 10 classes = model.predict_classes(images, batch_size=10)
     11 print (classes)

~/anaconda/lib/python3.6/site-packages/keras/models.py in predict_classes(self, x, batch_size, verbose)
   1016             A numpy array of class predictions.
   1017         """
-> 1018         proba = self.predict(x, batch_size=batch_size, verbose=verbose)
   1019         if proba.shape[-1] > 1:
   1020             return proba.argmax(axis=-1)

~/anaconda/lib/python3.6/site-packages/keras/models.py in predict(self, x, batch_size, verbose)
    911         if not self.built:
    912             self.build()
--> 913         return self.model.predict(x, batch_size=batch_size, verbose=verbose)
    914 
    915     def predict_on_batch(self, x):

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in predict(self, x, batch_size, verbose, steps)
   1693         x = _standardize_input_data(x, self._feed_input_names,
   1694                                     self._feed_input_shapes,
-> 1695                                     check_batch_axis=False)
   1696         if self.stateful:
   1697             if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0:

~/anaconda/lib/python3.6/site-packages/keras/engine/training.py in _standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix)
    142                             ' to have shape ' + str(shapes[i]) +
    143                             ' but got array with shape ' +
--> 144                             str(array.shape))
    145     return arrays
    146 

ValueError: Error when checking : expected flatten_8_input to have shape (None, 7, 7, 512) but got array with shape (1, 150, 150, 3)

【问题讨论】:

您总是收到相同的错误消息。请仔细阅读它们。您的模型(以 Flatten 开头的模型)期望数据 X 的形状为 (BatchSize, 7, 7, 512),但您用来预测的 X 的形状却是 (BatchSize, 150, 150, 3)。 我知道张量的形状是错误的。但是我不知道如何解决这个问题,因为我认为问题出在预测函数中。事实上,我真的不明白如何为 DNN 提供数据。这个问题很常见(在很多地方都存在),但解决方案很少。 问题显然在于输入/输出张量是如何创建的。这个问题只有在我们清楚地了解您的数据是什么以及预期的输出是什么的情况下才能解决。你有多少个模型?一个还是两个?什么是瓶颈特征?您的模型应该接收图像还是另一个模型的输出?你能列出你的每个模型并用文字解释每个模型的输入和输出是什么吗?这会让你更容易理解你想用你的代码实现什么。 不幸的是,图像到矩阵的转换对我来说不是很清楚。据我所知,它应该始终是一个秩为 3 的矩阵,但我定义了 generator 之后,却看不到里面有什么。例如,在上一个错误中,我为系统提供了一个形状为 (1, 150, 150, 3) 的 4 阶张量,这一点我能理解,因为它是一个大小为 150x150 的具有三个通道的图像。然而,系统期望的却是类似 (Something, 7, 7, 512) 的形状,这一点我不理解。 这是您为模型定义的形状:model.add(Flatten(input_shape=train_data.shape[1:])) --- 这意味着 train_data 不是图像,而是其他东西。 --- 它是从“瓶颈特征”加载的,什么是“瓶颈特征”?它们不是您所期望的图像。 【参考方案1】:

我终于找到了答案。 为了预测图像的类别,我们需要通过与之前相同的管道运行它。

预测函数必须是:

image_path = 'image_test/bird.jpg'

# Recover the class-name -> index mapping and the number of classes from the
# training directory. Inside train_top_model() these were local variables, so
# they have to be rebuilt here for the snippet to run on its own.
datagen_top = ImageDataGenerator(rescale=1. / 255)
generator_top = datagen_top.flow_from_directory(
    train_data_dir, target_size=(img_width, img_height),
    batch_size=batch_size, class_mode='categorical', shuffle=False)
class_dictionary = generator_top.class_indices
n_classes = len(class_dictionary)

print("[INFO] loading and preprocessing image...")
image = load_img(image_path, target_size=(img_width, img_height))
image = img_to_array(image)

# important! otherwise the predictions will be '0'
# (the bottleneck features were generated with rescale=1./255, so the test
# image must be scaled the same way)
image = image / 255

# add the batch axis: (height, width, channels) -> (1, height, width, channels)
image = np.expand_dims(image, axis=0)

# build the VGG16 network
base_model = applications.VGG16(include_top=False, weights='imagenet')

# get the bottleneck prediction from the pre-trained VGG16 model
bottleneck_prediction = base_model.predict(image)

# build top model (same layers as in train_top_model so the weights fit)
model = Sequential()
model.add(Flatten(input_shape=bottleneck_prediction.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

model.load_weights(top_model_weights_path)

# use the bottleneck prediction on the top model to get the final classification
class_predicted = model.predict_classes(bottleneck_prediction)

inID = class_predicted[0]

# invert the mapping: class index -> class (directory) name
inv_map = {v: k for k, v in class_dictionary.items()}

label = inv_map[inID]

# get the prediction label
print("Image ID: {}, Label: {}".format(inID, label))

【讨论】:

以上是关于使用 Keras 进行分类:预测和多类的主要内容,如果未能解决你的问题,请参考以下文章

Keras 多类图像分类和预测

如何找出概率输出中每列的哪个类对应于使用Keras进行多类分类?

使用 Keras 稀疏分类交叉熵进行像素级多类分类

朴素贝叶斯和多类分类器返回整数。数据团队说他们应该是概率

如何使用keras进行多标签多类分类

如何处理 ValueError:分类指标无法处理多标签指标和多类目标错误的混合