深度学习MLP/LeNet/AlexNet/GoogLeNet/ResNet在三个不同数据集上的分类效果实践
Posted zstar-_
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了深度学习MLP/LeNet/AlexNet/GoogLeNet/ResNet在三个不同数据集上的分类效果实践相关的知识,希望对你有一定的参考价值。
本文是深度学习课程的实验报告
使用了MLP/LeNet/AlexNet/GoogLeNet/ResNet五个深度神经网络模型结构和MNIST、Fashion MNIST、HWDB1三个不同的数据集,所用的开发框架为tensorflow2。
本文的数据集和.ipynb文件可在此处下载:https://download.csdn.net/download/qq1198768105/85260780
实验结果
实验结果如下表所示:

| | MNIST | Fashion MNIST | HWDB1 |
|---|---|---|---|
MLP | 97.76% | 87.73% | 84.17% |
LeNet | 98.68% | 85.82% | 91.33% |
AlexNet | 98.91% | 90.57% | 89.67% |
GoogLeNet | 99.27% | 90.27% | 91.50% |
ResNet | 99.21% | 91.35% | 93.67% |
导入相关库
import os
import warnings
import gzip
import numpy as np
import tensorflow as tf
import logging
环境设置
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2' # suppress TF C++ INFO and WARNING logs (only errors are printed)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # hide all GPUs so training runs on CPU
logging.disable(30) # 30 == logging.WARNING: silence WARNING-and-below log records
加载mnist数据
def load_mnist():
    """Load the MNIST train/test arrays from the local .npz archive.

    Returns:
        ((x_train, y_train), (x_test, y_test)) as numpy arrays.
    """
    archive = np.load(r'./datasets/mnist.npz', allow_pickle=True)
    try:
        train_pair = (archive['x_train'], archive['y_train'])
        test_pair = (archive['x_test'], archive['y_test'])
    finally:
        archive.close()
    return train_pair, test_pair
加载fashion_mnist数据
def load_fashion_mnist():
    """Load Fashion-MNIST from the four gzipped IDX files under datasets/fashion-mnist.

    Returns:
        ((x_train, y_train), (x_test, y_test)) as uint8 numpy arrays; the
        image arrays are reshaped to (num_samples, 28, 28).
    """
    base = os.path.join('datasets', 'fashion-mnist')

    def read_idx(name, offset, shape=None):
        # IDX files start with a fixed-size header: 8 bytes for label
        # files, 16 bytes for image files; the payload is raw uint8.
        with gzip.open(os.path.join(base, name), 'rb') as fh:
            arr = np.frombuffer(fh.read(), np.uint8, offset=offset)
        return arr if shape is None else arr.reshape(shape)

    y_train = read_idx('train-labels-idx1-ubyte.gz', 8)
    x_train = read_idx('train-images-idx3-ubyte.gz', 16, (len(y_train), 28, 28))
    y_test = read_idx('t10k-labels-idx1-ubyte.gz', 8)
    x_test = read_idx('t10k-images-idx3-ubyte.gz', 16, (len(y_test), 28, 28))
    return (x_train, y_train), (x_test, y_test)
加载HWDB1数据
# Image preprocessing: resize to 24x24 and normalize pixel values.
def preprocess_image(image):
    """Decode a JPEG byte string into a normalized 24x24 single-channel tensor."""
    target_size = 24
    decoded = tf.image.decode_jpeg(image, channels=1)
    resized = tf.image.resize(decoded, [target_size, target_size])
    # resize() yields float32, so dividing maps the pixels into [0, 1].
    return resized / 255.0
def load_and_preprocess_image(path):
    """Read the image file at *path* and run it through preprocess_image."""
    raw_bytes = tf.io.read_file(path)
    return preprocess_image(raw_bytes)
def load_HWDB1():
    """Load the HWDB1 handwriting dataset.

    Reads image paths from train.txt / test.txt under ./datasets/HWDB1.
    Each non-empty line is a relative image path whose second path
    component is the integer class label (paths use Windows '\\'
    separators, e.g. 'train\\3\\xxx.jpg' -- TODO confirm against the
    list files).

    Returns:
        ((x_train, y_train), (x_test, y_test)) where the images are
        24x24 preprocessed tensors stacked into numpy arrays and the
        labels are numpy int arrays.
    """
    root_path = './datasets/HWDB1'

    def _load_split(list_name):
        # Parse one list file; shared by the train and test splits
        # (the original duplicated this loop verbatim for each split).
        paths, labels = [], []
        with open(os.path.join(root_path, list_name), 'r') as f:
            for line in f:
                line = line.strip()
                # NOTE: the original used `line is not ''` -- an identity
                # comparison against a literal, which is not a reliable
                # emptiness test (and a SyntaxWarning on modern CPython);
                # a plain truthiness check is the correct form.
                if line:
                    paths.append(line)
                    labels.append(int(line.split('\\')[1]))
        images = np.array([load_and_preprocess_image(p) for p in paths])
        return images, np.array(labels)

    x_train, y_train = _load_split('train.txt')
    x_test, y_test = _load_split('test.txt')
    return (x_train, y_train), (x_test, y_test)
定义mnist数据装载器
class MNISTLoader():
    """Wraps the raw MNIST arrays: normalizes images and serves random batches."""

    def __init__(self):
        (self.train_data, self.train_label), (self.test_data,
            self.test_label) = load_mnist()
        # Raw MNIST pixels are uint8 in [0, 255]; rescale to float32 in
        # [0, 1] and append a trailing channel axis for the conv models.
        self.train_data = self.train_data.astype(np.float32)[..., np.newaxis] / 255.0  # [60000, 28, 28, 1]
        self.test_data = self.test_data.astype(np.float32)[..., np.newaxis] / 255.0    # [10000, 28, 28, 1]
        self.train_label = self.train_label.astype(np.int32)  # [60000]
        self.test_label = self.test_label.astype(np.int32)    # [10000]
        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return `batch_size` randomly drawn (images, labels) from the training split."""
        picks = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[picks, :], self.train_label[picks]
定义fashion_mnist数据装载器
class Fashion_MNISTLoader():
    """Wraps Fashion-MNIST: normalizes images and serves random training batches."""

    @staticmethod
    def _normalize(images):
        # uint8 [0, 255] -> float32 [0, 1], plus a trailing channel axis.
        return np.expand_dims(images.astype(np.float32) / 255.0, axis=-1)

    def __init__(self):
        train, test = load_fashion_mnist()
        self.train_data = self._normalize(train[0])
        self.train_label = train[1].astype(np.int32)
        self.test_data = self._normalize(test[0])
        self.test_label = test[1].astype(np.int32)
        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return `batch_size` randomly drawn (images, labels) from the training split."""
        chosen = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[chosen, :], self.train_label[chosen]
定义HWDB1数据装载器
class HWDB1Loader():
    """Wraps HWDB1 (images already normalized at load time) and serves random batches."""

    def __init__(self):
        (self.train_data, self.train_label), (self.test_data,
            self.test_label) = load_HWDB1()
        # Images were scaled to [0, 1] during loading; only cast the labels.
        self.train_label = self.train_label.astype(np.int32)
        self.test_label = self.test_label.astype(np.int32)
        self.num_train_data = self.train_data.shape[0]
        self.num_test_data = self.test_data.shape[0]

    def get_batch(self, batch_size):
        """Return `batch_size` randomly drawn (images, labels) from the training split."""
        idx = np.random.randint(0, self.num_train_data, batch_size)
        return self.train_data[idx, :], self.train_label[idx]
定义多层感知机网络结构
两个全连接层
class MLP(tf.keras.Model):
    """Two-layer perceptron: flatten -> 100-unit ReLU dense -> 10-way softmax."""

    def __init__(self):
        super().__init__()
        # Flatten collapses every dimension except the batch dimension.
        self.flatten = tf.keras.layers.Flatten()
        self.dense1 = tf.keras.layers.Dense(units=100, activation=tf.nn.relu)
        self.dense2 = tf.keras.layers.Dense(units=10)

    def call(self, inputs):
        """Forward pass: [batch, 28, 28, 1] -> class probabilities [batch, 10]."""
        hidden = self.dense1(self.flatten(inputs))  # [batch, 784] -> [batch, 100]
        logits = self.dense2(hidden)                # [batch, 10]
        return tf.nn.softmax(logits)
定义LeNet网络结构
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras import Model
class LeNet(Model):
    """Classic LeNet-5 with sigmoid activations, adapted to 28x28 inputs."""

    def __init__(self):
        super(LeNet, self).__init__()
        # The original LeNet expects 32x32 inputs; MNIST images are 28x28.
        self.c1 = Conv2D(filters=6, kernel_size=(5, 5), activation='sigmoid')
        self.p1 = MaxPool2D(pool_size=(2, 2), strides=2)
        self.c2 = Conv2D(filters=16, kernel_size=(5, 5), activation='sigmoid')
        self.p2 = MaxPool2D(pool_size=(2, 2), strides=2)
        self.flatten = Flatten()
        self.f1 = Dense(120, activation='sigmoid')
        self.f2 = Dense(84, activation='sigmoid')
        self.f3 = Dense(10, activation='softmax')

    def call(self, inputs):
        """Forward pass: [batch, 28, 28, 1] -> softmax over 10 classes.

        Spatial sizes (out = (in - k + 2p)/s + 1): conv 28->24, pool 24->12,
        conv 12->8, pool 8->4, so the flattened vector has 4*4*16 = 256
        features before the dense layers (256 -> 120 -> 84 -> 10).
        """
        x = inputs
        for layer in (self.c1, self.p1, self.c2, self.p2,
                      self.flatten, self.f1, self.f2, self.f3):
            x = layer(x)
        return x
定义AlexNet网络结构
AlexNet创新点:
1.激活函数使用relu
2.卷积之后引入标准化层(BN层)
3.使用了Dropout防止过拟合
from tensorflow.keras import layers, models, Model, Sequential
class AlexNet(Model):
    """AlexNet scaled down for 28x28 single-channel inputs.

    Keeps the key AlexNet ideas: ReLU activations, batch normalization
    after the first two convolutions, and dropout in the classifier head.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        # Stage 1: 96 filters with 3x3 kernels (the original paper uses 11x11),
        # then BN -> ReLU -> 3x3 max pool with stride 2.
        self.c1 = Conv2D(filters=96, kernel_size=(3, 3))
        self.b1 = BatchNormalization()
        self.a1 = Activation('relu')
        self.p1 = MaxPool2D(pool_size=(3, 3), strides=2)
        # Stage 2: 256 filters with the same BN/ReLU/pool pattern.
        self.c2 = Conv2D(filters=256, kernel_size=(3, 3))
        self.b2 = BatchNormalization()
        self.a2 = Activation('relu')
        self.p2 = MaxPool2D(pool_size=(3, 3), strides=2)
        # Stage 3: three same-padded 3x3 convolutions with ReLU, then a pool.
        self.c3 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c4 = Conv2D(filters=384, kernel_size=(3, 3), padding='same', activation='relu')
        self.c5 = Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu')
        self.p3 = MaxPool2D(pool_size=(3, 3), strides=2)
        # Classifier head: two 2048-unit ReLU dense layers, each followed by
        # 50% dropout, then a 10-way softmax output.
        self.flatten = Flatten()
        self.f1 = Dense(2048, activation='relu')
        self.d1 = Dropout(0.5)
        self.f2 = Dense(2048, activation='relu')
        self.d2 = Dropout(0.5)
        self.f3 = Dense(10, activation='softmax')

    def call(self, inputs):
        """Forward pass: [batch, 28, 28, 1] -> class probabilities [batch, 10].

        Spatial sizes follow out = (in - k + 2p)/s + 1:
        28 ->conv 26 ->pool 13 ->conv 11 ->pool 5 ->(same convs) 5 ->pool 2,
        so the flattened feature vector has 2*2*256 = 1024 entries.
        """
        x = inputs
        for layer in (self.c1, self.b1, self.a1, self.p1,
                      self.c2, self.b2, self.a2, self.p2,
                      self.c3, self.c4, self.c5, self.p3,
                      self.flatten, self.f1, self.d1,
                      self.f2, self.d2, self.f3):
            x = layer(x)
        return x
定义GoogLeNet网络结构
GoogLeNet-Inception 共四个版本,这里选择了v3
inception模块结构图:
GoogLeNet创新点:
v1:引入了Inception结构,并使用1x1卷积和来压缩通道数(减少参数量。Inception作用:代替人工确定卷积层中的过滤器类型或者确定是否需要创建卷积层和池化层,即:不需要人为的决定使用哪个过滤器,是否需要池化层等,由网络自行决定这些参数,可以给网络添加所有可能值,将输出连接起来,网络自己学习它需要什么样的参数。
v2:引入BN层,BN作用:加速网络训练/防止梯度消失。
v3:(1)将Inception内部的BN层推广到外部。(2)优化了网络结构,将较大的二维卷积拆成两个较小的一维卷积,比如将3x3拆成1x3和3x1。这样节省了大量参数,加速运算,并增加了一层非线性表达能力。
v4:引入残差结构。
from tensorflow.keras import layers, models, Model, Sequential
from tensorflow深度学习“深度学习”-概念篇