Daily practice: hand-writing a three-layer backpropagation neural network (cross-entropy loss + regularization term + manual backward derivatives)
Posted by 囚生CY
This was a course assignment. It is not particularly useful on its own, but writing a dense network by hand really does deepen one's understanding of neural networks, especially the backward-propagation derivatives.
The code has been uploaded as a resource, but CSDN has recently been acting up and will not let me change the resource points, so I am leaving a Baidu Netdisk (BDY) link instead.
#-*- coding:UTF-8 -*-
import numpy as np
import pandas as pd
from scipy.io import loadmat
from scipy.linalg import norm
import matplotlib.pyplot as plt
""" 百度云链接:https://pan.baidu.com/s/1kdSoUcK9PFjUEfRiUI9pdw """
""" 密码:iycw """
"""
Nesterov's方法是先根据历史信息走到一个点,
再根据那个点的gradient来走一段更新;
这恰好与动量方法的次序是相反的,
动量方法是先根据当前点的gradient来走一段更新,
然后再根据历史信息往前面搞一段距离;
"""
def train(wd,n_hidden,n_iters,learning_rate,momentum_mul,do_early_stopping=False,minibatch_size=10,isNestrov=False):
    """
    · wd: weight decay (regularization strength)
    · n_hidden: number of hidden units
    · n_iters: number of stochastic gradient descent iterations
    · learning_rate: learning rate
    · momentum_mul: momentum coefficient (applied to the previous velocity before adding the new gradient step)
    · do_early_stopping: whether to early-stop (if so, simply keep the parameters with the best validation loss seen so far)
    · minibatch_size: mini-batch size for stochastic gradient descent
    · isNestrov: whether to use Nesterov's method
    · return: None (the classification losses on the three datasets are printed instead)
    """
    data_file = loadmat("data.mat",squeeze_me=True,struct_as_record=False)
    data = data_file["data"] # load the data
    """
    · data.training.inputs —— 256×1000
    · data.training.targets —— 10×1000
    · data.validation.inputs —— 256×1000
    · data.validation.targets —— 10×1000
    · data.test.inputs —— 256×9000
    · data.test.targets —— 10×9000
    """
    data_train = {"X":data.training.inputs,"y":data.training.targets}
    data_valid = {"X":data.validation.inputs,"y":data.validation.targets}
    data_test = {"X":data.test.inputs,"y":data.test.targets}
    n_train = data_train["X"].shape[1] # number of training samples
    params = initial_model(n_hidden) # initialize the two weight matrices
    theta = model2theta(params) # flatten the two matrices into a single parameter vector
    test_gradient(params,data_train,wd,n_hidden) # check that the analytic gradient is correct
    v = 0 # initialize the velocity
    loss_train = [] # training losses over the iterations
    loss_valid = [] # validation losses over the iterations
    best = {} # best parameters found so far
    if do_early_stopping: # early stopping
        best["theta"] = 0
        best["loss_valid"] = np.inf
        best["iter"] = -1
    for t in range(n_iters+1): # stochastic gradient descent loop
        batch_start = (t*minibatch_size) % n_train # take the next mini-batch of samples
        data_batch = {
            "X": data_train["X"][:,batch_start:batch_start+minibatch_size],
            "y": data_train["y"][:,batch_start:batch_start+minibatch_size],
        }
        if isNestrov: # Nesterov's method
            temp = theta + momentum_mul*v # first take the look-ahead step along the previous velocity
            loss,grad = eval_obj_grad(theta2model(temp),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # the actual descent direction for this step
            theta += learning_rate*v # update the parameters (note: +=, so we descend along v, mirroring the momentum branch)
        else: # classical momentum
            loss,grad = eval_obj_grad(theta2model(theta),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # descent direction after the momentum adjustment
            theta += learning_rate*v # update the parameters
        params = theta2model(theta) # convert the updated theta back into the params format (the two weight matrices)
        loss = eval_obj(params,data_train,wd) # training loss
        loss_train.append(loss) # store the training loss
        loss = eval_obj(params,data_valid,wd) # validation loss
        loss_valid.append(loss) # store the validation loss
        if do_early_stopping and loss_valid[-1]<best["loss_valid"]: # if the validation loss beats the best so far, update best
            best["theta"] = theta.copy()
            best["loss_valid"] = loss_valid[-1]
            best["iter"] = t
        if t%(max(1,n_iters//10))==0: # print progress roughly ten times over the whole run
            print("After %d iterations - ||theta|| is %.4e - training loss is %.4e - and validation loss is %.4e\n"%(t,norm(theta),loss_train[-1],loss_valid[-1]))
    test_gradient(params,data_train,wd,n_hidden)
""" 实验绘图 """
plt.close()
plt.figure()
plt.plot(loss_train,label="training loss")
plt.plot(loss_valid,label="validation loss")
plt.legend(loc="best")
plt.show()
if do_early_stopping:
print("Early stopping: validation loss: %.3e, was lowest after %d iterations" % (best["loss_valid"],best["iter"]))
theta = best["theta"]
params = theta2model(theta)
""" 检查效果 """
datasets = [data_train,data_valid,data_test]
acc = [accuracy(params,x) for x in datasets]
classification_loss = [eval_obj(params,x,0) for x in datasets]
print("Accuracy: training %.4f,validation %.4f,testing %.4f" % (acc[0],acc[1],acc[2]))
info =
"loss_train": classification_loss[0],
"loss_valid": classification_loss[1],
"loss_test": classification_loss[2],
print(info)
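# For reference, a typical call mirroring the P2Q1 experiments below (the hyper-parameter
# values here are only an illustration, not a recommendation from the original post):
#     train(wd=0, n_hidden=10, n_iters=70, learning_rate=0.05, momentum_mul=0.9,
#           do_early_stopping=False, minibatch_size=4)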
def eval_obj(params,data,wd): # compute the loss function
    W_hid,W_out = params["W_hid"],params["W_out"] # 100×256, 10×100
    X,y = data["X"],data["y"] # 256×1000, 10×1000
    """ Forward pass """
    hidden_in = np.dot(W_hid,X)
    hidden_out = sigmoid(hidden_in)
    y_hat = np.dot(W_out,hidden_out)
    exps = np.exp(y_hat)
    y_hat = exps/(np.sum(exps,axis=0)) # softmax output
    assert y_hat.shape==y.shape # sanity check
    loss = 0
    for i in range(y_hat.shape[1]):
        loss -= np.dot(y[:,i].T,np.log(y_hat[:,i]))
    loss /= y_hat.shape[1]
    loss += wd/2*(np.linalg.norm(W_hid,ord="fro")**2+np.linalg.norm(W_out,ord="fro")**2)
    return loss
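# Note: np.exp(y_hat) in eval_obj can overflow for large logits. A mathematically equivalent,
# numerically safer variant (an added sketch, not what the original assignment uses) subtracts
# the per-column maximum before exponentiating:
def stable_softmax(logits):
    shifted = logits - logits.max(axis=0, keepdims=True)  # largest entry in each column becomes 0
    exps = np.exp(shifted)
    return exps/np.sum(exps, axis=0, keepdims=True)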
def eval_obj_grad(params,data,wd,n_hidden): # compute the loss function and its gradient
    W_hid,W_out = params["W_hid"],params["W_out"] # 100×256, 10×100
    X,y = data["X"],data["y"] # 256×1000, 10×1000
    m = y.shape[1] # number of samples
    """ Forward pass """
    hidden_in = np.dot(W_hid,X) # hidden-layer input, 100×1000
    hidden_out = sigmoid(hidden_in) # hidden-layer output, 100×1000
    y_hat = np.dot(W_out,hidden_out) # output-layer input, 10×1000
    exps = np.exp(y_hat)
    y_hat = exps/(np.sum(exps,axis=0)) # softmax output, 10×1000
    assert y_hat.shape==y.shape # sanity check
    loss = 0
    for i in range(y_hat.shape[1]):
        loss -= np.dot(y[:,i].T,np.log(y_hat[:,i]))
    loss /= y_hat.shape[1]
    loss += wd/2*(np.linalg.norm(W_hid,ord="fro")**2+np.linalg.norm(W_out,ord="fro")**2)
    """ Backward pass """
    dz_2 = y_hat - y # error between prediction and target, 10×1000
    grad_W_out = 1/m * np.dot(dz_2,hidden_out.T) # the transpose is the key step, 10×100
    dz_1 = np.dot(W_out.T,dz_2) * hidden_out*(1-hidden_out) # derivative through the sigmoid activation, 100×1000
    grad_W_hid = 1/m * np.dot(dz_1,X.T) # 100×256
    grad = { # return the gradients
        "W_out": grad_W_out + wd*W_out, # do not forget the gradient of the regularization term (SGD blew up before I remembered it)
        "W_hid": grad_W_hid + wd*W_hid,
    }
    return loss,grad
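# Sanity-check sketch for the softmax + cross-entropy shortcut dz_2 = y_hat - y used above.
# This is an added illustration with toy values (it is not called by the assignment code);
# it compares the analytic per-logit gradient against a central finite difference.
def _check_softmax_ce_grad():
    rng = np.random.RandomState(0)
    z = rng.randn(10)                                   # logits for a single sample
    y = np.zeros(10); y[3] = 1                          # one-hot target, true class 3
    p = np.exp(z)/np.sum(np.exp(z))                     # softmax probabilities
    analytic = p - y                                    # claimed gradient of the cross-entropy w.r.t. z
    eps = 1e-6
    numeric = np.zeros(10)
    for k in range(10):
        zp, zm = z.copy(), z.copy()
        zp[k] += eps; zm[k] -= eps
        lp = -np.log(np.exp(zp[3])/np.sum(np.exp(zp)))  # cross-entropy at z + eps*e_k
        lm = -np.log(np.exp(zm[3])/np.sum(np.exp(zm)))  # cross-entropy at z - eps*e_k
        numeric[k] = (lp - lm)/(2*eps)
    assert np.allclose(analytic, numeric, atol=1e-5), "softmax/cross-entropy gradient mismatch"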
def initial_model(n_hid):
    n_params = (256+10)*n_hid # total number of weights in the input->hidden and hidden->output matrices
    as_row_vector = np.cos(np.arange(n_params)) # deterministic "pseudo-random" values: cos(0), cos(1), ..., cos(n_params-1)
    params = {}
    params["W_hid"] = as_row_vector[:256*n_hid].reshape((n_hid,256))*0.1
    params["W_out"] = as_row_vector[256*n_hid:].reshape((10,n_hid))*0.1
    return params
def test_gradient(params,data,wd,n_hidden): # finite-difference check of the analytic gradient
    loss,analytic_grad = eval_obj_grad(params,data,wd,n_hidden) # loss and gradient at the current parameters
    num_checks = 100
    theta = model2theta(params)
    grad_ana = model2theta(analytic_grad)
    delta = 1e-4
    threshold = 1e-5
    for i in range(num_checks):
        ind = (i*1299283) % theta.size # pseudo-random index into theta
        grad_ind_ana = grad_ana[ind]
        theta1 = theta.copy()
        theta1[ind] += delta
        l1 = eval_obj(theta2model(theta1),data,wd)
        theta2 = theta.copy()
        theta2[ind] -= delta
        l2 = eval_obj(theta2model(theta2),data,wd)
        grad_ind_fin = (l1-l2)/(2*delta) # central-difference estimate
        diff = abs(grad_ind_ana - grad_ind_fin)
        if diff<threshold: continue
        if diff/(abs(grad_ind_ana)+abs(grad_ind_fin))<threshold: continue
        raise AssertionError("%d-th: l %.3e\nl1 %.3e\nl2 %.3e\nanalytic %.3e\nfd %.3e\ndiff %.3e\n" % (i,loss,l1,l2,grad_ind_ana,grad_ind_fin,diff))
    print("Gradient test passed")
def model2theta(params): # flatten the two weight matrices and concatenate them into one vector
    theta = np.concatenate((params["W_out"].flatten(),params["W_hid"].flatten()))
    return theta
def theta2model(theta): # unpack the vector back into the two weight matrices
    n_hid = theta.size // (256+10) # theta.size is the length of theta; a//b is floor division, i.e. int(a/b) for positive values
    params = {}
    params["W_out"] = np.reshape(theta[:n_hid*10],(10,n_hid))
    params["W_hid"] = np.reshape(theta[n_hid*10:],(n_hid,256))
    return params
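# Round-trip check for the packing helpers (an added illustration): flattening and unpacking
# should be exact inverses of each other for any hidden-layer size, e.g.
#     p = initial_model(50)
#     assert np.array_equal(model2theta(theta2model(model2theta(p))), model2theta(p))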
def accuracy(params,data): # compute classification accuracy
    W_hid,W_out = params["W_hid"],params["W_out"]
    index_transpose = np.nonzero(data["y"].T) # the targets are one-hot columns
    true_label = index_transpose[1] # row index of the 1 in each column, i.e. the true class label
    a_hidden = W_hid.dot(data["X"])
    h_hidden = sigmoid(a_hidden)
    a_out = W_out.dot(h_hidden)
    pred = a_out.argmax(axis=0) # predicted class: the largest output logit
    return np.mean(pred==true_label)
def log_sum_exp(X): # for each column of X, sum the exponentials and take the natural log of the resulting row vector
    return np.log(np.sum(np.exp(X),axis=0))
def sigmoid(X): # activation function
    return 1/(1+np.exp(-X))
def grad_sigmoid(X): # derivative of the activation function
    return sigmoid(X)*(1-sigmoid(X))
def P2Q1():
    print("Part2 Question1...")
    params = [ # parameter settings to test
        [0,10,70,0.005,0,False,4],
        [0,10,70,0.01,0,False,4],
        [0,10,70,0.05,0,False,4],
        [0,10,70,0.2,0,False,4],
        [0,10,70,1.0,0,False,4],
        [0,10,70,5.0,0,False,4],
    ]
    for param in params:
        print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(param[0],param[1],param[2],param[3],param[4],param[5],param[6]))
        train(param[0],param[1],param[2],param[3],param[4],param[5],param[6])
def P2Q2():
    print("Part2 Question2...")
    param = [0,10,100,None,None,False,4]
    learning_rates = [0.01,0.05,0.2,1.0,5.0]
    momentums_muls = [0,0.5,0.9]
    for learning_rate in learning_rates:
        for momentum_mul in momentums_muls:
            tempParam = param[:]
            tempParam[3] = learning_rate
            tempParam[4] = momentum_mul
            print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6]))
            train(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6])
def P2Q3():
    """ Compare with P2Q1 """
    print("Part2 Question3...")
    params = [ # parameter settings to test
        [0,10,70,0.005,0,False,4],
        [0,10,70,0.01,0,False,4],
        [0,10,70,0.05,0,False,4],
        [0,10,70,0.2,0,False,4],
        [0,10,70,1.0,0,False,4],
        [0,10,70,5.0,0,False,4],
    ]
    for param in params:
        print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(param[0],param[1],param[2],param[3],param[4],param[5],param[6]))
        train(param[0],param[1],param[2],param[3],param[4],param[5],param[6],True)
    print("###########################################################")
    print("####################### Dividing line #####################")
    print("###########################################################")
    """ Compare with P2Q2 """
    param = [0,10,100,None,None,False,4]
    learning_rates = [0.01,0.05,0.2,1.0,5.0]
    momentums_muls = [0,0.5,0.9]
    for learning_rate in learning_rates:
        for momentum_mul in momentums_muls:
            tempParam = param[:]
            tempParam[3] = learning_rate
            tempParam[4] = momentum_mul
            print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6]))
            train(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6],True)
def P3Q1():
    print("Part3 Question1...")
    params = [ # parameter settings to test
        [0,200,1000,0.2,0.9,False,4],
        [1e-4,200,1000,0.2,0.9,False,4],
        [1e-3,200,1000,0.2,0.9,False,4],
        [1e-2,200,1000,0.2,0.9,False,4],
        [1e-1,200,1000,0.2,0.9,False,4],
        [1,200,1000,0.2,0.9,False,4],
        [10,200,1000,0.2,0.9,False,4],
    ]
    for param in params:
        print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(param[0],param[1],param[2],param[3],param[4],param[5],param[6]))
        train(param[0],param[1],param[2],param[3],param[4],param[5],param[6])
def P3Q2():
    print("Part3 Question2...")
    param = [0,None,1000,0.2,0.9,None,4]
    n_hiddens = [10,50,100,200,300]
    for n_hidden in n_hiddens:
        tempParam = param[:]
        tempParam[1] = n_hidden
        tempParam[5] = True
        print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6]))
        train(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6],True)
        tempParam = param[:]
        tempParam[1] = n_hidden
        tempParam[5] = False
        print("Testing parameters: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} ...".format(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6]))
        train(tempParam[0],tempParam[1],tempParam[2],tempParam[3],tempParam[4],tempParam[5],tempParam[6],True)
def train_1(wd,n_hidden,n_iters,learning_rate,momentum_mul,do_early_stopping=False,minibatch_size=10,isNestrov=False):
    """
    · wd: weight decay (regularization strength)
    · n_hidden: number of hidden units
    · n_iters: number of stochastic gradient descent iterations
    · learning_rate: learning rate
    · momentum_mul: momentum coefficient (applied to the previous velocity before adding the new gradient step)
    · do_early_stopping: whether to early-stop (if so, simply keep the parameters with the best validation loss seen so far)
    · minibatch_size: mini-batch size for stochastic gradient descent
    · isNestrov: whether to use Nesterov's method
    · return: accuracy on the validation split (fold 1 of the two-fold comparison in P3Q3)
    """
    data_file = loadmat("data.mat",squeeze_me=True,struct_as_record=False)
    data = data_file["data"] # load the data
    """
    · data.training.inputs —— 256×1000
    · data.training.targets —— 10×1000
    · data.validation.inputs —— 256×1000
    · data.validation.targets —— 10×1000
    · data.test.inputs —— 256×9000
    · data.test.targets —— 10×9000
    """
    data_train = {"X":data.training.inputs,"y":data.training.targets}
    data_valid = {"X":data.validation.inputs,"y":data.validation.targets}
    data_test = {"X":data.test.inputs,"y":data.test.targets}
    n_train = data_train["X"].shape[1] # number of training samples
    params = initial_model(n_hidden) # initialize the two weight matrices
    theta = model2theta(params) # flatten the two matrices into a single parameter vector
    test_gradient(params,data_train,wd,n_hidden) # check that the analytic gradient is correct
    v = 0 # initialize the velocity
    loss_train = [] # training losses over the iterations
    loss_valid = [] # validation losses over the iterations
    best = {} # best parameters found so far
    if do_early_stopping: # early stopping
        best["theta"] = 0
        best["loss_valid"] = np.inf
        best["iter"] = -1
    for t in range(n_iters+1): # stochastic gradient descent loop
        batch_start = (t*minibatch_size) % n_train # take the next mini-batch of samples
        data_batch = {
            "X": data_train["X"][:,batch_start:batch_start+minibatch_size],
            "y": data_train["y"][:,batch_start:batch_start+minibatch_size],
        }
        if isNestrov: # Nesterov's method
            temp = theta + momentum_mul*v # first take the look-ahead step along the previous velocity
            loss,grad = eval_obj_grad(theta2model(temp),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # the actual descent direction for this step
            theta += learning_rate*v # update the parameters (note: +=, so we descend along v, mirroring the momentum branch)
        else: # classical momentum
            loss,grad = eval_obj_grad(theta2model(theta),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # descent direction after the momentum adjustment
            theta += learning_rate*v # update the parameters
        params = theta2model(theta) # convert the updated theta back into the params format (the two weight matrices)
        loss = eval_obj(params,data_train,wd) # training loss
        loss_train.append(loss) # store the training loss
        loss = eval_obj(params,data_valid,wd) # validation loss
        loss_valid.append(loss) # store the validation loss
        if do_early_stopping and loss_valid[-1]<best["loss_valid"]: # if the validation loss beats the best so far, update best
            best["theta"] = theta.copy()
            best["loss_valid"] = loss_valid[-1]
            best["iter"] = t
        if t%(max(1,n_iters//10))==0: # print progress roughly ten times over the whole run
            print("After %d iterations - ||theta|| is %.4e - training loss is %.4e - and validation loss is %.4e\n"%(t,norm(theta),loss_train[-1],loss_valid[-1]))
    test_gradient(params,data_train,wd,n_hidden)
""" 实验绘图 """
plt.close()
plt.figure()
plt.plot(loss_train,label="training loss")
plt.plot(loss_valid,label="validation loss")
plt.legend(loc="best")
plt.show()
if do_early_stopping:
print("Early stopping: validation loss: %.3e, was lowest after %d iterations" % (best["loss_valid"],best["iter"]))
theta = best["theta"]
params = theta2model(theta)
""" 检查效果 """
datasets = [data_train,data_valid,data_test]
acc = [accuracy(params,x) for x in datasets]
classification_loss = [eval_obj(params,x,0) for x in datasets]
print("Accuracy: training %.4f,validation %.4f,testing %.4f" % (acc[0],acc[1],acc[2]))
return acc[1]
def train_2(wd,n_hidden,n_iters,learning_rate,momentum_mul,do_early_stopping=False,minibatch_size=10,isNestrov=False):
    """
    · wd: weight decay (regularization strength)
    · n_hidden: number of hidden units
    · n_iters: number of stochastic gradient descent iterations
    · learning_rate: learning rate
    · momentum_mul: momentum coefficient (applied to the previous velocity before adding the new gradient step)
    · do_early_stopping: whether to early-stop (if so, simply keep the parameters with the best validation loss seen so far)
    · minibatch_size: mini-batch size for stochastic gradient descent
    · isNestrov: whether to use Nesterov's method
    · return: accuracy on the held-out split (fold 2 of the two-fold comparison in P3Q3: training and validation sets are swapped)
    """
    data_file = loadmat("data.mat",squeeze_me=True,struct_as_record=False)
    data = data_file["data"] # load the data
    """
    · data.training.inputs —— 256×1000
    · data.training.targets —— 10×1000
    · data.validation.inputs —— 256×1000
    · data.validation.targets —— 10×1000
    · data.test.inputs —— 256×9000
    · data.test.targets —— 10×9000
    """
    data_valid = {"X":data.training.inputs,"y":data.training.targets} # fold 2: training and validation splits are deliberately swapped
    data_train = {"X":data.validation.inputs,"y":data.validation.targets}
    data_test = {"X":data.test.inputs,"y":data.test.targets}
    n_train = data_train["X"].shape[1] # number of training samples
    params = initial_model(n_hidden) # initialize the two weight matrices
    theta = model2theta(params) # flatten the two matrices into a single parameter vector
    test_gradient(params,data_train,wd,n_hidden) # check that the analytic gradient is correct
    v = 0 # initialize the velocity
    loss_train = [] # training losses over the iterations
    loss_valid = [] # validation losses over the iterations
    best = {} # best parameters found so far
    if do_early_stopping: # early stopping
        best["theta"] = 0
        best["loss_valid"] = np.inf
        best["iter"] = -1
    for t in range(n_iters+1): # stochastic gradient descent loop
        batch_start = (t*minibatch_size) % n_train # take the next mini-batch of samples
        data_batch = {
            "X": data_train["X"][:,batch_start:batch_start+minibatch_size],
            "y": data_train["y"][:,batch_start:batch_start+minibatch_size],
        }
        if isNestrov: # Nesterov's method
            temp = theta + momentum_mul*v # first take the look-ahead step along the previous velocity
            loss,grad = eval_obj_grad(theta2model(temp),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # the actual descent direction for this step
            theta += learning_rate*v # update the parameters (note: +=, so we descend along v, mirroring the momentum branch)
        else: # classical momentum
            loss,grad = eval_obj_grad(theta2model(theta),data_batch,wd,n_hidden)
            grad_vec = model2theta(grad) # gradient as a vector
            v = momentum_mul*v - grad_vec # descent direction after the momentum adjustment
            theta += learning_rate*v # update the parameters
        params = theta2model(theta) # convert the updated theta back into the params format (the two weight matrices)
        loss = eval_obj(params,data_train,wd) # training loss
        loss_train.append(loss) # store the training loss
        loss = eval_obj(params,data_valid,wd) # validation loss
        loss_valid.append(loss) # store the validation loss
        if do_early_stopping and loss_valid[-1]<best["loss_valid"]: # if the validation loss beats the best so far, update best
            best["theta"] = theta.copy()
            best["loss_valid"] = loss_valid[-1]
            best["iter"] = t
        if t%(max(1,n_iters//10))==0: # print progress roughly ten times over the whole run
            print("After %d iterations - ||theta|| is %.4e - training loss is %.4e - and validation loss is %.4e\n"%(t,norm(theta),loss_train[-1],loss_valid[-1]))
    test_gradient(params,data_train,wd,n_hidden)
""" 实验绘图 """
plt.close()
plt.figure()
plt.plot(loss_train,label="training loss")
plt.plot(loss_valid,label="validation loss")
plt.legend(loc="best")
plt.show()
if do_early_stopping:
print("Early stopping: validation loss: %.3e, was lowest after %d iterations" % (best["loss_valid"],best["iter"]))
theta = best["theta"]
params = theta2model(theta)
""" 检查效果 """
datasets = [data_train,data_valid,data_test]
acc = [accuracy(params,x) for x in datasets]
classification_loss = [eval_obj(params,x,0) for x in datasets]
print("Accuracy: training %.4f,validation %.4f,testing %.4f" % (acc[0],acc[1],acc[2]))
return acc[1]
def P3Q3():
    wds = [0,1e-4,1e-3,1e-2,1e-1,1,10]
    n_hiddens = [10,50,100,200,300]
    n_iters = [1000]
    learning_rates = [0.2]
    momentum_muls = [0.9]
    do_early_stoppings = [True,False]
    minibatch_sizes = [4]
    isNestrovs = [False]
    results = {} # store all results
    count = 0 # counter
    for wd in wds:
        for n_hidden in n_hiddens:
            for n_iter in n_iters:
                for learning_rate in learning_rates:
                    for momentum_mul in momentum_muls:
                        for do_early_stopping in do_early_stoppings:
                            for minibatch_size in minibatch_sizes:
                                for isNestrov in isNestrovs:
                                    count += 1
                                    print("###################################################")
                                    print("################ Combination {} ################".format(count))
                                    print("###################################################")
                                    print("Testing parameters - fold 1: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} - isNestrov is {} ...".format(wd,n_hidden,n_iter,learning_rate,momentum_mul,do_early_stopping,minibatch_size,isNestrov))
                                    acc1 = train_1(wd,n_hidden,n_iter,learning_rate,momentum_mul,do_early_stopping,minibatch_size,isNestrov)
                                    print("Testing parameters - fold 2: wd is {} - n_hidden is {} - n_iters is {} - learning_rate is {} - momentum_mul is {} - do_early_stopping is {} - minibatch_size is {} - isNestrov is {} ...".format(wd,n_hidden,n_iter,learning_rate,momentum_mul,do_early_stopping,minibatch_size,isNestrov))
                                    acc2 = train_2(wd,n_hidden,n_iter,learning_rate,momentum_mul,do_early_stopping,minibatch_size,isNestrov)
                                    results["{}-{}-{}".format(wd,n_hidden,do_early_stopping)] = (acc1+acc2)/2
    for key,value in results.items():
        print("{}\taverage accuracy {}".format(key,value))
if __name__ == "__main__":
    P3Q3()