The BP (Backpropagation) Algorithm
Basic idea: the learning process consists of two phases, forward propagation of the signal and backward propagation of the error. (Implemented in the propagate() function.)
Mathematical tool: the chain rule from calculus. (Used in the line dw = (1./m)*np.dot(X,((A-Y).T)) inside propagate().)
Minimizing the cost function: gradient descent. (Implemented in the optimize() function; the update rule is sketched below.)
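For reference, the parameter update that optimize() performs on every iteration is the standard gradient-descent step, where α is the learning rate (learning_rate in the code):

$$
w := w - \alpha \frac{\partial J}{\partial w}, \qquad b := b - \alpha \frac{\partial J}{\partial b}
$$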
Explanation
What the code does: it decides whether an image contains a cat, i.e. binary classification: output 1 if there is a cat, 0 otherwise.
Training method: a BP network. The code is deliberately simple, with only a single neuron, so the weight w is a single column vector. (See the line w = np.zeros(shape=(dim, 1), dtype=np.float32) in initialize_with_zeros().)
Key difficulty: the line dw = (1./m)*np.dot(X,((A-Y).T)) in propagate(). Here dw means dJ/dw, the derivative of the cost function with respect to the weights w. The formula follows from the chain rule; Andrew Ng derives it in his video (2.9, "Logistic Regression Gradient Descent"), so watch that if it is unclear. Once you understand this line, the rest of the code is straightforward; the derivation is spelled out below.
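Spelled out for a single sample (x, y), following the course's derivation: with z = w^T x + b and a = σ(z), the loss is

$$
L(a, y) = -\bigl(y \log a + (1-y)\log(1-a)\bigr)
$$

Applying the chain rule through the sigmoid:

$$
\frac{\partial L}{\partial z} = \frac{\partial L}{\partial a}\cdot\frac{\partial a}{\partial z} = \Bigl(-\frac{y}{a} + \frac{1-y}{1-a}\Bigr)\,a(1-a) = a - y, \qquad \frac{\partial L}{\partial w} = x\,(a - y)
$$

Averaging over all m samples and vectorizing gives exactly the expressions in the code:

$$
dw = \frac{1}{m}\,X\,(A - Y)^T, \qquad db = \frac{1}{m}\sum_{i=1}^{m}\bigl(a^{(i)} - y^{(i)}\bigr)
$$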
Note
1. The difference between the loss function and the cost function:
Loss function: the error between the prediction for a single training sample and its actual label.
Cost function: the average of the errors over the entire training set, i.e. the mean of all the per-sample losses. (Computed in the cost = ... line of propagate().)
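In symbols, with a^(i) the prediction for the i-th sample and y^(i) its label, this is:

$$
L\bigl(a^{(i)}, y^{(i)}\bigr) = -\Bigl(y^{(i)}\log a^{(i)} + \bigl(1-y^{(i)}\bigr)\log\bigl(1-a^{(i)}\bigr)\Bigr), \qquad J(w, b) = \frac{1}{m}\sum_{i=1}^{m} L\bigl(a^{(i)}, y^{(i)}\bigr)
$$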
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset
import pylab

# Sigmoid activation function
def sigmoid(z):
    s = 1./(1 + np.exp(-z))
    return s

# Initialize the weights and the bias to zeros
def initialize_with_zeros(dim):
    # There is only one neuron here, so w is a single column vector
    w = np.zeros(shape=(dim, 1), dtype=np.float32)
    b = 0
    # Sanity checks on the shape of w and the type of b
    assert(w.shape == (dim, 1))
    assert(isinstance(b, float) or isinstance(b, int))
    return w, b

def propagate(w, b, X, Y):
    m = X.shape[1]
    # Forward propagation
    A = sigmoid(np.dot(w.T, X) + b)
    cost = (-1./m)*np.sum(Y*np.log(A) + (1-Y)*np.log(1-A), axis=1)  # sum across the row
    # Backward propagation
    dw = (1./m)*np.dot(X, (A-Y).T)  # dw is the derivative of the cost with respect to w
    db = (1./m)*np.sum(A-Y, axis=1)  # axis=0 sums down columns, axis=1 sums across rows
    assert(dw.shape == w.shape)
    assert(db.dtype == float)
    cost = np.squeeze(cost)  # squeeze drops size-1 dimensions, turning the 1-element array into a plain scalar
    # before squeeze: [ 6.00006477]
    # after squeeze:  6.000064773192205
    assert(cost.shape == ())
    grads = {"dw": dw,
             "db": db}
    return grads, cost

def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    costs = []
    for i in range(num_iterations):
        grads, cost = propagate(w=w, b=b, X=X, Y=Y)
        dw = grads["dw"]
        db = grads["db"]
        # Gradient-descent update step
        w = w - learning_rate*dw
        b = b - learning_rate*db
        if i % 100 == 0:
            costs.append(cost)
        if print_cost and i % 100 == 0:  # log the cost every 100 iterations when requested
            print("Cost after iteration %i: %f" % (i, cost))
    params = {"w": w,
              "b": b}
    grads = {"dw": dw,
             "db": db}
    return params, grads, costs

def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1, m))
    w = w.reshape(X.shape[0], 1)
    A = sigmoid(np.dot(w.T, X) + b)
    # Convert the probabilities A[0, i] into hard 0/1 predictions
    for i in range(A.shape[1]):
        if A[0, i] >= 0.5:
            Y_prediction[0, i] = 1
        else:
            Y_prediction[0, i] = 0
    assert(Y_prediction.shape == (1, m))
    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, num_iterations=2000, learning_rate=0.5, print_cost=False):
    # Initialize the weights and the bias
    w, b = initialize_with_zeros(X_train.shape[0])
    # Find the best weights and bias by gradient descent
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate, print_cost)
    w = parameters["w"]
    b = parameters["b"]
    # Predict on the test set and the training set
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    # Print the accuracies
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
    # Collect all results in a dictionary and return it
    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test,
         "Y_prediction_train": Y_prediction_train,
         "w": w,
         "b": b,
         "learning_rate": learning_rate,
         "num_iterations": num_iterations}
    return d

# ---- Main program starts here ----
# Load the training data and the test data
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()

# reshape() changes the array shape: train_set_x_orig.shape[0] is the number of images,
# and -1 tells numpy to compute the remaining dimension automatically.
# That dimension is the 64*64*3 pixel values of one image flattened into a single row.
train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Normalize: pixel values range from 0 to 255
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

# Train the model
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations=2000, learning_rate=0.005, print_cost=True)
print(d)
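To see the shapes in action without the dataset, here is a minimal sanity check for propagate() on a hypothetical toy batch with 2 features and 3 samples; the numbers are made up purely for illustration. It can be run in place of the training script above.

# Hypothetical toy inputs: 2 features, 3 samples (illustration only)
w_t = np.array([[1.], [2.]])
b_t = 2.
X_t = np.array([[1., 2., -1.], [3., 4., -3.2]])
Y_t = np.array([[1, 0, 1]])
grads_t, cost_t = propagate(w_t, b_t, X_t, Y_t)
print("dw =", grads_t["dw"])    # shape (2, 1), the same shape as w
print("db =", grads_t["db"])
print("cost =", cost_t)         # a single scalar, thanks to np.squeeze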