【中文标题】我的自定义逻辑回归实现有啥问题?【英文标题】:What is wrong with my custom logistic regression implementation?我的自定义逻辑回归实现有什么问题? 【发布时间】:2020-10-17 16:55:23 【问题描述】:我想让自定义实现得到与 sklearn 几乎相同的结果,但目前的结果并不理想。我的自定义实现与 sklearn 实现得到的截距值相差 5,因此我希望尽可能缩小这一差距。
我的 sklearn 代码如下:
from sklearn import linear_model
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic two-class dataset with 15 features; weights=[0.7] makes the
# first class roughly 70% of the samples.
X, y = make_classification(n_samples=50000, n_features=15, n_informative=10, n_redundant=5,
                           n_classes=2, weights=[0.7], class_sep=0.7, random_state=15)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=15)

# Reference model: L2-regularised logistic regression trained by SGD with a
# constant learning rate.
# NOTE: scikit-learn 1.1 renamed this loss to 'log_loss' and later releases
# no longer accept 'log'; switch the value when running on a recent version.
clf = linear_model.SGDClassifier(eta0=0.0001, alpha=0.0001, loss='log', random_state=15,
                                 penalty='l2', tol=1e-3, verbose=2, learning_rate='constant')
clf.fit(X=X_train, y=y_train)  # fitting our model
print(clf.coef_, clf.coef_.shape, clf.intercept_)
(array([[-0.42336692, 0.18547565, -0.14859036, 0.34144407, -0.2081867 ,
0.56016579, -0.45242483, -0.09408813, 0.2092732 , 0.18084126,
0.19705191, 0.00421916, -0.0796037 , 0.33852802, 0.02266721]]),
(1, 15),
def initialize_weights(dim):
    """Return the starting ``(weights, bias)`` pair for training.

    ``dim`` serves only as a template: the weights are ``np.zeros_like(dim)``,
    so they take the shape and dtype of ``dim``. The bias starts at the
    integer 0.
    """
    return np.zeros_like(dim), 0
def sigmoid(z):
    """Apply the logistic function 1 / (1 + e^(-z)) to ``z``.

    Works element-wise when ``z`` is a NumPy array.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def logloss(y_true, y_pred):
    """Return the mean base-10 log loss of ``y_pred`` against ``y_true``.

    Each (target, score) pair contributes
    ``-(1/n) * (y*log10(p) + (1-y)*log10(1-p))``, where ``n`` is the number
    of pairs. The contributions are added in input order, starting from 0,
    so an empty input yields 0.
    """
    pairs = list(zip(y_true, y_pred))
    return sum(
        (-1 / len(pairs)) * (target * np.log10(score) + (1 - target) * np.log10(1 - score))
        for target, score in pairs
    )
def gradient_dw(x, y, w, b, alpha, N):
    """Return the per-sample update direction for the weights ``w``.

    The result is ``x * (y - sigmoid(w.x + b))`` minus a weight penalty
    scaled by ``(1/alpha) * (1/N)``.
    """
    score = np.dot(w, x) + b
    residual = y - sigmoid(score)
    penalty = (1 / alpha) * (1 / N) * w
    return x * residual - penalty
def gradient_db(x, y, w, b):
    """Return the per-sample update direction for the bias ``b``.

    This is the residual ``y - sigmoid(w.x + b)`` for the single sample
    ``(x, y)``.
    """
    z = np.dot(w, x) + b
    db = y - sigmoid(z)
    # Return the local ``db``; the previous ``DB`` was an undefined name.
    return db
def train(X_train, y_train, X_test, y_test, epochs, alpha, eta0, tol=1e-3):
    """Fit logistic regression with one gradient update per training sample.

    Args:
        X_train: indexable collection of training feature vectors.
        y_train: training targets, aligned with ``X_train``.
        X_test: held-out feature vectors, used only for loss reporting.
        y_test: held-out targets, aligned with ``X_test``.
        epochs: number of full passes over the training data.
        alpha: regularisation value forwarded to ``gradient_dw``.
        eta0: learning rate applied to both the weight and bias updates.
        tol: accepted for interface compatibility; this implementation does
            not read it, so no early stopping takes place.

    Returns:
        ``(w, b, train_loss, test_loss)``; each loss list holds one value
        per epoch.
    """
    # The first training row is only a shape template for the weights.
    w, b = initialize_weights(X_train[0])
    n_train = len(X_train)
    train_loss = []
    test_loss = []
    for epoch in range(epochs):
        # One pass over the data, updating w and b after every sample.
        for x, y in zip(X_train, y_train):
            dw = gradient_dw(x, y, w, b, alpha, n_train)
            db = gradient_db(x, y, w, b)
            w = w + eta0 * dw
            b = b + eta0 * db
        # NOTE: these predictions use the score w.x only; the bias b is not
        # added before the sigmoid, so the reported losses ignore b.
        y_pred = [sigmoid(np.dot(w, x)) for x in X_train]
        y_pred_test = [sigmoid(np.dot(w, x)) for x in X_test]
        # Compute each loss once and reuse it for both logging and history.
        epoch_train_loss = logloss(y_train, y_pred)
        epoch_test_loss = logloss(y_test, y_pred_test)
        print(f"EPOCH: {epoch} Train Loss: {epoch_train_loss} Test Loss: {epoch_test_loss}")
        train_loss.append(epoch_train_loss)
        test_loss.append(epoch_test_loss)
    return w, b, train_loss, test_loss
# Hyperparameters mirror the SGDClassifier configuration used above
# (alpha=0.0001, eta0=0.0001).
alpha = 0.0001
eta0 = 0.0001
# NOTE(review): the epoch count is not given in the post; 50 is a
# placeholder — confirm before relying on it.
epochs = 50
w, b, train_loss, test_loss = train(X_train, y_train, X_test, y_test, epochs, alpha, eta0)
得到的 w、b 结果如下:
(array([-0.22281323, 0.10570237, -0.02506523, 0.16630429, -0.07033019,
0.27985805, -0.27348925, -0.04622113, 0.13212066, 0.05330409,
0.09926212, -0.00791336, -0.02920803, 0.1828124 , 0.03442375]),
看起来像 AAIC 的作业。 【参考方案1】:问题出在您的函数 gradient_dw() 中:正则化项的系数应该是 alpha*(1/N),而不是 (1/alpha)*(1/N)。修正后的函数如下:
def gradient_dw(x, y, w, b, alpha, N):
    """Return the per-sample update direction for the weights ``w``.

    The result is ``x * (y - sigmoid(w.x + b))`` minus a weight penalty
    scaled by ``alpha * (1/N)``.
    """
    score = np.dot(w, x) + b
    residual = y - sigmoid(score)
    penalty = alpha * (1 / N) * w
    return x * residual - penalty
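这是因为对代价函数关于权重 w 求导后,梯度下降的更新公式为:$w^{(t+1)} = w^{(t)} + \eta_0\left(x_n\left(y_n - \sigma\left(w^{(t)\top}x_n + b^{(t)}\right)\right) - \frac{\alpha}{N}\,w^{(t)}\right)$,其中正则化项的系数是 $\alpha/N$。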
对您的代码的另一个小修正:计算预测值时,需要把截距项 b 加到 sigmoid 函数的输入中:
# Add the intercept b to the linear score before applying the sigmoid.
y_pred = [sigmoid(np.dot(w, row) + b) for row in X_train]
y_pred_test = [sigmoid(np.dot(w, row) + b) for row in X_test]
因此,完整代码的最终形式如下;其各个权重与 Scikit-learn 实现的结果相差约 0.001 到 0.01(见下方输出)。
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import linear_model
import matplotlib.pyplot as plt
# Synthetic two-class dataset with 15 features; weights=[0.7] makes the
# first class roughly 70% of the samples.
X, y = make_classification(
    n_samples=50000,
    n_features=15,
    n_informative=10,
    n_redundant=5,
    n_classes=2,
    weights=[0.7],
    class_sep=0.7,
    random_state=15,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=15
)

# Reference model: L2-regularised logistic regression trained by SGD with a
# constant learning rate.
# NOTE: scikit-learn 1.1 renamed this loss to 'log_loss' and later releases
# no longer accept 'log'; switch the value when running on a recent version.
clf = linear_model.SGDClassifier(
    eta0=0.0001,
    alpha=0.0001,
    loss='log',
    random_state=15,
    penalty='l2',
    tol=1e-3,
    verbose=2,
    learning_rate='constant',
)
clf.fit(X=X_train, y=y_train)
print(clf.coef_, clf.coef_.shape, clf.intercept_)
def initialize_weights(dim):
    """Build the initial parameters for logistic regression.

    The weights are an all-zero array with the same shape and dtype as
    ``dim`` (via ``np.zeros_like``); the bias is the integer 0.

    Returns:
        A ``(weights, bias)`` tuple.
    """
    weights = np.zeros_like(dim)
    bias = 0
    return weights, bias
def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)), computed stably.

    Evaluating ``np.exp(-z)`` directly overflows for large negative ``z``.
    This version only exponentiates ``-|z|``, which lies in (0, 1], and
    chooses the algebraically equivalent form for each sign of ``z``:

    * ``z >= 0``: ``1 / (1 + e^(-z))``
    * ``z <  0``: ``e^z / (1 + e^z)``

    Args:
        z: a scalar or array-like of real values.

    Returns:
        A NumPy float scalar for scalar input, otherwise an array with the
        shape of ``z``.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # never overflows; may underflow to 0.0
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays as arrays.
    return result[()]
def logloss(y_true, y_pred):
    """Compute the average base-10 log loss over paired targets and scores.

    For ``n`` pairs the result is the sum, taken in input order from an
    initial 0, of ``-(1/n) * (y*log10(p) + (1-y)*log10(1-p))``. With no
    pairs the result is 0.
    """
    samples = list(zip(y_true, y_pred))
    return sum(
        (-1 / len(samples)) * (target * np.log10(prob) + (1 - target) * np.log10(1 - prob))
        for target, prob in samples
    )
def gradient_dw(x, y, w, b, alpha, N):
    """Return the per-sample update direction for the weights ``w``.

    Computes ``x * (y - sigmoid(w.x + b))`` and subtracts a weight penalty
    scaled by ``alpha * (1/N)``.
    """
    linear_score = np.dot(w, x) + b
    error = y - sigmoid(linear_score)
    shrinkage = alpha * (1 / N) * w
    return x * error - shrinkage
def gradient_db(x, y, w, b):
    """Return the per-sample update direction for the bias ``b``.

    This is the residual ``y - sigmoid(w.x + b)`` for the sample ``(x, y)``.
    """
    return y - sigmoid(np.dot(w, x) + b)
def train(X_train, y_train, X_test, y_test, epochs, alpha, eta0, tol=1e-3):
    """Fit logistic regression with one gradient update per training sample.

    Args:
        X_train: indexable collection of training feature vectors.
        y_train: training targets, aligned with ``X_train``.
        X_test: held-out feature vectors, used only for loss reporting.
        y_test: held-out targets, aligned with ``X_test``.
        epochs: number of full passes over the training data.
        alpha: regularisation value forwarded to ``gradient_dw``.
        eta0: learning rate applied to both the weight and bias updates.
        tol: accepted for interface compatibility; this implementation does
            not read it, so no early stopping takes place.

    Returns:
        ``(w, b, train_loss, test_loss)``; each loss list holds one value
        per epoch.
    """
    # The first training row is only a shape template for the weights.
    w, b = initialize_weights(X_train[0])
    n_train = len(X_train)
    train_loss = []
    test_loss = []
    for epoch in range(epochs):
        # One pass over the data, updating w and b after every sample.
        for x, y in zip(X_train, y_train):
            dw = gradient_dw(x, y, w, b, alpha, n_train)
            db = gradient_db(x, y, w, b)
            w = w + eta0 * dw
            b = b + eta0 * db
        # Score with the full model: the intercept b must be part of the
        # sigmoid input, otherwise the losses describe a different model
        # from the one being trained.
        y_pred = [sigmoid(np.dot(w, x) + b) for x in X_train]
        y_pred_test = [sigmoid(np.dot(w, x) + b) for x in X_test]
        # Compute each loss once and reuse it for both logging and history.
        epoch_train_loss = logloss(y_train, y_pred)
        epoch_test_loss = logloss(y_test, y_pred_test)
        print(f"EPOCH: {epoch} Train Loss: {epoch_train_loss} Test Loss: {epoch_test_loss}")
        train_loss.append(epoch_train_loss)
        test_loss.append(epoch_test_loss)
    return w, b, train_loss, test_loss
# Hyperparameters mirror the SGDClassifier configuration used above
# (alpha=0.0001, eta0=0.0001).
alpha = 0.0001
eta0 = 0.0001
# NOTE(review): the epoch count is not given in the post; 50 is a
# placeholder — confirm before relying on it.
epochs = 50
w, b, train_loss, test_loss = train(X_train, y_train, X_test, y_test, epochs, alpha, eta0)

# Element-wise gaps between the custom parameters and the fitted reference.
print("Difference between custom w and Scikit-learn's clf.coef_ ", w - clf.coef_)
print("Difference between custom intercept b and Scikit-learn's clf.intercept_ ", b - clf.intercept_)
Difference between custom w and Scikit-learn's clf.coef_ [[-0.00642552 0.00755955 0.00012041 -0.00335043 -0.01309563 0.00978314
0.00724319 0.00418409 0.0125563 -0.00701162 0.00169655 -0.00480346
-0.00173041 0.00056208 0.00032075]]
Difference between custom intercept b and Scikit-learn's clf.intercept_ [-0.03911387]