A Quick Neural Network from Scratch
Everyone thinks neural networks are amazing, so let's build one from scratch.
First, brush up on neural networks: http://deeplearning.stanford.edu/wiki/index.php/Neural_Networks
Then brush up on the backpropagation algorithm: http://deeplearning.stanford.edu/wiki/index.php/Backpropagation_Algorithm
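As a quick recap, these are the equations the code below follows, written with the convention used in the implementation (W^{(l)} and b^{(l)} map layer l-1's output to layer l, which differs slightly from NG's notation):

z^{(l)} = W^{(l)} a^{(l-1)} + b^{(l)}, \qquad a^{(l)} = f(z^{(l)})

\delta^{(L)} = -\left(y - a^{(L)}\right) \odot f'(z^{(L)})

\delta^{(l)} = \left( (W^{(l+1)})^\top \delta^{(l+1)} \right) \odot f'(z^{(l)})

W^{(l)} \leftarrow W^{(l)} - \alpha \, \delta^{(l)} \left(a^{(l-1)}\right)^\top, \qquad b^{(l)} \leftarrow b^{(l)} - \alpha \, \delta^{(l)}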
Then, on to the code:
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <memory>
using namespace std;

// http://www.cnblogs.com/yeahgis/archive/2012/07/13/2590485.html
// Gaussian random number (Marsaglia polar method), mean 0, variance 1
double gaussrand()
{
    static double V1, V2, S;
    static int phase = 0;
    double X;

    if (phase == 0) {
        do {
            double U1 = (double)rand() / RAND_MAX;
            double U2 = (double)rand() / RAND_MAX;

            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
        } while (S >= 1 || S == 0);

        X = V1 * sqrt(-2 * log(S) / S);
    } else
        X = V2 * sqrt(-2 * log(S) / S);

    phase = 1 - phase;

    return X;
}

// No manual memory management, to keep things simple
typedef shared_ptr<double> DoublePtr;
inline DoublePtr newDoubleArray(int size)
{
    double *p = new double[size];
    return DoublePtr(p, default_delete<double[]>());
}

// Simple matrix (copies share the underlying data; only the metadata is copied)
struct Matrix
{
    int row, col, size;
    DoublePtr data;

    Matrix(int _row = 1, int _col = 1) : row(_row), col(_col)
    {
        size = row * col;
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double* operator[](int i) {
        assert(i < row);
        return data.get() + i * col;
    }
};

// Print a matrix
ostream& operator<<(ostream& out, Matrix w)
{
    out << "[ (" << w.row << " x " << w.col << ")" << endl;
    for (int i = 0; i < w.row; i++) {
        out << "\t[";
        for (int j = 0; j < w.col; j++) {
            if (j > 0) out << ",";
            out << w[i][j];
        }
        out << "]" << endl;
    }
    out << "]";
    return out;
}

// Simple vector (copies share the underlying data; only the metadata is copied)
struct Vector
{
    int size;
    DoublePtr data;

    Vector(int _size = 1) : size(_size)
    {
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double &operator[](int x)
    {
        assert(x < size);
        return data.get()[x];
    }
};

// Print a vector
ostream& operator<<(ostream& out, Vector v)
{
    out << "[ (" << v.size << ") ";
    for (int i = 0; i < v.size; i++) {
        if (i > 0) out << ",";
        out << v[i];
    }
    out << "]";
    return out;
}

Vector operator*(Matrix w, Vector v)
{
    Vector ret(w.row);
    for (int i = 0; i < w.row; i++) {
        for (int j = 0; j < w.col; j++) {
            ret[i] += w[i][j] * v[j];
        }
    }
    return ret;
}

// Element-wise (Hadamard) product
Vector operator*(Vector x, Vector y)
{
    Vector ret(x.size);
    for (int i = 0; i < x.size; i++) {
        ret[i] = x[i] * y[i];
    }
    return ret;
}

// Multiply the transpose of w by v
Vector TandMul(Matrix w, Vector v)
{
    Vector ret(w.col);
    for (int i = 0; i < w.col; i++) {
        for (int j = 0; j < w.row; j++) {
            ret[i] += w[j][i] * v[j];
        }
    }
    return ret;
}

Vector operator+(Vector x, Vector y)
{
    Vector ret(x.size);
    for (int i = 0; i < x.size; i++) {
        ret[i] = x[i] + y[i];
    }
    return ret;
}

Vector operator*(double x, Vector y)
{
    Vector ret(y.size);
    for (int i = 0; i < y.size; i++) {
        ret[i] = x * y[i];
    }
    return ret;
}

Vector operator*(Vector x, double y)
{
    return y * x;
}

// Cost function (base class)
struct CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        return 0;
    }

    virtual double operator()(Vector x, Vector y)
    {
        return calc(x, y);
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        return Vector(output.size);
    }
};

// Squared-error cost function
struct SqrCostFun : CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        double ret = 0;
        for (int i = 0; i < x.size; i++) {
            double t = x[i] - y[i];
            ret += t * t;
        }
        return ret / 2;
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        // -(y - output)
        return -1 * y + output;
    }
};

// Singleton
SqrCostFun SqrCostFunSingleton;

// Activation function (the base class is the identity)
struct Activator
{
    // forward
    virtual double forward(double v)
    {
        return v;
    }

    virtual double operator()(double v)
    {
        return forward(v);
    }

    virtual Vector operator()(Vector v)
    {
        Vector ret(v.size);
        for (int i = 0; i < v.size; i++) {
            ret[i] = forward(v[i]);
        }
        return ret;
    }

    // Derivative
    virtual double derive(double v)
    {
        return 1;
    }

    virtual Vector derive(Vector v)
    {
        Vector ret(v.size);
        for (int i = 0; i < ret.size; i++) {
            ret[i] = derive(v[i]);
        }
        return ret;
    }
};

// Sigmoid activation function
struct SigmoidActivator : Activator
{
    virtual double forward(double v)
    {
        return 1 / (1 + exp(-v));
    }

    virtual double derive(double v)
    {
        double t = exp(-v);
        return t / ((1 + t) * (1 + t));
    }
};

// Singleton
SigmoidActivator SigmoidActivatorSingleton;

// One layer of the NN
// 1. The input does not count as a layer
// 2. A layer's w matrix maps the previous layer's output to this layer,
//    which differs slightly from NG's definition
// 3. Likewise, a layer's b maps the previous layer to this layer
struct Layer
{
    // Size of the previous layer's output, excluding the bias
    int inSize;
    // Size of this layer's output
    int outSize;

    Activator &activator;
    Matrix w;
    Vector b;

    void initWeights(double *p, int size)
    {
        // Initialize from a normal distribution with mean 0 and std 0.01
        for (int i = 0; i < size; i++) {
            p[i] = gaussrand() * 0.01;
        }
    }

    Layer(int _inSize = 1, int _outSize = 1, Activator &_activator = SigmoidActivatorSingleton) :
        inSize(_inSize),
        outSize(_outSize),
        activator(_activator),
        w(_outSize, _inSize),
        b(_outSize)
    {
        initWeights(w.data.get(), w.size);
        initWeights(b.data.get(), b.size);
    }

    // Activations saved by the most recent forward pass
    Vector a;
    Vector z;
    // in is the previous layer's output
    Vector operator()(Vector in)
    {
        z = w * in + b;
        return a = activator(z);
    }

    // Delta saved by the most recent backward pass
    Vector delta;
    Vector propagateDelta()
    {
        return TandMul(w, delta);
    }

    // alpha is the learning rate
    // prevA is the previous layer's output
    void updateParameters(double alpha, Vector prevA)
    {
        b = b + (-alpha) * delta;
        Matrix nw(w.row, w.col);
        for (int i = 0; i < w.row; i++) {
            for (int j = 0; j < w.col; j++) {
                nw[i][j] = w[i][j] - alpha * prevA[j] * delta[i];
            }
        }
        w = nw;
    }
};

ostream& operator<<(ostream& out, Layer& layer)
{
    out << "Layer {" << endl;
    out << "w = " << layer.w << endl;
    out << "b = " << layer.b << endl;
    out << "z = " << layer.z << endl;
    out << "a = " << layer.a << endl;
    out << "delta = " << layer.delta << endl;
    out << "}" << endl;
    return out;
}

Vector forward(Layer layerList[], int nLayer, Vector input)
{
    Vector tmp = input;
    for (int i = 0; i < nLayer; i++) {
        tmp = layerList[i](tmp);
    }
    return tmp;
}

void backward(Layer layerList[], int nLayer, Vector input, Vector y, CostFun& costFun, double alpha)
{
    // Back-propagate the deltas
    Layer &lastLayer = layerList[nLayer - 1];
    // For the squared-error cost this is: -(y - a) * f'(z)
    lastLayer.delta = costFun.propagateDelta(lastLayer.a, y) * lastLayer.activator.derive(lastLayer.z);

    for (int i = nLayer - 2; i >= 0; i--) {
        Layer &layer = layerList[i];
        Layer &nextLayer = layerList[i + 1];
        layer.delta = nextLayer.propagateDelta() * layer.activator.derive(layer.z);
    }

    // Update all w and b
    for (int i = 0; i < nLayer; i++) {
        layerList[i].updateParameters(alpha, i == 0 ? input : layerList[i - 1].a);
    }
}

int main()
{
    srand(100);

    // Network structure
    Layer layerList[] = {
        Layer(2, 2), // hidden layer (input size, hidden size)
        Layer(2, 1), // output layer
    };

    // Cost function
    CostFun &costFun = SqrCostFunSingleton;

    // Number of layers, not counting the input layer
    int nLayer = sizeof(layerList) / sizeof(layerList[0]);
    int nInput = layerList[0].inSize;
    int nOutput = layerList[nLayer - 1].outSize;

    // Test on XOR
    int xs[4][2] = {
        {0,0},
        {0,1},
        {1,0},
        {1,1}
    };
    int ys[4] = {
        0,
        1,
        1,
        0
    };

    for (int step = 0; step < 100000; step++) {
        double avgError = 0;
        for (int i = 0; i < 4; i++) {
            Vector x(2);
            for (int j = 0; j < 2; j++) {
                x[j] = xs[i][j];
            }

            Vector y(1);
            y[0] = ys[i];

            Vector output = forward(layerList, nLayer, x);
            double error = SqrCostFunSingleton(output, y);
            avgError += error;

            backward(layerList, nLayer, x, y, SqrCostFunSingleton, 0.1);
        }
        avgError /= 4;

        cout << "after " << step << " steps, error = " << avgError << endl;
    }

    return 0;
}
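A note on building: since the code uses shared_ptr and default_delete from <memory>, it needs a C++11 compiler. Assuming the listing is saved as nn.cpp (the filename is just for illustration), something like this should work:

g++ -std=c++11 -O2 -o nn nn.cpp
./nn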
The implementation took four-plus hours, and the most troublesome part is that it is hard to debug. I ran a few test cases and found several typos; at least it now classifies XOR correctly. NG mentions a method for verifying that the algorithm is correct, and it does look necessary, especially if this code is going to serve as a base for other things later. Next time: http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
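The core of gradient checking is just a central difference: for each parameter theta_i, compare the analytic gradient against (J(theta + eps * e_i) - J(theta - eps * e_i)) / (2 * eps) for a small eps (e.g. 1e-4). As a minimal standalone sketch of the idea (not wired into the network above; the sigmoid is used here only because its derivative formula appears in SigmoidActivator), one could verify a derivative like this:

#include <cmath>
#include <cstdio>

// Same formulas as SigmoidActivator in the listing above
double sigmoid(double v)       { return 1 / (1 + exp(-v)); }
double sigmoidDerive(double v) { double t = exp(-v); return t / ((1 + t) * (1 + t)); }

int main()
{
    const double eps = 1e-4;
    for (double v = -2; v <= 2; v += 0.5) {
        // Central difference: (f(v + eps) - f(v - eps)) / (2 * eps)
        double numeric  = (sigmoid(v + eps) - sigmoid(v - eps)) / (2 * eps);
        double analytic = sigmoidDerive(v);
        printf("v = %+.2f  numeric = %.8f  analytic = %.8f  diff = %.2e\n",
               v, numeric, analytic, numeric - analytic);
    }
    return 0;
}

Applied to the full network, the same loop would perturb one w[i][j] or b[i] at a time, rerun forward to evaluate the cost, and compare the difference quotient against the backprop gradient (prevA[j] * delta[i] for the weights, delta[i] for the biases).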