A Quick Shot at Neural Networks


Everyone says neural networks are amazing, so let's build one from scratch.

First, a refresher on neural networks: http://deeplearning.stanford.edu/wiki/index.php/Neural_Networks

Then a refresher on the backpropagation algorithm: http://deeplearning.stanford.edu/wiki/index.php/Backpropagation_Algorithm
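
For quick reference, these are the equations from those pages that the code below implements (squared-error cost, sigmoid activations; superscripts index layers, \( \odot \) is element-wise multiplication):

\[ z^{(l)} = W^{(l)} a^{(l-1)} + b^{(l)}, \qquad a^{(l)} = f(z^{(l)}) \]
\[ \delta^{(L)} = -(y - a^{(L)}) \odot f'(z^{(L)}), \qquad \delta^{(l)} = \left( (W^{(l+1)})^{T} \delta^{(l+1)} \right) \odot f'(z^{(l)}) \]
\[ W^{(l)} \leftarrow W^{(l)} - \alpha\, \delta^{(l)} (a^{(l-1)})^{T}, \qquad b^{(l)} \leftarrow b^{(l)} - \alpha\, \delta^{(l)} \]

In the code, TandMul computes the \( W^{T}\delta \) term and updateParameters applies the last line.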

Then implement it in code:

 

#include <iostream>
#include <cmath>
#include <cstdlib>
#include <cassert>
#include <cstring>   // memset
#include <memory>    // shared_ptr, default_delete
using namespace std;

// http://www.cnblogs.com/yeahgis/archive/2012/07/13/2590485.html
// Gaussian random number with mean 0 and variance 1 (Marsaglia polar method)
double gaussrand()
{
    static double V1, V2, S;
    static int phase = 0;
    double X;

    if ( phase == 0 ) {
        do {
            double U1 = (double)rand() / RAND_MAX;
            double U2 = (double)rand() / RAND_MAX;

            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
        } while(S >= 1 || S == 0);

        X = V1 * sqrt(-2 * log(S) / S);
    } else
        X = V2 * sqrt(-2 * log(S) / S);

    phase = 1 - phase;

    return X;
}

// keep memory management simple: shared_ptr with an array deleter frees for us
typedef shared_ptr<double> DoublePtr;
inline DoublePtr newDoubleArray(int size)
{
    double *p = new double[size];
    return DoublePtr(p, default_delete<double[]>());
}

// simple matrix (copying copies only the metadata; copies share the underlying data)
struct Matrix
{
    int row, col, size;
    DoublePtr data;

    Matrix(int _row=1, int _col=1) : row(_row), col(_col)
    {
        size = row * col;
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double* operator[](int i) {
        assert(i < row);
        return data.get() + i * col;
    }
};

// print matrix contents
ostream& operator<<(ostream& out, Matrix w)
{
    out << "[ (" << w.row << " x " << w.col << ")" << endl;
    for(int i = 0;i < w.row;i++) {
        out << "\t[";
        for(int j = 0;j < w.col;j++) {
            if(j > 0) out << ",";
            out << w[i][j];
        }
        out << "]" << endl;
    }
    out << "]";
    return out;
}

// simple vector (copying copies only the metadata; copies share the underlying data)
struct Vector
{
    int size;
    DoublePtr data;

    Vector(int _size=1) : size(_size)
    {
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double &operator[](int x)
    {
        assert(x < size);
        return data.get()[x];
    }
};

// print vector contents
ostream& operator<<(ostream& out, Vector v)
{
    out << "[ (" << v.size << ") ";
    for(int i = 0;i < v.size;i++) {
        if(i > 0) out << ",";
        out << v[i];
    }
    out << "]";
    return out;
}

// matrix-vector product
Vector operator*(Matrix w, Vector v)
{
    Vector ret(w.row);
    for(int i = 0;i < w.row;i++) {
        for(int j = 0;j < w.col;j++) {
            ret[i] += w[i][j] * v[j];
        }
    }
    return ret;
}

// element-wise (Hadamard) product
Vector operator*(Vector x, Vector y)
{
    Vector ret(x.size);
    for(int i = 0;i < x.size;i++) {
        ret[i] = x[i] * y[i];
    }
    return ret;
}

// multiply the transpose of w by v
Vector TandMul(Matrix w, Vector v)
{
    Vector ret(w.col);
    for(int i = 0;i < w.col;i++) {
        for(int j = 0;j < w.row;j++) {
            ret[i] += w[j][i] * v[j];
        }
    }
    return ret;
}

Vector operator+(Vector x, Vector y)
{
    Vector ret(x.size);
    for(int i = 0;i < x.size;i++) {
        ret[i] = x[i] + y[i];
    }
    return ret;
}

Vector operator*(double x, Vector y)
{
    Vector ret(y.size);
    for(int i = 0;i < y.size;i++) {
        ret[i] = x * y[i];
    }
    return ret;
}

Vector operator*(Vector x, double y)
{
    return y * x;
}

// cost function interface
struct CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        return 0;
    }

    virtual double operator()(Vector x, Vector y)
    {
        return calc(x,y);
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        return Vector(output.size);
    }
};

// squared-error cost function: J = sum((x_i - y_i)^2) / 2
struct SqrCostFun: CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        double ret = 0;
        for(int i = 0;i < x.size;i++) {
            double t = x[i] - y[i];
            ret += t * t;
        }
        return ret / 2;
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        // dJ/doutput = -(y - output)
        return -1 * y + output;
    }
};

// singleton
SqrCostFun SqrCostFunSingleton;

// activation function interface (identity by default)
struct Activator
{
    // forward
    virtual double forward(double v)
    {
        return v;
    }

    virtual double operator()(double v)
    {
        return forward(v);
    }

    virtual Vector operator()(Vector v)
    {
        Vector ret(v.size);
        for(int i = 0;i < v.size;i++) {
            ret[i] = forward(v[i]);
        }
        return ret;
    }

    // derivative
    virtual double derive(double v)
    {
        return 1;
    }

    virtual Vector derive(Vector v)
    {
        Vector ret(v.size);
        for(int i = 0;i < ret.size;i++) {
            ret[i] = derive(v[i]);
        }
        return ret;
    }
};

// sigmoid activation
struct SigmoidActivator : Activator
{
    virtual double forward(double v)
    {
        return 1 / (1 + exp(-v));
    }

    // sigma'(v) = sigma(v) * (1 - sigma(v)) = e^-v / (1 + e^-v)^2
    virtual double derive(double v)
    {
        double t = exp(-v);
        return t / ( (1 + t) * (1 + t) );
    }
};

// singleton
SigmoidActivator SigmoidActivatorSingleton;

// one layer of the NN
// 1. the input does not count as a layer
// 2. a layer's w matrix maps the previous layer's output into this layer,
//    which differs slightly from Ng's definition
// 3. likewise, a layer's b is the bias from the previous layer into this layer
struct Layer
{
    // size of the previous layer's output, excluding the bias
    int inSize;
    // size of this layer's output
    int outSize;

    Activator &activator;
    Matrix w;
    Vector b;

    void initWeights(double *p, int size)
    {
        // initialize from a Gaussian with mean 0, scaled to standard deviation 0.01
        for(int i = 0;i < size;i++) {
            p[i] = gaussrand() * 0.01;
        }
    }

    // initializer list follows member declaration order (activator before w and b)
    Layer(int _inSize=1, int _outSize=1, Activator &_activator= SigmoidActivatorSingleton):
        inSize(_inSize),
        outSize(_outSize),
        activator(_activator),
        w(_outSize, _inSize),
        b(_outSize)
    {
        initWeights(w.data.get(), w.size);
        initWeights(b.data.get(), b.size);
    }

    // activations saved by the most recent forward pass
    Vector a;
    Vector z;
    // in is the previous layer's output
    Vector operator()(Vector in)
    {
        z = w * in + b;
        return a = activator(z);
    }

    // delta saved by the most recent backward pass
    Vector delta;
    Vector propagateDelta()
    {
        return TandMul(w, delta);
    }

    // alpha is the learning rate
    // prevA is the previous layer's output
    void updateParameters(double alpha, Vector prevA)
    {
        b = b + (-alpha) * delta;
        Matrix nw(w.row, w.col);
        for(int i = 0;i < w.row;i++) {
            for(int j = 0;j < w.col;j++) {
                nw[i][j] = w[i][j] - alpha * prevA[j] * delta[i];
            }
        }
        w = nw;
    }
};

ostream& operator<<(ostream& out, Layer& layer)
{
    out << "Layer {" << endl;
    out << "w = " << layer.w << endl;
    out << "b = " << layer.b << endl;
    out << "z = " << layer.z << endl;
    out << "a = " << layer.a << endl;
    out << "delta = " << layer.delta << endl;
    out << "}" << endl;
    return out;
}

Vector forward(Layer layerList[], int nLayer, Vector input)
{
    Vector tmp = input;
    for(int i = 0;i < nLayer;i++) {
        tmp = layerList[i](tmp);
    }
    return tmp;
}

void backward(Layer layerList[], int nLayer, Vector input, Vector y, CostFun& costFun, double alpha)
{
    // propagate delta backwards from the output layer;
    // for the squared-error cost this is -(y - a) * f'(z)
    Layer &lastLayer = layerList[nLayer - 1];
    lastLayer.delta = costFun.propagateDelta(lastLayer.a, y) * lastLayer.activator.derive(lastLayer.z);

    for(int i = nLayer - 2;i >= 0;i--) {
        Layer &layer = layerList[i];
        Layer &nextLayer = layerList[i + 1];
        layer.delta = nextLayer.propagateDelta() * layer.activator.derive(layer.z);
    }

    // update all the w's and b's
    for(int i = 0;i < nLayer;i++) {
        layerList[i].updateParameters(alpha, i == 0 ? input : layerList[i - 1].a);
    }
}

int main()
{
    srand(100);

    // network structure
    Layer layerList[] = {
        Layer(2, 2), // hidden layer (input size 2)
        Layer(2, 1), // output layer (output size 1)
    };

    // cost function
    CostFun &costFun = SqrCostFunSingleton;

    // number of layers, not counting the input layer
    int nLayer = sizeof(layerList) / sizeof(layerList[0]);
    int nInput = layerList[0].inSize;
    int nOutput = layerList[nLayer - 1].outSize;

    // test on XOR
    int xs[4][2] = {
        {0,0},
        {0,1},
        {1,0},
        {1,1}
    };
    int ys[4] = {
        0,
        1,
        1,
        0
    };

    for(int step = 0;step < 100000;step++) {
        double avgError = 0;
        for(int i = 0;i < 4;i++) {
            Vector x(2);
            for(int j = 0;j < 2;j++) {
                x[j] = xs[i][j];
            }

            Vector y(1);
            y[0] = ys[i];

            Vector output = forward(layerList, nLayer, x);
            double error = SqrCostFunSingleton(output, y);
            avgError += error;

            backward(layerList, nLayer, x, y, SqrCostFunSingleton, 0.1);
        }
        avgError /= 4;

        cout << "after " << step << " steps, error = " << avgError << endl;
    }

    return 0;
}
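
A quick build note: the listing uses C++11 features (shared_ptr with default_delete<double[]>), so it needs the C++11 flag. Assuming the file is saved as nn.cpp (my name, not part of the post), something like:

    g++ -std=c++11 nn.cpp -o nn && ./nn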

 

The implementation took a bit over 4 hours. The most painful part is that it's hard to debug: I tested some examples and found a few typos, but at least it now classifies XOR correctly. Ng mentions a technique for verifying that the algorithm is correct, and it does seem necessary, especially if I later want to build other things on top of this code. Next time: http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
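
To make that concrete, here is a minimal sketch of what such a gradient check could look like against the listing above. costAt and numericGradCheck are hypothetical helpers of mine, not code from the wiki page: they nudge each weight by ±eps and compare the numeric slope (J(w+eps) - J(w-eps)) / (2*eps) with the analytic gradient delta[i] * prevA[j] that updateParameters uses.

// Hypothetical gradient-checking helpers; they assume the Layer / forward /
// backward / CostFun definitions from the listing above.

// total cost of one example under the current parameters
double costAt(Layer layerList[], int nLayer, Vector x, Vector y, CostFun& costFun)
{
    return costFun(forward(layerList, nLayer, x), y);
}

void numericGradCheck(Layer layerList[], int nLayer, Vector x, Vector y, CostFun& costFun)
{
    const double eps = 1e-4;
    for(int l = 0;l < nLayer;l++) {
        // refresh a, z and delta for the unperturbed weights
        // (alpha = 0 computes deltas but leaves w and b numerically unchanged)
        forward(layerList, nLayer, x);
        backward(layerList, nLayer, x, y, costFun, 0);

        Layer &layer = layerList[l];
        // snapshot of the previous layer's output; Vector copies share data,
        // so this keeps pointing at the unperturbed activations
        Vector prevA = (l == 0) ? x : layerList[l - 1].a;

        for(int i = 0;i < layer.w.row;i++) {
            for(int j = 0;j < layer.w.col;j++) {
                double saved = layer.w[i][j];
                layer.w[i][j] = saved + eps;
                double cPlus = costAt(layerList, nLayer, x, y, costFun);
                layer.w[i][j] = saved - eps;
                double cMinus = costAt(layerList, nLayer, x, y, costFun);
                layer.w[i][j] = saved; // restore

                double numeric = (cPlus - cMinus) / (2 * eps);
                double analytic = layer.delta[i] * prevA[j];
                cout << "layer " << l << " w[" << i << "][" << j << "]"
                     << " numeric = " << numeric
                     << " analytic = " << analytic << endl;
            }
        }
    }
}

Calling numericGradCheck(layerList, nLayer, x, y, SqrCostFunSingleton) from main should print numeric and analytic values that agree to several decimal places if backpropagation is correct; the wiki suggests an eps on the order of 1e-4.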

 
