TensorFlow Learning Notes: Implementing an Autoencoder

Posted by 佟学强


Chapter 4 of Huang Wenjian's book TensorFlow实战 describes implementing a multi-layer perceptron in TensorFlow. Hinton proposed the autoencoder, an unsupervised learning algorithm, early on; the book's code builds a network with a single hidden layer, which I have extended to multiple layers while improving the code. To implement the multi-layer network, each layer is wrapped in a NetLayer object (essentially a node in a singly linked list), the hidden-layer outputs are computed with a recursive algorithm, and finally an outer manager class ties everything together.
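Before the full listing, a minimal pure-Python sketch of the same linked-list recursion may help; the names Layer and forward here are illustrative only and do not appear in the real code below:

class Layer:
    '''One node of a singly linked list of layers.'''
    def __init__(self, weight):
        self.weight = weight
        self.next_layer = None   # set by the manager when the list is built

    def forward(self, x):
        '''Recursively propagate x through this layer and every layer after it.'''
        out = self.weight * x            # stand-in for matmul + bias + activation
        if self.next_layer is not None:  # recurse until the tail of the list
            return self.next_layer.forward(out)
        return out                       # tail of the list: the final output

# build the list 3.0 -> 2.0 -> 0.5, then run the whole chain from the head
head, mid, tail = Layer(3.0), Layer(2.0), Layer(0.5)
head.next_layer, mid.next_layer = mid, tail
print(head.forward(1.0))  # 3.0 * 2.0 * 0.5 = 3.0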

The full code is as follows:
import numpy as np
import sklearn.preprocessing as prep
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

'''
TensorFlow implementation of a denoising autoencoder (unsupervised learning).
'''

'''
Xavier initializer: draws the weights uniformly from [low, high], which gives
the desired variance of 2 / (fan_in + fan_out).
'''
def xavier_init(fan_in, fan_out, constant=1):
    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out), minval=low, maxval=high, dtype=tf.float32)

'''Zero-mean, unit-variance standardization; the scaler is fit on the training set only.'''
def standard_scale(X_train, X_test):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test

'''Batch-sampling strategy: take a random contiguous block of batch_size rows.'''
def get_random_block_from_data(data, batch_size):
    start_index = np.random.randint(0, len(data) - batch_size)
    return data[start_index:(start_index + batch_size)]

'''A hidden layer; the layers form a singly linked list. n_node is the number of
nodes in this layer, n_input the number of input nodes.'''
class NetLayer:
    def __init__(self, n_node, n_input):
        self.n_node = n_node
        self.n_input = n_input
        self.next_layer = None

    '''Initialize the weights of this layer.'''
    def _initialize_weights(self):
        weights = dict()
        if self.next_layer is None:  # last layer: it only aggregates, without an activation, so all zeros
            weights['w'] = tf.Variable(tf.zeros([self.n_input, self.n_node], dtype=tf.float32))
            weights['b'] = tf.Variable(tf.zeros([self.n_node], dtype=tf.float32))
        else:
            weights['w'] = xavier_init(self.n_input, self.n_node)
            weights['b'] = tf.Variable(tf.zeros([self.n_node], dtype=tf.float32))

        self.weights = weights
        return self.weights

    '''Recursively compute the output of each layer; returns the output of the last layer.'''
    def cal_output(self, transfer, index, X, scale):
        if index == 0:  # first layer: add Gaussian noise to the input
            self.output = transfer(tf.add(tf.matmul(X + scale * tf.random_normal([self.n_input]), self.weights['w']), self.weights['b']))
        else:
            if self.next_layer is not None:
                self.output = transfer(tf.add(tf.matmul(X, self.weights['w']), self.weights['b']))
            else:  # last layer: aggregate only, no activation
                self.output = tf.add(tf.matmul(X, self.weights['w']), self.weights['b'])
        if self.next_layer is not None:
            return self.next_layer.cal_output(transfer, index + 1, self.output, scale)
        return self.output

    def get_weights(self):
        return self.weights['w']

    def get_bias(self):
        return self.weights['b']

'''The outer manager class.'''
class AdditiveGaussianNoiseAutoencoder(object):
    def __init__(self, layers, transfer_function=tf.nn.softplus, optimizer=tf.train.AdamOptimizer(), scale=0.1):
        self.layers = []
        self.training_scale = scale
        self.scale = tf.placeholder(tf.float32)
        self.construct_network(layers)
        self._initialize_weights(self.layers)

        self.x = tf.placeholder(tf.float32, [None, layers[0]])
        # pass the scale placeholder so the noise level can be fed at run time
        self.reconstruction = self.layers[0].cal_output(transfer_function, 0, self.x, self.scale)

        # squared-error reconstruction loss
        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
        self.optimizer = optimizer.minimize(self.cost)

        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)

    '''Create each layer and link it to its predecessor.'''
    def construct_network(self, layers):
        last_layer = None
        for i, layer in enumerate(layers):
            if i == 0:
                continue
            cur_layer = NetLayer(layer, layers[i - 1])
            self.layers.append(cur_layer)
            if last_layer is not None:
                last_layer.next_layer = cur_layer
            last_layer = cur_layer

    '''Have every layer initialize its own weights.'''
    def _initialize_weights(self, layers):
        for i, layer in enumerate(layers):
            layer._initialize_weights()

    '''Run one training step and return the value of the cost node.'''
    def partial_fit(self, X):
        cost, opt = self.sess.run((self.cost, self.optimizer),
            feed_dict={self.x: X, self.scale: self.training_scale})
        return cost

    '''Run the cost node only.'''
    def calc_total_cost(self, X):
        return self.sess.run(self.cost, feed_dict={self.x: X, self.scale: self.training_scale})

    '''Run the reconstruction node.'''
    def reconstruct(self, X):
        return self.sess.run(self.reconstruction, feed_dict={self.x: X, self.scale: self.training_scale})


if __name__ == '__main__':
    mnist = input_data.read_data_sets("E:\\Python35\\Lib\\site-packages\\tensorflow\\examples\\tutorials\\mnist\\MNIST_data", one_hot=True)

    X_train, X_test = standard_scale(mnist.train.images, mnist.test.images)  # training and test samples
    n_samples = int(mnist.train.num_examples)  # total number of training samples
    training_epochs = 20  # number of epochs
    batch_size = 128      # batch size
    display_step = 1      # print the loss every display_step epochs

    autoencoder = AdditiveGaussianNoiseAutoencoder(layers=[784, 500, 200, 784], transfer_function=tf.nn.softplus,
        optimizer=tf.train.AdamOptimizer(learning_rate=0.001), scale=0.01)

    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(n_samples / batch_size)
        for i in range(total_batch):
            batch_xs = get_random_block_from_data(X_train, batch_size)
            cost = autoencoder.partial_fit(batch_xs)
            avg_cost += cost / n_samples * batch_size

        if epoch % display_step == 0:
            print("Epoch:", "%04d" % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

    '''After training, check the cost on the test set.'''
    print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
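Once training finishes, the reconstruct method can be used to eyeball the output. Here is a minimal sketch, assuming it is appended to the __main__ block above and that matplotlib is installed; the 28×28 reshape matches MNIST's flattened 784-pixel images, and since the images were standardized their values are no longer in [0, 1] (imshow rescales automatically):

    import matplotlib.pyplot as plt

    samples = X_test[:5]                      # a few standardized test digits
    recon = autoencoder.reconstruct(samples)  # run the reconstruction node

    fig, axes = plt.subplots(2, 5, figsize=(10, 4))
    for i in range(5):
        axes[0][i].imshow(samples[i].reshape(28, 28), cmap='gray')  # original
        axes[1][i].imshow(recon[i].reshape(28, 28), cmap='gray')    # reconstruction
        axes[0][i].axis('off')
        axes[1][i].axis('off')
    plt.show()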