使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量
Posted
技术标签:
【中文标题】使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量【英文标题】:Gradual increase of memory usage for each iteration of training RNN using TF2.0 【发布时间】:2020-06-18 01:08:36 【问题描述】:我正在使用 TF2.0 训练简单的 RNN 网络作为附加代码。 我的问题是每次训练迭代的内存使用量都会逐渐增加。
import tensorflow as tf
import numpy as np
from Params import *
import Stimulus as stimulus
import matplotlib.pyplot as plt
import os
import pickle
import psutil
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# --- experiment setup (module-level script) ---
iteration = 2000  # number of training iterations

# Generate one fixed trial; the same batch is reused on every iteration.
stim = stimulus.Stimulus()
trial_info = stim.generate_trial()

# Wrap the trial data as constant tensors: network input, desired output,
# and the mask applied to the performance loss.
in_data = tf.constant(trial_info['in_neural'].astype('float32'))
out_target = tf.constant(trial_info['out_desired'])
mask_train = tf.constant(trial_info['mask_train'])
batch_size = par['batch_size']

# Initialize trainable weights: every entry of `par` whose key ends in '0'
# (e.g. 'w_rnn0') holds the initial value of a variable named without the '0'.
var_dict = {}
var_list = []
for k, v in par.items():
    if k[-1] == '0':
        name = k[:-1]
        # NOTE: tf.Variable's second positional argument is `trainable`,
        # not `name` — the name must be passed as a keyword argument.
        var_dict[name] = tf.Variable(v, name=name)
        var_list.append(var_dict[name])

# Initial synaptic-plasticity states (constants, reused each iteration).
syn_x_init = tf.constant(par['syn_x_init'])
syn_u_init = tf.constant(par['syn_u_init'])
##
def rnn_cell(rnn_input, h, syn_x, syn_u, w_rnn, var_dict):
    """One Euler step of the rate RNN with short-term synaptic plasticity.

    Args:
        rnn_input: network input at this time step (batch x n_input — assumed;
            confirm against Stimulus.generate_trial).
        h: recurrent activity from the previous step.
        syn_x: synaptic depression state (presumably; governed by alpha_std).
        syn_u: synaptic facilitation state (presumably; governed by alpha_stf).
        w_rnn: effective recurrent weight matrix (sign-constrained by caller).
        var_dict: dict of trainable variables ('w_in', 'b_rnn', ...).

    Returns:
        Updated (h, syn_x, syn_u).
    """
    # Plasticity dynamics; alpha_std / alpha_stf look like depression /
    # facilitation time constants — TODO confirm against Params.
    syn_x += par['alpha_std'] * (1 - syn_x) - par['dt_sec'] * syn_u * syn_x * h
    syn_u += par['alpha_stf'] * (par['U'] - syn_u) + par['dt_sec'] * par['U'] * (1 - syn_u) * h
    # Clamp both states into [0, 1].
    syn_x = tf.minimum(np.float32(1), tf.nn.relu(syn_x))
    syn_u = tf.minimum(np.float32(1), tf.nn.relu(syn_u))
    # Post-synaptic drive is the activity scaled by the synaptic state.
    h_post = syn_u * syn_x * h
    # Leaky update plus per-unit Gaussian noise. NOTE(review): the relu wraps
    # only the leak term here, exactly as in the original — the recurrent
    # drive and the noise are added outside it.
    h = (tf.nn.relu((1 - par['alpha_neuron']) * h)
         + par['alpha_neuron'] * (h_post @ w_rnn
                                  + rnn_input @ tf.nn.relu(var_dict['w_in'])
                                  + var_dict['b_rnn'])
         + tf.random.normal(h.shape, 0, par['noise_rnn'], dtype=tf.float32))
    return h, syn_x, syn_u
##
def run_model(in_data, var_dict, syn_x_init, syn_u_init):
    """Unroll the RNN over every time step of `in_data`.

    Returns:
        Tuple of stacked histories (h, output, syn_x, syn_u) — each with a
        leading time axis — plus the sign-constrained recurrent matrix used.
    """
    hist_h = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    hist_syn_x = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    hist_syn_u = tf.TensorArray(tf.float32, size=0, dynamic_size=True)
    hist_out = tf.TensorArray(tf.float32, size=0, dynamic_size=True)

    # Broadcast the learned initial state across the batch.
    h = np.ones((par['batch_size'], 1)) @ var_dict['h']
    syn_x = syn_x_init
    syn_u = syn_u_init
    # EI_matrix presumably enforces Dale's law (fixed excitatory/inhibitory
    # signs) on the rectified recurrent weights — TODO confirm in Params.
    w_rnn = par['EI_matrix'] @ tf.nn.relu(var_dict['w_rnn'])

    # Explicit step counter rather than enumerate(): this function runs
    # inside a tf.function, and the counter form converts cleanly.
    step = 0
    for rnn_input in in_data:
        h, syn_x, syn_u = rnn_cell(rnn_input, h, syn_x, syn_u, w_rnn, var_dict)
        hist_h = hist_h.write(step, h)
        hist_syn_x = hist_syn_x.write(step, syn_x)
        hist_syn_u = hist_syn_u.write(step, syn_u)
        hist_out = hist_out.write(step, h @ tf.nn.relu(var_dict['w_out']) + var_dict['b_out'])
        step += 1

    return (hist_h.stack(), hist_out.stack(),
            hist_syn_x.stack(), hist_syn_u.stack(), w_rnn)
##
def calc_loss(var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train):
    """Run the network once and compute the total training loss.

    Returns:
        (loss, output, perf_loss, spike_loss, weight_loss).
    """
    h, output, _, _, w_rnn = run_model(in_data, var_dict, syn_x_init, syn_u_init)
    # Masked squared error on the first output channel only.
    perf_loss = tf.reduce_sum(
        mask_train * tf.math.square(output[:, :, 0] - out_target[:, :, 0]))
    exponent = 2
    # Quadratic penalties on firing rates and rectified recurrent weights.
    spike_loss = tf.reduce_sum(h ** exponent)
    weight_loss = tf.reduce_sum(tf.nn.relu(w_rnn) ** exponent)
    loss = (perf_loss
            + par['spike_cost'] * spike_loss
            + par['weight_cost'] * weight_loss)
    return loss, output, perf_loss, spike_loss, weight_loss
##
opt = tf.optimizers.Adam(learning_rate=par['learning_rate'])


@tf.function
def train_onestep(var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train):
    """Single optimization step: forward pass, gradients, Adam update.

    Returns:
        (loss, output, perf_loss, spike_loss) from this step.
    """
    with tf.GradientTape() as tape:
        loss, output, perf_loss, spike_loss, _ = calc_loss(
            var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train)
    # The tape is non-persistent; gradient() is called exactly once.
    grads = tape.gradient(loss, var_list)
    opt.apply_gradients(zip(grads, var_list))
    return loss, output, perf_loss, spike_loss
##
# Training loop: reuses the same constant batch every iteration and reports
# the process resident-set size (MB) to track memory growth.
for step in range(iteration):
    loss, output, perf_loss, spike_loss = train_onestep(
        var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train)
    rss_mb = np.round(psutil.Process(os.getpid()).memory_info().rss / (10 ** 6))
    print('iter=', step + 1, ' loss=', loss.numpy(),
          ', perf_loss=', perf_loss.numpy(),
          ', spike_loss=', spike_loss.numpy(),
          ' memory=', rss_mb)
那么,结果如下
iter= 1 loss= 6052386.5 memory= 4208.0
iter= 2 loss= 2863788.0 memory= 4253.0
iter= 3 loss= 2265501.2 memory= 4280.0
iter= 4 loss= 2006586.8 memory= 4308.0
iter= 5 loss= 1869531.2 memory= 4472.0
iter= 6 loss= 1792165.0 memory= 4499.0
iter= 7 loss= 1744736.5 memory= 4529.0
iter= 8 loss= 1705666.1 memory= 4558.0
iter= 9 loss= 1678203.8 memory= 4588.0
iter= 10 loss= 1654413.4 memory= 4617.0
iter= 11 loss= 1643800.1 memory= 4647.0
iter= 12 loss= 1627259.1 memory= 4676.0
iter= 13 loss= 1612042.0 memory= 4706.0
iter= 14 loss= 1599993.6 memory= 4735.0
iter= 15 loss= 1587904.2 memory= 4765.0
iter= 16 loss= 1579129.6 memory= 4794.0
iter= 17 loss= 1565546.0 memory= 4824.0
iter= 18 loss= 1557875.1 memory= 4853.0
您可能会发现每次迭代都会增加“内存”使用量。 我怎么解决这个问题? 我会非常感谢你的帮助!!
【问题讨论】:
【参考方案1】:我通过安装“tf-nightly”解决了这个问题。
【讨论】:
除了 tf-nightly，您还可以尝试 2.1.0 版本，这是撰写本文时的最新版本。

以上是关于使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量的主要内容，如果未能解决你的问题，请参考以下文章
使用 dropout (TF2.0) 时,可变批量大小不适用于 tf.keras.layers.RNN?