使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量

Posted

技术标签:

【中文标题】使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量【英文标题】:Gradual increase of memory usage for each iteration of training RNN using TF2.0 【发布时间】:2020-06-18 01:08:36 【问题描述】:

我正在使用 TF2.0 训练一个简单的 RNN 网络，代码附在下面。我的问题是：每进行一次训练迭代，内存使用量都会逐渐增加。

import tensorflow as tf
import numpy as np
from Params import *
import Stimulus as stimulus
import matplotlib.pyplot as plt
import os
import pickle
import psutil
# KMP_DUPLICATE_LIB_OK is an Intel OpenMP runtime setting; 'True' asks the
# runtime to tolerate more than one copy of itself being loaded.
# NOTE(review): this runs after tensorflow/numpy/matplotlib have already been
# imported — confirm it still takes effect at this point.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Number of optimizer steps performed by the training loop at the end of the script.
iteration = 2000

# Generate one set of trials. Stimulus is project code; its result is indexed
# below with the keys 'in_neural', 'out_desired' and 'mask_train'.
stim = stimulus.Stimulus()
trial_info = stim.generate_trial()

# Wrap the trial arrays as constant tensors; the same tensors are passed to
# every training step. Only the network input is explicitly cast to float32.
# in_data is later iterated along its first axis by run_model.

in_data = tf.constant(trial_info['in_neural'].astype('float32'))
out_target = tf.constant(trial_info['out_desired'])
mask_train = tf.constant(trial_info['mask_train'])

# `par` is presumably the parameter dict exported by `from Params import *`.
batch_size = par['batch_size']

# Initialize the trainable weights.
# Every entry of `par` whose key ends in '0' is used as an initial value; the
# variable is stored under that key with the trailing '0' removed (so the
# var_dict['w_rnn'] read by run_model comes from par['w_rnn0']).
var_dict = {}
var_list = []
for k, v in par.items():
    if k.endswith('0'):
        name = k[:-1]
        # `name` has to be passed by keyword: the second positional parameter
        # of tf.Variable is `trainable`, not `name`.
        var_dict[name] = tf.Variable(v, name=name)
        var_list.append(var_dict[name])

# Initial syn_x / syn_u state handed to run_model on every training step.
syn_x_init = tf.constant(par['syn_x_init'])
syn_u_init = tf.constant(par['syn_u_init'])

##

def rnn_cell(rnn_input, h, syn_x, syn_u, w_rnn, var_dict):
    """Advance the hidden activity and both synaptic states by one step.

    Args:
        rnn_input: Input for the current step; right-multiplied by
            relu(var_dict['w_in']).
        h: Hidden activity from the previous step.
        syn_x: Synaptic state from the previous step, updated using
            par['alpha_std'].
        syn_u: Synaptic state from the previous step, updated using
            par['alpha_stf'] and par['U'].
        w_rnn: Recurrent weight matrix applied to the synaptically scaled
            activity (syn_u * syn_x * h).
        var_dict: Mapping providing the 'w_in' and 'b_rnn' variables.

    Returns:
        Tuple ``(h, syn_x, syn_u)`` with the updated values.
    """
    dt = par['dt_sec']
    u_base = par['U']
    alpha = par['alpha_neuron']

    # TODO(review): the original author asked "what is alpha_std???" here; the
    # meaning of par['alpha_std'] still needs to be documented in Params.
    delta_x = par['alpha_std'] * (1 - syn_x) - dt * syn_u * syn_x * h
    syn_x += delta_x
    delta_u = par['alpha_stf'] * (u_base - syn_u) + dt * u_base * (1 - syn_u) * h
    syn_u += delta_u

    # Clamp both synaptic states to the interval [0, 1].
    upper = np.float32(1)
    syn_x = tf.minimum(upper, tf.nn.relu(syn_x))
    syn_u = tf.minimum(upper, tf.nn.relu(syn_u))

    # Activity as seen by the recurrent weights after synaptic scaling.
    h_post = syn_u * syn_x * h

    # Only the leak term is rectified; the weighted drive and the noise are
    # added on top of it afterwards.
    leak = tf.nn.relu((1 - alpha) * h)
    drive = h_post @ w_rnn \
        + rnn_input @ tf.nn.relu(var_dict['w_in']) \
        + var_dict['b_rnn']
    h_next = leak + alpha * drive
    # Gaussian noise with mean 0 and stddev par['noise_rnn'], shaped like h.
    h_next = h_next + tf.random.normal(h.shape, 0, par['noise_rnn'], dtype=tf.float32)
    return h_next, syn_x, syn_u

##

def run_model(in_data, var_dict, syn_x_init, syn_u_init):
    """Run rnn_cell over every slice of ``in_data`` and collect the history.

    Args:
        in_data: Input tensor; it is iterated along its first axis and each
            slice is fed to rnn_cell as one step.
        var_dict: Mapping providing 'h', 'w_rnn', 'w_in', 'b_rnn', 'w_out'
            and 'b_out'.
        syn_x_init: Initial value of the syn_x state.
        syn_u_init: Initial value of the syn_u state.

    Returns:
        Tuple ``(h, output, syn_x, syn_u, w_rnn)``: the first four are the
        per-step values stacked along a new leading axis, and ``w_rnn`` is
        the effective recurrent matrix par['EI_matrix'] @ relu(var_dict['w_rnn']).
    """
    # One dynamically sized float32 TensorArray per recorded quantity.
    h_trace = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    x_trace = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    u_trace = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)
    out_trace = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True)

    # A (batch_size, 1) column of ones times var_dict['h'] gives one copy of
    # the initial activity per batch element (assumes var_dict['h'] has a
    # leading dimension of 1 — TODO confirm).
    ones_col = np.ones((par['batch_size'], 1))
    h = ones_col @ var_dict['h']
    syn_x, syn_u = syn_x_init, syn_u_init
    w_rnn = par['EI_matrix'] @ tf.nn.relu(var_dict['w_rnn'])

    step = 0
    for rnn_input in in_data:
        h, syn_x, syn_u = rnn_cell(rnn_input, h, syn_x, syn_u, w_rnn, var_dict)

        # TensorArray.write returns the updated array, so rebind every time.
        h_trace = h_trace.write(step, h)
        x_trace = x_trace.write(step, syn_x)
        u_trace = u_trace.write(step, syn_u)
        readout = h @ tf.nn.relu(var_dict['w_out']) + var_dict['b_out']
        out_trace = out_trace.write(step, readout)
        step += 1

    hidden_all = h_trace.stack()
    syn_x_all = x_trace.stack()
    syn_u_all = u_trace.stack()
    output_all = out_trace.stack()

    return hidden_all, output_all, syn_x_all, syn_u_all, w_rnn

##

def calc_loss(var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train):
    """Run the model once and combine the three cost terms.

    Args:
        var_dict: Mapping of model variables, forwarded to run_model.
        syn_x_init: Initial syn_x state, forwarded to run_model.
        syn_u_init: Initial syn_u state, forwarded to run_model.
        in_data: Network input, forwarded to run_model.
        out_target: Desired output; only index 0 of its last axis is compared.
        mask_train: Weights multiplied element-wise with the squared error.

    Returns:
        Tuple ``(loss, output, perf_loss, spike_loss, weight_loss)`` where
        ``loss = perf_loss + par['spike_cost'] * spike_loss
        + par['weight_cost'] * weight_loss``.
    """
    exponent = 2
    activity, output, _, _, w_rnn = run_model(in_data, var_dict, syn_x_init, syn_u_init)

    # Masked sum of squared errors on the first output channel.
    squared_error = tf.math.square(output[:,:,0]-out_target[:,:,0])
    perf_loss = tf.reduce_sum(mask_train * squared_error)

    # Penalty on the hidden activity.
    spike_loss = tf.reduce_sum(activity ** exponent)

    # Penalty on the recurrent weights.
    # NOTE(review): w_rnn here is the effective matrix returned by run_model,
    # so any negative entries are zeroed by the relu and contribute nothing —
    # confirm that is intended.
    weight_loss = tf.reduce_sum(tf.nn.relu(w_rnn) ** exponent)

    loss = perf_loss + par['spike_cost'] * spike_loss + par['weight_cost'] * weight_loss
    return loss, output, perf_loss, spike_loss, weight_loss

##

# Single Adam optimizer instance reused by every call to train_onestep.
opt = tf.optimizers.Adam(learning_rate=par['learning_rate'])


@tf.function
def train_onestep(var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train):
    """Compute the loss, back-propagate and apply one Adam update.

    Args:
        var_dict: Mapping of model variables, forwarded to calc_loss.
        syn_x_init: Initial syn_x state, forwarded to calc_loss.
        syn_u_init: Initial syn_u state, forwarded to calc_loss.
        in_data: Network input, forwarded to calc_loss.
        out_target: Desired output, forwarded to calc_loss.
        mask_train: Error mask, forwarded to calc_loss.

    Returns:
        Tuple ``(loss, output, perf_loss, spike_loss)`` as computed by
        calc_loss before the update is applied.
    """
    with tf.GradientTape() as tape:
        loss, output, perf_loss, spike_loss, _ = calc_loss(
            var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train)

    # Gradients are taken with respect to the module-level var_list rather
    # than the var_dict argument.
    gradients = tape.gradient(loss, var_list)
    opt.apply_gradients(grads_and_vars=list(zip(gradients, var_list)))

    return loss, output, perf_loss, spike_loss

##

# Training loop: one optimizer step per iteration, then a progress line with
# the losses and this process's resident set size (rss / 10**6, rounded).
for i in range(iteration):
    loss, output, perf_loss, spike_loss = train_onestep(
        var_dict, syn_x_init, syn_u_init, in_data, out_target, mask_train)
    report = (
        'iter=', i+1,
        ' loss=', loss.numpy(),
        ', perf_loss=', perf_loss.numpy(),
        ', spike_loss=', spike_loss.numpy(),
        ' memory=', np.round(psutil.Process(os.getpid()).memory_info().rss/(10**6)),
    )
    print(*report)

运行结果如下：

iter= 1  loss= 6052386.5  memory= 4208.0
iter= 2  loss= 2863788.0  memory= 4253.0
iter= 3  loss= 2265501.2  memory= 4280.0
iter= 4  loss= 2006586.8  memory= 4308.0
iter= 5  loss= 1869531.2  memory= 4472.0
iter= 6  loss= 1792165.0  memory= 4499.0
iter= 7  loss= 1744736.5  memory= 4529.0
iter= 8  loss= 1705666.1  memory= 4558.0
iter= 9  loss= 1678203.8  memory= 4588.0
iter= 10  loss= 1654413.4  memory= 4617.0
iter= 11  loss= 1643800.1  memory= 4647.0
iter= 12  loss= 1627259.1  memory= 4676.0
iter= 13  loss= 1612042.0  memory= 4706.0
iter= 14  loss= 1599993.6  memory= 4735.0
iter= 15  loss= 1587904.2  memory= 4765.0
iter= 16  loss= 1579129.6  memory= 4794.0
iter= 17  loss= 1565546.0  memory= 4824.0
iter= 18  loss= 1557875.1  memory= 4853.0

可以看到，每次迭代后“memory”（内存）使用量都在增加。我该如何解决这个问题？非常感谢您的帮助！

【问题讨论】:

【参考方案1】:

我通过安装“tf-nightly”解决了这个问题。

【讨论】:

除了 tf-nightly，您还可以尝试 TensorFlow 2.1.0 版本，这是撰写本文时的最新版本。

以上是关于使用 TF2.0 训练 RNN 的每次迭代逐渐增加内存使用量的主要内容,如果未能解决你的问题,请参考以下文章

使用 dropout (TF2.0) 时,可变批量大小不适用于 tf.keras.layers.RNN?

关于RNN (循环神经网络)相邻采样为什么在每次迭代之前都需要将参数detach

deepfacelab迭代多少次合适

训练减少，验证 - 增加。训练损失，验证损失减少

拥抱TF2.0的时代来了

Tensorflow2.0语法 - 张量&基本函数