如何更改图像尺寸以使卷积算法正常工作

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了如何更改图像尺寸以使卷积算法正常工作相关的知识,希望对你有一定的参考价值。

pygame.surfarray.pixels3d 返回一个形状为 (672, 672, 3) 的数组;把它作为状态输入网络时会报错:资源耗尽(Resource exhausted,即显存不足 OOM)。

但是当我传递(6,30,30)数组时它起作用。

任何帮助将不胜感激。

import numpy
import random
from DeepRTS import PyDeepRTS 
from Algorithms.DQN2.DQN import DQN 

# Start the game
g = PyDeepRTS('21x21-2v2.json')

# Add players
player1 = g.add_player()
player2 = g.add_player()
#player3 = g.add_player()
#player4 = g.add_player()
# Set FPS and UPS limits
g.set_max_fps(10000000)
g.set_max_ups(10000000)

# How often the state should be drawn
g.render_every(20)

# How often the capture function should return a state
g.capture_every(20)
# How often the game image should be drawn to the screen
g.view_every(20)

# Start the game (flag)
g.start()

# Action ids handed to the engine. The agent works with 0-based indices into
# this list, so every index is translated through it before do_action().
# NOTE(review): assumes these 16 ids are exactly the ones the engine accepts
# -- confirm against the DeepRTS action constants.
actions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]

player1.do_action(13)
player2.do_action(13)

# Step the game until capture() yields a frame (it returns None on the
# iterations skipped by capture_every); that first frame fixes the shape of
# the network input.
state2 = None
while state2 is None:
    g.tick()  # Update the game clock
    g.update()  # Process the game state
    g.render()  # Draw the game state to graphics
    state2 = g.capture()

dqn = DQN(state2, len(actions))

# Run forever
while True:
    g.tick()  # Update the game clock
    g.update()  # Process the game state

    g.render()  # Draw the game state to graphics
    state2 = g.capture()  # Captures current state (Returns None if .capture_every is set for some iterations)

    g.caption()  # Show Window caption

    g.view()  # View the game state in the pygame window

    if state2 is None:
        # No frame captured this iteration: nothing to act or learn on.
        continue

    # dqn.act() returns an index in range(len(actions)).
    actionID = dqn.act()

    # Query the terminal flag once so the value the agent is trained on is
    # the same one that triggers the reset below.
    terminal = g.is_terminal()
    reward_ = player1.get_Score()

    player1.do_action(actions[actionID])
    # The opponent plays uniformly at random among the same listed ids (the
    # previous randint(0, 19) could produce ids that are not in `actions`).
    player2.do_action(actions[numpy.random.randint(len(actions))])

    # The network is trained on the 0-based index, not the engine id.
    dqn.train(actionID, reward_, terminal, state2)

    if terminal:
        print("finished")
        g.reset()
    print(actionID, " Reward", reward_)


#This is the DQN algorithm
import os
import random
import numpy as np
import tensorflow as tf
from collections import deque
from skimage.color import rgb2gray
from skimage.transform import resize
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras import backend as K
# Select Theano-style ('th') image dimension ordering, i.e. channels-first
# (channels, rows, cols), for every Keras layer built after this call.
K.set_image_dim_ordering('th')
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Creating the session at import time applies the GPU options above.
# NOTE(review): DQN.__init__ opens a second InteractiveSession of its own;
# this module-level `session` is not referenced again in the visible code.
session = InteractiveSession(config=config)

class DQN:
    """Deep Q-Network agent built from Keras layers on a TensorFlow 1.x graph.

    The agent owns an online Q-network, a target network with the same
    architecture, a bounded replay memory and a linearly annealed
    epsilon-greedy policy.  Per environment step the caller is expected to
    call ``act()`` and then ``train(action, reward, terminal, observation)``.
    """

    def __init__(
        self,
        initial_state,
        num_actions,
        initial_epsilon=1.0,
        final_epsilon=0.1,
        exploration_steps=10000,
        initial_replay_size=10,
        memory_size=400000,
        batch_size=9,  # 32
        learning_rate=0.0025,
        momentum=0.95,
        min_grad=0.01,
        env_name="DeepRTS",
        save_network_path="dqn2/saved_networks/",
        save_summary_path="dqn2/summary/",
        load_network=False,
        gamma=0.99,
        train_interval=40,
        target_update_interval=1000,
        save_interval=30000,
    ):
        """Build both networks, the training ops, the session and summaries.

        Args:
            initial_state: Array used as the first state; its ``shape`` is
                used as the network input shape.
            num_actions: Size of the discrete action space (network outputs).
            initial_epsilon: Starting exploration probability.
            final_epsilon: Exploration probability after annealing.
            exploration_steps: Number of steps over which epsilon is annealed.
            initial_replay_size: Steps taken with random actions before any
                training or annealing happens.
            memory_size: Maximum number of transitions kept in replay memory.
            batch_size: Transitions sampled per gradient step.
            learning_rate: RMSProp learning rate.
            momentum: RMSProp momentum.
            min_grad: RMSProp epsilon.
            env_name: Suffix for the checkpoint/summary paths and prefix for
                the summary tags.
            save_network_path: Directory prefix for checkpoints.
            save_summary_path: Directory prefix for TensorBoard summaries.
            load_network: Restore the latest checkpoint if one exists.
            gamma: Discount factor.
            train_interval: Run a gradient step every this many steps.
            target_update_interval: Sync the target network every this many
                steps.
            save_interval: Write a checkpoint every this many steps.
        """
        self.state = initial_state
        self.sshape = initial_state.shape   # Shape of the state
        self.num_actions = num_actions  # Action space
        self.epsilon = initial_epsilon  # Epsilon-greedy start
        self.final_epsilon = final_epsilon  # Epsilon-greedy end
        # Epsilon decrease per step
        self.epsilon_step = (self.epsilon - self.final_epsilon) / exploration_steps
        self.initial_replay_size = initial_replay_size
        self.memory_size = memory_size
        self.exploration_steps = exploration_steps

        self.learning_rate = learning_rate
        self.momentum = momentum
        self.min_grad = min_grad
        self.batch_size = batch_size
        self.gamma = gamma

        self.target_update_interval = target_update_interval
        self.save_interval = save_interval

        self.env_name = env_name
        self.save_network_path = save_network_path + self.env_name
        self.save_summary_path = save_summary_path + self.env_name
        self.load_network = load_network

        self.train_interval = train_interval
        self.t = 0  # Number of train() calls so far

        # Summary parameters (accumulated per episode)
        self.total_reward = 0
        self.total_q_max = 0
        self.total_loss = 0
        self.duration = 0
        self.episode = 0

        # Replay memory; maxlen drops the oldest transition automatically
        # once memory_size is exceeded.
        self.replay_memory = deque(maxlen=self.memory_size)

        # Create Q network
        self.s, self.q_values, q_network = self.build_model()
        q_network_weights = q_network.trainable_weights

        # Create target network
        self.st, self.target_q_values, target_network = self.build_model()
        target_network_weights = target_network.trainable_weights

        # Define target network update operation
        self.update_target_network = [
            target_w.assign(online_w)
            for target_w, online_w in zip(target_network_weights, q_network_weights)
        ]

        # Define loss and gradient update operation
        self.a, self.y, self.loss, self.grads_update = self.build_functions(q_network_weights)

        # InteractiveSession installs itself as the default session, which
        # the Tensor.eval() calls in act()/train()/train_network() rely on.
        self.sess = tf.InteractiveSession()
        self.saver = tf.train.Saver(q_network_weights)
        self.summary_placeholders, self.update_ops, self.summary_op = self.setup_summary()
        self.summary_writer = tf.summary.FileWriter(self.save_summary_path, self.sess.graph)

        if not os.path.exists(self.save_network_path):
            os.makedirs(self.save_network_path)

        self.sess.run(tf.global_variables_initializer())

        # Load network
        self.load()

        # Initialize target network
        self.sess.run(self.update_target_network)

    def build_model(self):
        """Create one Q-network and wire it to a fresh input placeholder.

        Returns:
            Tuple ``(s, q_values, model)``: the input placeholder of shape
            ``[None, *self.sshape]``, the output tensor with one value per
            action, and the Keras model itself.
        """
        model = Sequential()
        model.add(Conv2D(32, (1, 1), strides=(1, 1), activation='relu', input_shape=self.sshape))
        model.add(Conv2D(64, (1, 1), activation="relu", strides=(1, 1)))
        model.add(Conv2D(64, (1, 1), activation="relu", strides=(1, 1)))
        # NOTE(review): with 1x1 kernels and stride 1 the spatial size is
        # never reduced, so the Dense(512) kernel below grows linearly with
        # the number of input pixels -- presumably the source of the reported
        # OOM for large frames; confirm before feeding raw screen captures.
        model.add(Flatten())
        model.add(Dense(512, activation='relu'))
        model.add(Dense(self.num_actions))

        s = tf.placeholder(tf.float32, [None, *self.sshape])
        q_values = model(s)

        return s, q_values, model

    def build_functions(self, q_network_weights):
        """Create the loss and the optimizer step for the online network.

        Args:
            q_network_weights: Variables the optimizer is allowed to update.

        Returns:
            Tuple ``(a, y, loss, grads_update)``: action placeholder, target
            placeholder, scalar loss tensor and the training op.
        """
        a = tf.placeholder(tf.int64, [None])
        y = tf.placeholder(tf.float32, [None])

        # Convert action to one hot vector and pick the Q-value of that action
        a_one_hot = tf.one_hot(a, self.num_actions, 1.0, 0.0)
        q_value = tf.reduce_sum(tf.multiply(self.q_values, a_one_hot), reduction_indices=1)

        # Clip the error, the loss is quadratic when the error is in (-1, 1), and linear outside of that region
        error = tf.abs(y - q_value)
        quadratic_part = tf.clip_by_value(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = tf.reduce_mean(0.5 * tf.square(quadratic_part) + linear_part)

        optimizer = tf.train.RMSPropOptimizer(self.learning_rate, momentum=self.momentum, epsilon=self.min_grad)
        grads_update = optimizer.minimize(loss, var_list=q_network_weights)

        return a, y, loss, grads_update

    def new_episode(self):
        """Hook called at episode start; currently a no-op."""
        pass

    def end_episode(self):
        """Hook called at episode end; currently a no-op."""
        pass

    def act(self):
        """Choose an action for the current state with epsilon-greedy policy.

        Returns:
            An integer index in ``range(self.num_actions)``.  The action is
            random while fewer than ``initial_replay_size`` steps have been
            trained or with probability ``epsilon``; otherwise it is the
            argmax of the online network's Q-values for ``self.state``.
        """
        if self.epsilon >= random.random() or self.t < self.initial_replay_size:
            action = random.randrange(self.num_actions)
        else:
            action = np.argmax(self.q_values.eval(feed_dict={self.s: [np.float32(self.state)]}))

        # Anneal epsilon linearly over time
        if self.epsilon > self.final_epsilon and self.t >= self.initial_replay_size:
            self.epsilon -= self.epsilon_step

        return action

    def train_network(self):
        """Run one gradient step on a random minibatch from replay memory.

        Does nothing while the replay memory holds fewer than ``batch_size``
        transitions (``random.sample`` would raise ``ValueError``).
        """
        if len(self.replay_memory) < self.batch_size:
            return

        # Sample random minibatch of transitions from replay memory
        minibatch = random.sample(self.replay_memory, self.batch_size)
        state_batch = [transition[0] for transition in minibatch]
        action_batch = [transition[1] for transition in minibatch]
        reward_batch = np.array([transition[2] for transition in minibatch])
        next_state_batch = [transition[3] for transition in minibatch]
        # Convert True to 1, False to 0
        terminal_batch = np.array([transition[4] for transition in minibatch]) + 0

        target_q_values_batch = self.target_q_values.eval(
            feed_dict={self.st: np.float32(np.array(next_state_batch))}
        )
        # Bellman target; the bootstrap term is zeroed for terminal transitions
        y_batch = reward_batch + (1 - terminal_batch) * self.gamma * np.max(target_q_values_batch, axis=1)

        loss, _ = self.sess.run([self.loss, self.grads_update], feed_dict={
            self.s: np.float32(np.array(state_batch)),
            self.a: action_batch,
            self.y: y_batch,
        })

        self.total_loss += loss

    def _next_state(self, observation):
        """Return the state that follows ``self.state`` given ``observation``.

        An observation with the full state shape replaces the state; anything
        else is treated as new frame(s) appended along axis 0 after dropping
        the oldest entry (a bare frame of shape ``sshape[1:]`` gets a leading
        axis first).
        """
        observation = np.asarray(observation)
        if observation.shape == tuple(self.sshape):
            return observation
        if observation.shape == tuple(self.sshape[1:]):
            observation = observation[np.newaxis]
        return np.append(self.state[1:], observation, axis=0)

    def _finish_episode(self):
        """Write the episode summary, print the debug line, reset counters."""
        avg_q_max = self.total_q_max / float(self.duration)
        avg_loss = self.total_loss / (float(self.duration) / float(self.train_interval))

        # Write summary -- only once real training has started, so `stats`
        # is always defined when it is used.
        if self.t >= self.initial_replay_size:
            stats = [self.total_reward, avg_q_max, self.duration, avg_loss]
            for update_op, placeholder, value in zip(self.update_ops, self.summary_placeholders, stats):
                self.sess.run(update_op, feed_dict={placeholder: float(value)})

            summary_str = self.sess.run(self.summary_op)
            self.summary_writer.add_summary(summary_str, self.episode + 1)

        # Debug
        if self.t < self.initial_replay_size:
            mode = 'random'
        elif self.initial_replay_size <= self.t < self.initial_replay_size + self.exploration_steps:
            mode = 'explore'
        else:
            mode = 'exploit'
        print('EPISODE: {0:6d} / TIMESTEP: {1:8d} / DURATION: {2:5d} / EPSILON: {3:.5f} / TOTAL_REWARD: {4:3.0f} / AVG_MAX_Q: {5:2.4f} / AVG_LOSS: {6:.5f} / MODE: {7}'.format(
            self.episode + 1, self.t, self.duration, self.epsilon,
            self.total_reward, avg_q_max, avg_loss, mode))

        self.total_reward = 0
        self.total_q_max = 0
        self.total_loss = 0
        self.duration = 0
        self.episode += 1

    def train(self, action, reward, terminal, observation):
        """Record one transition and run the scheduled training work.

        Args:
            action: The performed action which led to this state.
            reward: The reward given in the state transition.
            terminal: Is state terminal? (Loss / Victory)
            observation: New state observation after action.
        """
        next_state = self._next_state(observation)

        # Clip all positive rewards at 1 and all negative rewards at -1, leaving 0 rewards unchanged
        reward = np.clip(reward, -1, 1)

        # Store transition in replay memory (state -> next_state)
        self.replay_memory.append((self.state, action, reward, next_state, terminal))

        if self.t >= self.initial_replay_size:
            # Train network
            if self.t % self.train_interval == 0:
                self.train_network()

            # Update target network
            if self.t % self.target_update_interval == 0:
                self.sess.run(self.update_target_network)

            # Save network
            if self.t % self.save_interval == 0:
                save_path = self.saver.save(self.sess, self.save_network_path + '/' + self.env_name, global_step=self.t)
                print('Successfully saved: ' + save_path)

        self.total_reward += reward
        self.total_q_max += np.max(self.q_values.eval(feed_dict={self.s: [np.float32(self.state)]}))
        self.duration += 1

        if terminal:
            self._finish_episode()

        self.t += 1
        # Advance to the new state so the next act()/train() sees it.
        self.state = next_state

    def iterate(self):
        """Placeholder hook; currently a no-op."""
        pass

    def load(self):
        """Restore the latest checkpoint when ``load_network`` is set and one exists."""
        checkpoint = tf.train.get_checkpoint_state(self.save_network_path)
        if self.load_network and checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.sess, checkpoint.model_checkpoint_path)
            print('Successfully loaded: ' + checkpoint.model_checkpoint_path)
        else:
            print('Training new network...')

    def setup_summary(self):
        """Create the per-episode scalar summaries.

        Returns:
            Tuple ``(summary_placeholders, update_ops, summary_op)``: one
            float placeholder and one assign op per summary variable (total
            reward, average max Q, duration, average loss, in that order),
            plus the merged summary op.
        """
        episode_total_reward = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Total Reward/Episode', episode_total_reward)
        episode_avg_max_q = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Average Max Q/Episode', episode_avg_max_q)
        episode_duration = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Duration/Episode', episode_duration)
        episode_avg_loss = tf.Variable(0.)
        tf.summary.scalar(self.env_name + '/Average Loss/Episode', episode_avg_loss)
        summary_vars = [episode_total_reward, episode_avg_max_q, episode_duration, episode_avg_loss]
        summary_placeholders = [tf.placeholder(tf.float32) for _ in summary_vars]
        update_ops = [
            variable.assign(placeholder)
            for variable, placeholder in zip(summary_vars, summary_placeholders)
        ]
        summary_op = tf.summary.merge_all()
        return summary_placeholders, update_ops, summary_op

和错误:

2019-07-07 02:58:55.652029: W tensorflow/core/common_runtime/bfc_allocator.cc:319] ******************************************************************************************____________________*
2019-07-07 02:58:55.652085: W tensorflow/core/framework/op_kernel.cc:1502] OP_REQUIRES failed at assign_op.h:117 : Resource exhausted: OOM when allocating tensor with shape[409600,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):

答案

当 CNN 因显存不足(Out Of Memory,OOM)而报错时,可以尝试以下措施:

  1. 如上文Priyanka Chaudhary所述,减小Mini-Batch大小。
  2. 用 16 位浮点数(16 bit Floats)替换 32 位浮点数(32 bit Floats)(前提是数值在 16 位浮点数可表示的范围内)
  3. 增加步幅的高度(Sh)和宽度(Sw)(除非会损害模型的目的/性能),以便减少输入的Dimensionality/Shape,从而减少参数的数量,从而减少已消耗的RAM。
  4. 在多个设备之间分配训练任务(如果可能)。

以上是关于如何更改图像尺寸以使卷积算法正常工作的主要内容,如果未能解决你的问题,请参考以下文章

如何从 url 中删除端口以使 xampp 数据库中的图像源正常工作?

如何修复Code以使C#中的Fibonacci搜索算法正常 工作

我可以更改啥以使此代码正常工作?

Bootstrap 4 Carousel Scale不同尺寸的图像无法正常工作

如何覆盖 closeEvent 函数以使其正常工作

如何根据屏幕宽度更改图像路径?