In[0] 和 In[1] 必须具有兼容的批次尺寸:[64,32,32,128] 与 [128,32,32,64]



【中文标题】In[0] 和 In[1] 必须具有兼容的批次尺寸:[64,32,32,128] 与 [128,32,32,64]【英文标题】:In[0] and In[1] must have compatible batch dimensions: [64,32,32,128] vs. [128,32,32,64] 【发布时间】:2020-12-13 16:19:25 【问题描述】:

我正在使用 tensorflow 和 keras(TensorFlow(+Keras2) 和 Python3(CUDA 10.0 和 Intel MKL-DNN)),我遇到了批处理尺寸不兼容的问题,但我不知道哪个部分出了问题。我将不胜感激任何帮助和建议。我使用的数据是 MNIST 和 USPS。 错误是这样的

Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1365, in _do_call
    return fn(*args)
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1350, in _run_fn
    target_list, run_metadata)
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1443, in _call_tf_sessionrun
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,32,32,128] vs. [128,32,32,64]
         [[node MatMul]]
  (1) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,32,32,128] vs. [128,32,32,64]
         [[node MatMul]]
0 successful operations.
0 derived errors ignored.

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "", line 343, in <module>, ys, Xt, yt)
  File "", line 255, in fit
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 956, in run
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1180, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1359, in _do_run
  File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/client/", line 1384, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,32,32,128] vs. [128,32,32,64]
         [[node MatMul (defined at /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ ]]
  (1) Invalid argument: In[0] and In[1] must have compatible batch dimensions: [64,32,32,128] vs. [128,32,32,64]
         [[node MatMul (defined at /home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow_core/python/framework/ ]]
0 successful operations.
0 derived errors ignored.


import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from tensorflow.python.framework import ops
from keras import backend as K
from keras.layers import Dense, Dropout, Activation, GaussianNoise, Flatten
from keras.layers import Conv2D, MaxPooling2D, MaxPool2D
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU, ELU, LeakyReLU

N_CLASS = 10
def shuffle_aligned_list(data):
    """Shuffle arrays in a list by shuffling each array identically."""
    idx = data[0].shape[0]
    p = np.random.permutation(idx)
    return [d[p] for d in data]

def batch_gen(data, batch_size, shuffle=True):
    """Generate batches of data.

    Given a list of array-like objects, generate batches of a given
    size by yielding a list of array-like objects corresponding to the
    same slice of each input.
    if shuffle:
        data = shuffle_aligned_list(data)

    batch_count = 0
    while True:
        if batch_count * batch_size + batch_size >= len(data[0]):
            batch_count = 0

            if shuffle:
                data = shuffle_aligned_list(data)

        start = batch_count * batch_size
        end = start + batch_size
        batch_count += 1
        yield [d[start:end] for d in data]

def val_batch_gen(data, batch_size):
    """Generate batches of data.

    Given a list of array-like objects, generate batches of a given
    size by yielding a list of array-like objects corresponding to the
    same slice of each input.
    batch_count = 0
    # while True:
    nbatch = len(data[0]) // batch_size
    if nbatch * batch_size < len(data[0]): nbatch += 1

    for i in range(nbatch):
        start = batch_count * batch_size
        end = start + batch_size
        batch_count += 1
        yield [d[start:end] for d in data]

class DeepCoralNet(object):

    def __init__(self, nfeatures=50, arch=[8, 'act'], coral_layer_idx=[1],
                 batch_size=16, supervised=False, confusion=1e4, confusion_incr=50, confusion_max=1e9,
                 val_data=None, validate_every=1,
                 activations='relu', epochs=1000, optimizer=None, noise=0.0, droprate=0.0, verbose=False):

        self.batch_size = batch_size
        self.epochs = epochs
        self.validate_every = validate_every
        self.supervised = supervised
        self.verbose = verbose

        if val_data is None:
            self.validate_every = 0
            self.Xval = val_data[0]
            self.yval = val_data[1]

        self._build_model(nfeatures, arch, supervised, confusion, confusion_incr,
                          confusion_max, activations, noise, droprate, coral_layer_idx, optimizer)

        self.sess = tf.Session()

    def _coral_loss(self, layer_a, layer_b):
        d = tf.cast(tf.shape(layer_a)[1], tf.float32)

        # Source covariance
        xm = layer_a - tf.reduce_mean(layer_a, 0, keep_dims=True)
        xc = tf.matmul(tf.transpose(xm), xm) / self.batch_size

        # Target covariance
        xmt = layer_b - tf.reduce_mean(layer_b, 0, keep_dims=True)
        xct = tf.matmul(tf.transpose(xmt), xmt) / self.batch_size

        coral_loss = tf.reduce_sum(tf.multiply((xc - xct), (xc - xct)))
        coral_loss /= 4 * d * d
        return coral_loss

    def _build_model(self, nfeatures, architecture, supervised, confusion, confusion_incr, confusion_max,
                     activations, noise, droprate, coral_layer_idx, optimizer):

        # self.inp_a = tf.placeholder(tf.float32, shape=(None, nfeatures))
        # self.inp_b = tf.placeholder(tf.float32, shape=(None, nfeatures))
        self.inp_a = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        self.inp_b = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        self.labels_a = tf.placeholder(tf.float32, shape=(None, N_CLASS)) = tf.placeholder(tf.float32, [], name='lr')

        nlayers = len(architecture)
        layers_a = [self.inp_a]
        layers_b = [self.inp_b]

        for i, nunits in enumerate(architecture):

            # print
            # nunits,
            # if i in coral_layer_idx: print
            # '(CORAL)'
            # else: print

            if isinstance(nunits, int):
                shared_layer = Dense(nunits, activation='linear')
            elif nunits == 'noise':
                shared_layer = GaussianNoise(noise)
            elif nunits == 'bn':
                shared_layer = BatchNormalization()
            elif nunits == 'drop':
                shared_layer = Dropout(droprate)
            elif nunits == 'act':
                if activations == 'prelu':
                    shared_layer = PReLU()
                elif activations == 'elu':
                    shared_layer = ELU()
                elif activations == 'leakyrelu':
                    shared_layer = LeakyReLU()
                    shared_layer = Activation(activations)
            elif nunits == 'block1_conv1':
                shared_layer =Conv2D(64, (3, 3),
            elif nunits == 'block1_conv2':
                shared_layer += Conv2D(64, (3, 3),
            elif nunits == 'block1_pool':
                shared_layer += MaxPool2D((2, 2), strides=(2, 2), name='block1_pool')

            layers_a += [shared_layer(layers_a[-1])]
            layers_b += [shared_layer(layers_b[-1])]
            layers_a += [Flatten()(layers_a[-1])]
            layers_b += [Flatten()(layers_b[-1])]

        output_layer = Dense(N_CLASS, activation='linear')
        y_logits = output_layer(layers_a[-1])
        b_logits = output_layer(layers_b[-1])
        self.y_clf = Activation('sigmoid')(y_logits)

        # Sum the losses from both branches...
        self.xe_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels_a, logits=y_logits))

        self.coral_losses = []
        for idx in coral_layer_idx:
            self.coral_losses += [self._coral_loss(layers_a[idx], layers_b[idx])]
        self.coral_losses += [self._coral_loss(y_logits, b_logits)]

        self.domain_loss = tf.reduce_sum(self.coral_losses)

        self.confusion = tf.Variable(float(confusion), trainable=False, dtype=tf.float32)
        conf_incr = tf.cond(self.confusion < confusion_max, lambda: float(confusion_incr), lambda: 0.)
        self.increment_confusion = tf.assign(self.confusion, self.confusion + conf_incr)

        self.domain_loss = self.confusion * self.domain_loss
        self.total_loss = tf.add(self.domain_loss, self.xe_loss)

        if supervised:
            self.labels_b = tf.placeholder(tf.float32, shape=(None, 1))
            self.bloss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.labels_b, logits=b_logits))
            self.total_loss = tf.add(self.total_loss, self.bloss)

        if optimizer is None:
            self.train_step = tf.train.MomentumOptimizer(, 0.9)
            self.train_step = optimizer
        self.train_step = self.train_step.minimize(self.total_loss)

    def predict_proba(self, X, batch_size=None):
        if batch_size is None: batch_size = self.batch_size
        yprobs = np.zeros((X.shape[0]), N_CLASS, dtype=float)

        idx = np.arange(X.shape[0])
        vbatch = val_batch_gen([idx, X], batch_size)
        for i, (thisidx, thisX) in enumerate(vbatch):
            yprobs[thisidx] =,
                                            feed_dict=self.inp_a: thisX, K.learning_phase(): 0).flatten()
        return yprobs

    def evaluate(self, X, y, batch_size=None):
        yprobs = self.predict_proba(X, batch_size)
        acc = np.mean(np.equal(np.argmax(self.predict_proba(X,  batch_size).astype("float32"), axis=1),
                               np.argmax(y, axis=1)))
        return log_loss(y, yprobs), acc

    def fit(self, Xs, ys, Xt, yt=None, Xval=None, yval=None,
            epochs=None, batch_size=None, verbose=None):

        if epochs is None: epochs = self.epochs
        if batch_size is None: batch_size = self.batch_size
        if Xval is None:
            Xval = self.Xval
            yval = self.yval
        if verbose is None: verbose = self.verbose

        S_batches = batch_gen([Xs, ys], batch_size=batch_size)
        if yt is None: yt = np.ones(Xt.shape[0])
        T_batches = batch_gen([Xt, yt], batch_size=batch_size)

        self.history = 'source_loss': [], 'target_loss': [], 'val_loss': [], 'domain_loss': []
        for i in range(epochs):
            p = i / float(epochs)
            lr = 0.01 / (1 + 10. * p) ** 0.75

            Xsource, ysource = next(S_batches)
            Xtarget, ytarget = next(T_batches)

            feed_dict = self.inp_a: Xsource, self.inp_b: Xtarget,
                         # self.labels_a: ysource.reshape(-1, 1), K.learning_phase(): 1,
                         self.labels_a: ysource, K.learning_phase(): 1,
            if self.supervised:
                feed_dict[self.labels_b] = ytarget #.reshape(-1, 1)

            # train
            _, _, confusion, xeloss, dloss, tloss =[

            if self.validate_every > 0 and i % self.validate_every == 0:

                if i == 0:
                    print ('Epoch confusion  dloss  tloss  sloss tloss vloss')

                self.history['source_loss'] += [self.evaluate(Xs, ys)]
                self.history['target_loss'] += [self.evaluate(Xt, yt)]
                self.history['val_loss'] += [self.evaluate(Xval, yval)]
                self.history['domain_loss'] += [dloss]

                print (':04d :.2f  :.4E  :.4f  :.5f :.5f :.5f '.format(i, confusion, dloss, tloss,
                print(':04d :.2f  :.4E  :.4f  :.5f :.5f :.5f '.format(i, self.evaluate(Xs, ys)[1],
                                                             self.evaluate(Xt, yt)[1],
                                                             self.evaluate(Xval, yval)[1]))

# input data
from tensorflow.examples.tutorials.mnist import input_data
def return_mnist(path_train, path_test):
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mnist_train = np.reshape(np.load(path_train), (55000, 32, 32, 1))
    mnist_train = np.reshape(mnist_train, (55000, 32, 32, 1))
    mnist_train = mnist_train.astype(np.float32)
    mnist_test = np.reshape(np.load(path_test), (10000, 32, 32, 1)).astype(
    mnist_test = np.reshape(mnist_test, (10000, 32, 32, 1))
    mnist_train = np.concatenate([mnist_train, mnist_train, mnist_train], 3)
    mnist_test = np.concatenate([mnist_test, mnist_test, mnist_test], 3)
    return mnist_train, mnist_test, mnist.train.labels, mnist.test.labels
path_mnist_train = '/home/miles/atda-master/train_mnist_32x32.npy'
path_mnist_test = '/home/miles/atda-master/test_mnist_32x32.npy'

import cv2
import pickle as pkl

def __resize_array_images(array_images, size):
    new_array = []
    for i in range(len(array_images)):
        img = cv2.resize( array_images[i], (size, size), interpolation = cv2.INTER_CUBIC )
        new_array.append( img )
    return np.array(new_array)

path = '/home/miles/atda-master/usps.h5'
import h5py

with h5py.File(path, 'r') as hf:
    train = hf.get('train')
    X_tr = train.get('data')[:]
    y_tr = train.get('target')[:]
    test = hf.get('test')
    X_te = test.get('data')[:]
    y_te = test.get('target')[:]
X_tr = np.reshape(X_tr, [X_tr.shape[0], 16, 16, 1])
X_tr = __resize_array_images(X_tr, 32)
X_tr = np.expand_dims(X_tr, -1)
X_tr = np.concatenate([X_tr, X_tr, X_tr], 3)

X_te = np.reshape(X_te, [X_te.shape[0], 16, 16, 1])
X_te = __resize_array_images(X_te, 32)
X_te = np.expand_dims(X_te, -1)
X_te = np.concatenate([X_te, X_te, X_te], 3)

y_tr = tf.keras.utils.to_categorical(y_tr, 10)
y_te = tf.keras.utils.to_categorical(y_te, 10)

Xt = X_tr
yt = y_tr
Xv = X_te
yv = y_te

_, mnist_test , _, mnist_test_label = return_mnist(path_mnist_train, path_mnist_test)
from sklearn.model_selection import train_test_split
Xs, _, ys, _ = train_test_split(
    mnist_test, mnist_test_label, test_size=0.3)

opt = tf.train.MomentumOptimizer(1e-3, 0.9)
model = DeepCoralNet(nfeatures=Xs.shape[1],arch=['block1_conv1'], val_data=(Xv, yv),epochs=10000, batch_size=128, validate_every=100,
    optimizer=opt, activations='leakyrelu'), ys, Xt, yt)
vloss_grl = model.evaluate(Xv, yv)


问题出现在哪一行?

您在matmul 之前执行transpose 操作。即tf.matmul(tf.transpose(xm), xm) 在您的代码中,这就是您的维度被洗牌的原因。能否请您参考这个tf.linalg.matmul tensorflow 文档。


import tensorflow as tf

a = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], shape=[2, 3, 2])

b = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], shape=[2, 3, 2])

x = tf.matmul(a, b)

输出 -

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]], shape=(2, 3, 2), dtype=int32)
[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]], shape=(2, 3, 2), dtype=int32)
InvalidArgumentError                      Traceback (most recent call last)
<ipython-input-18-8f20ea7be877> in <module>()
      7 print(b)
----> 9 x = tf.matmul(a, b)
     10 print(x)

4 frames
/usr/local/lib/python3.6/dist-packages/ in raise_from(value, from_value)

InvalidArgumentError: In[0] mismatch In[1] shape: 2 vs. 3: [2,3,2] [2,3,2] 0 0 [Op:BatchMatMulV2]

在任何转置之后,输入必须是秩 >= 2 的张量,其中内部 2 维指定有效的矩阵乘法维数。


import tensorflow as tf

a = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], shape=[2, 3, 2])

b = tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], shape=[2, 2, 3])

x = tf.matmul(a, b)

输出 -

[[[ 1  2]
  [ 3  4]
  [ 5  6]]

 [[ 7  8]
  [ 9 10]
  [11 12]]], shape=(2, 3, 2), dtype=int32)
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]], shape=(2, 2, 3), dtype=int32)
[[[  9  12  15]
  [ 19  26  33]
  [ 29  40  51]]

 [[129 144 159]
  [163 182 201]
  [197 220 243]]], shape=(2, 3, 3), dtype=int32)



