lstm维度不匹配问题由tensorflow引起

我构建了一个LSTM网络,我的输入维度是100*100*83 (batch_size=100, steps = 100, char_vector = 83)。我构建了两个具有512个隐藏单元的LSTM层。

# coding: utf-8from __future__ import print_functionimport tensorflow as tfimport numpy as npimport timeclass CharRNN:    def __init__(self, num_classes, batch_size=64, num_steps=50, lstm_size=128, num_layers =2,\                 learning_rate = 0.001, grad_clip=5, keep_prob=0.001,sampling= False):        # True for SGD        if sampling == True:            self.batch_size, self.num_steps = 1,1        else:            self.batch_size, self.num_steps = batch_size, num_steps        tf.reset_default_graph()        self.inputs, self.targets, self.keep_prob = self.build_inputs(self.batch_size,self.num_steps)        self.keep_prob = keep_prob        self.cell, self.initial_state = self.build_lstm(lstm_size,num_layers,self.batch_size,self.keep_prob)       # print(self.cell.state_size)        x_one_hot = tf.one_hot(self.inputs, num_classes)        print("cell state size: ",self.cell.state_size)        print("cell initial  state: ",self.initial_state)        print("this is inputs", self.inputs)        print("x_one_hot: ",x_one_hot)        outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)    def build_inputs(self, num_seqs, num_steps):        inputs = tf.placeholder(tf.int32, shape=(num_seqs, num_steps), name = "inputs")        targets = tf.placeholder(tf.int32, shape= (num_seqs, num_steps), name="targets")        print('inputs shape: ',inputs.shape)        keep_prob = tf.placeholder(tf.float32, name="keep_prob")        return inputs, targets, keep_prob    def build_lstm(self, lstm_size, num_layers, batch_size, keep_prob):        # construct lstm cell        lstm = tf.nn.rnn_cell.BasicLSTMCell(lstm_size)        # add dropout        drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob= keep_prob)        # stack multiple rnn cells        cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])        initial_state = cell.zero_state(batch_size, tf.float32)        return cell, initial_stateif __name__ == '__main__':    len_vocab = 83    batch_size = 100    num_steps = 100    lstm_size = 512    num_layers = 2    learning_rate = 0.001    keep_prob = 0.5    epochs = 20    save_every_n = 200    print("h1")    model = CharRNN(len_vocab, batch_size = batch_size, num_steps=num_steps, lstm_size = lstm_size,num_layers=num_layers\                    ,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob

我在tf.nn.dynamic_rnn处遇到了维度不匹配错误。错误信息如下:

    inputs shape:  (100, 100)cell state size:  (LSTMStateTuple(c=512, h=512), LSTMStateTuple(c=512, h=512))cell initial  state:  (LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>), LSTMStateTuple(c=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros:0' shape=(100, 512) dtype=float32>, h=<tf.Tensor 'MultiRNNCellZeroState/DropoutWrapperZeroState_1/BasicLSTMCellZeroState/zeros_1:0' shape=(100, 512) dtype=float32>))this is inputs Tensor("inputs:0", shape=(100, 100), dtype=int32)x_one_hot:  Tensor("one_hot:0", shape=(100, 100, 83), dtype=float32)Traceback (most recent call last):  File "./seq2_minimal.py", line 70, in <module>    ,learning_rate=learning_rate,sampling= False,keep_prob = keep_prob)  File "./seq2_minimal.py", line 32, in __init__    outputs, state = tf.nn.dynamic_rnn(self.cell, x_one_hot, initial_state= self.initial_state)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 614, in dynamic_rnn    dtype=dtype)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 777, in _dynamic_rnn_loop    swap_memory=swap_memory)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2816, in while_loop    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2640, in BuildLoop    pred, body, original_loop_vars, loop_vars, shape_invariants)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2590, in _BuildLoop    body_result = body(*packed_vars_for_body)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 762, in _time_step    (output, new_state) = call_cell()  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn.py", line 748, in <lambda>    call_cell = lambda: cell(input_t, state)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__    return super(RNNCell, self).__call__(inputs, state)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__    outputs = self.call(inputs, *args, **kwargs)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1066, in call    cur_inp, new_state = cell(cur_inp, cur_state)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 891, in __call__    output, new_state = self._cell(inputs, state, scope)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in __call__    return super(RNNCell, self).__call__(inputs, state)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 575, in __call__    outputs = self.call(inputs, *args, **kwargs)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 441, in call    value=self._linear([inputs, h]), num_or_size_splits=4, axis=1)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1189, in __call__    res = math_ops.matmul(array_ops.concat(args, 1), self._weights)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 1891, in matmul    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2437, in _mat_mul    name=name)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper    op_def=op_def)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2958, in create_op    set_shapes_for_outputs(ret)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2209, in set_shapes_for_outputs    shapes = shape_func(op)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2159, in call_with_requiring    return call_cpp_shape_fn(op, require_shape_fn=True)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 627, in call_cpp_shape_fn    require_shape_fn)  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 691, in _call_cpp_shape_fn_impl    raise ValueError(err.message)ValueError: Dimensions must be equal, but are 1024 and 595 for 'rnn/while/rnn/multi_rnn_cell/cell_0/cell_0/basic_lstm_cell/MatMul_1' (op: 'MatMul') with input shapes: [100,1024], [595,2048].

我搜索后发现tensorflow的lstm单元应该能自动调整其输入大小。但错误信息显示了相反的情况。

它显示

输入大小为[100, 1024],而lstm为[595, 2048]。

首先感谢您。


回答:

cell = tf.nn.rnn_cell.MultiRNNCell([drop for _ in range(num_layers)])

改为

cell = tf.nn.rnn_cell.MultiRNNCell([drop])

因为您给定的输入张量和生成的张量不一致。

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注