I have been working with TensorFlow (version 1.3) and Seq2Seq models for a while, and I already have a working Seq2Seq model. But when I try to add an attention mechanism to it, I get the following stack trace:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-118-a6d1c9e78e5d> in <module>()
     26                          rnn_size,
     27                          num_layers,
---> 28                          target_vocab_to_int)
     29 
     30 

<ipython-input-116-41a4ee1f8ac2> in seq2seq_model(input_data, target_data, keep_prob, batch_size, source_sequence_length, target_sequence_length, max_target_sentence_length, source_vocab_size, target_vocab_size, enc_embedding_size, dec_embedding_size, rnn_size, num_layers, target_vocab_to_int)
     32                          2*rnn_size,
     33                          num_layers, target_vocab_to_int, target_vocab_size,
---> 34                          batch_size, keep_prob, dec_embedding_size , enc_out)
     35 
     36     return training_decoder_output, inference_decoder_output

<ipython-input-115-3a03827107f3> in decoding_layer(dec_input, encoder_state, target_sequence_length, max_target_sequence_length, rnn_size, num_layers, target_vocab_to_int, target_vocab_size, batch_size, keep_prob, decoding_embedding_size, encoder_outputs)
     55     with tf.variable_scope("decode"):
     56         train_decoder_out = decoding_layer_train(new_state, attn_cell, dec_embed_input,
---> 57                          target_sequence_length, max_target_sequence_length, output_layer, keep_prob)
     58 
     59     with tf.variable_scope("decode", reuse=True):

<ipython-input-12-484f2d84b18e> in decoding_layer_train(encoder_state, dec_cell, dec_embed_input, target_sequence_length, max_summary_length, output_layer, keep_prob)
     23     training_decoder_output = tf.contrib.seq2seq.dynamic_decode(training_decoder,
     24                                                                 impute_finished=True,
---> 25                                                                 maximum_iterations=max_summary_length)[0]
     26     return training_decoder_output
     27 

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.pyc in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    284         ],
    285         parallel_iterations=parallel_iterations,
--> 286         swap_memory=swap_memory)
    287 
    288     final_outputs_ta = res[1]

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
   2773     context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
   2774     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775     result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
   2776     return result
   2777 

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2602       self.Enter()
   2603       original_body_result, exit_vars = self._BuildLoop(
-> 2604           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2605     finally:
   2606       self.Exit()

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2493     # Let the context know the loop variables so the loop variables
   2494     # would be added in the outer contexts properly.
-> 2495     self._InitializeValues(loop_vars)
   2496     real_vars = loop_vars
   2497     if self._outer_context:

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _InitializeValues(self, values)
   2475         self._values.add(x.name)
   2476       else:
-> 2477         self._values.add(x.values.name)
   2478         self._values.add(x.indices.name)
   2479       if isinstance(x, ops.IndexedSlices):

AttributeError: 'NoneType' object has no attribute 'values'
I tried searching Google for a solution but did not find an answer. The closest thing I found was a similar issue with the BeamSearchDecoder, but that one appears to be a bug (here).
I am not sure whether I did something wrong in my code or whether I have simply hit a bug. Here are the relevant parts of my code:
import tensorflow as tf
from tensorflow.python.layers.core import Dense  # Dense is used for the output projection


def decoding_layer(dec_input, encoder_state, target_sequence_length, max_target_sequence_length,
                   rnn_size, num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size, encoder_outputs):
    """
    Create the decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of the target sequences
    :param rnn_size: RNN size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary mapping target words to ids
    :param target_vocab_size: Size of the target vocabulary
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (training BasicDecoderOutput, inference BasicDecoderOutput)
    """
    # 1. Decoder embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def create_cell(rnn_size):
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop

    dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
    #dec_cell = tf.contrib.rnn.MultiRNNCell(cells_a)

    # Attention mechanism details
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size,
                                                               memory=encoder_outputs)

    attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism,
                                                    attention_layer_size=rnn_size/2)

    attn_zero = attn_cell.zero_state(batch_size, tf.float32)
    attn_zero = attn_zero.clone(cell_state=encoder_state)

    new_state = tf.contrib.seq2seq.AttentionWrapperState(cell_state=encoder_state,
                                                         attention=attn_zero,
                                                         time=0,
                                                         alignments=None,
                                                         alignment_history=())

    """out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                attn_cell, target_vocab_size, reuse=True
            )"""
    # End of attention mechanism

    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(new_state, attn_cell, dec_embed_input,
                                                 target_sequence_length, max_target_sequence_length,
                                                 output_layer, keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_decoder_out = decoding_layer_infer(new_state, attn_cell, dec_embeddings,
                                                 target_vocab_to_int['<GO>'],
                                                 target_vocab_to_int['<EOS>'],
                                                 max_target_sequence_length, target_vocab_size,
                                                 output_layer, batch_size, keep_prob)

    return (train_decoder_out, infer_decoder_out)


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer(decoding_layer)


def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, target_sequence_length,
                         max_summary_length, output_layer, keep_prob):
    """
    Create a decoding layer for training
    :param encoder_state: Encoder state
    :param dec_cell: Decoder RNN cell
    :param dec_embed_input: Decoder embedded input
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_summary_length: The length of the longest sequence in the batch
    :param output_layer: Function to apply the output layer
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing training logits and sample_id
    """
    training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_embed_input,
                                                        sequence_length=target_sequence_length,
                                                        time_major=False)

    training_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, training_helper,
                                                       encoder_state, output_layer)

    training_decoder_output = tf.contrib.seq2seq.dynamic_decode(training_decoder,
                                                                impute_finished=True,
                                                                maximum_iterations=max_summary_length)[0]
    return training_decoder_output


"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer_train(decoding_layer_train)
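(For context: decoding_layer_infer is called above but not shown. It is the greedy-decoding counterpart of decoding_layer_train; assuming the usual TF 1.3 pattern with GreedyEmbeddingHelper and inferring the parameter names from the call site, it would look roughly like the sketch below. vocab_size and keep_prob are unused in this sketch.)

def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length, vocab_size,
                         output_layer, batch_size, keep_prob):
    # Greedy inference: at each step, feed back the embedding of the previous
    # step's argmax prediction, starting from <GO> and stopping at <EOS>.
    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32),
                           [batch_size], name='start_tokens')

    inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings,
                                                                start_tokens,
                                                                end_of_sequence_id)

    inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell,
                                                        inference_helper,
                                                        encoder_state,
                                                        output_layer)

    inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(
        inference_decoder,
        impute_finished=True,
        maximum_iterations=max_target_sequence_length)[0]

    return inference_decoder_output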
Any help is much appreciated. Thanks in advance.
Answer:
OK, it turns out I was doing it wrong. The problem was in the decoding_layer function: I should not have created the AttentionWrapperState explicitly. Here is the updated code:
def decoding_layer(dec_input, encoder_state, target_sequence_length, max_target_sequence_length,
                   rnn_size, num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size, encoder_outputs):
    """
    Create the decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of the target sequences
    :param rnn_size: RNN size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary mapping target words to ids
    :param target_vocab_size: Size of the target vocabulary
    :param batch_size: Batch size
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (training BasicDecoderOutput, inference BasicDecoderOutput)
    """
    # 1. Decoder embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def create_cell(rnn_size):
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop

    dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
    #dec_cell = tf.contrib.rnn.MultiRNNCell(cells_a)

    # Attention mechanism details
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size,
                                                               memory=encoder_outputs)

    attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism,
                                                    attention_layer_size=rnn_size/2)

    attn_zero = attn_cell.zero_state(batch_size, tf.float32)
    attn_zero = attn_zero.clone(cell_state=encoder_state)

    #new_state = tf.contrib.seq2seq.AttentionWrapperState(cell_state=encoder_state, attention=attn_zero, time=0, alignments=None, alignment_history=())

    """out_cell = tf.contrib.rnn.OutputProjectionWrapper(
                attn_cell, target_vocab_size, reuse=True
            )"""
    # End of attention mechanism

    #tensor_util.make_tensor_proto(attn_cell)
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(attn_zero, attn_cell, dec_embed_input,
                                                 target_sequence_length, max_target_sequence_length,
                                                 output_layer, keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_decoder_out = decoding_layer_infer(attn_zero, attn_cell, dec_embeddings,
                                                 target_vocab_to_int['<GO>'],
                                                 target_vocab_to_int['<EOS>'],
                                                 max_target_sequence_length, target_vocab_size,
                                                 output_layer, batch_size, keep_prob)

    return (train_decoder_out, infer_decoder_out)
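To spell the fix out in isolation: let the AttentionWrapper build its own zero state and only swap in the encoder's final cell state, instead of assembling an AttentionWrapperState by hand. A minimal sketch of just the changed lines, using the same variable names as above:

# zero_state() returns an AttentionWrapperState whose fields (time, alignments,
# alignment_history, attention) are all properly initialised tensors.
attn_zero = attn_cell.zero_state(batch_size, tf.float32)

# Replace only the underlying cell state with the encoder's final state and use
# the result as the decoder's initial state.
attn_zero = attn_zero.clone(cell_state=encoder_state)

# A hand-built AttentionWrapperState with alignments=None leaves a None field
# that dynamic_decode's internal while_loop cannot process, which is what
# produced "AttributeError: 'NoneType' object has no attribute 'values'".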
Hope this helps someone else.
Thanks.