Background
I'm creating a Google Colab for each talk that interested me at the TensorFlow 2020 Summit. Note that I'm using TensorFlow 2.1. I ran into a problem while trying to implement the 'Learning to Read with TensorFlow' talk.
Everything went smoothly until I reached the EncoderDecoder class definition. When I call fit on my custom Model subclass, I hit an error, detailed below. The last notable line is AttributeError: 'NoneType' object has no attribute 'dtype'. However, I believe the root cause is a problem in the GradientTape scope and/or in how the decoder layers (including the attention layer) are defined.
Main Code
```python
# Not normally defined here... but doing so for clarity
MAX_VOCAB_SIZE = 5000
WINDOW_LENGTH = 11


class EncoderDecoder(tf.keras.Model):
    def __init__(self, max_features=MAX_VOCAB_SIZE,
                 output_seq_len=WINDOW_LENGTH - 1,
                 embedding_dims=200, rnn_units=512):
        super().__init__()
        self.max_features = max_features
        self.output_seq_len = output_seq_len
        self.embedding_dims = embedding_dims
        self.rnn_units = rnn_units

        self.vectorize_layer = \
            tf.keras.layers.experimental.preprocessing.TextVectorization(
                max_tokens=self.max_features,
                standardize='lower_and_strip_punctuation',
                split='whitespace',
                ngrams=None,
                output_mode='int',
                output_sequence_length=self.output_seq_len,
                pad_to_max_tokens=True)

        # --- <ENCODER STUFF> ---
        # Embedding
        self.encoder_embedding = \
            tf.keras.layers.Embedding(input_dim=self.max_features + 1,
                                      output_dim=self.embedding_dims)

        # ENCODER
        self.lstm_layer = \
            tf.keras.layers.LSTM(units=self.rnn_units,
                                 return_state=True)
        # --- </ENCODER STUFF> ---

        # --- <DECODER STUFF> ---
        # Embedding
        self.decoder_embedding = \
            tf.keras.layers.Embedding(input_dim=self.max_features + 1,
                                      output_dim=self.embedding_dims)

        # ---------------- MAYBE NOT NECESSARY ----------------
        # Sampler (for use during training)
        # This was not shown during the talk but it is pretty obvious
        sampler = tfa.seq2seq.sampler.TrainingSampler()

        # This was not shown during the talk but is required...
        # This is my best guess
        decoder_cell = tf.keras.layers.LSTMCell(units=self.rnn_units)
        # ---------------- MAYBE NOT NECESSARY ----------------

        # Output Layer For Decoder
        self.projection_layer = \
            tf.keras.layers.Dense(self.max_features)

        # DECODER
        self.decoder = \
            tfa.seq2seq.BasicDecoder(cell=decoder_cell,
                                     sampler=sampler,
                                     output_layer=self.projection_layer)
        # --- </DECODER STUFF> ---

        # --- <ATTN STUFF> ---
        # Basic dense attention layer to connect Encoder & Decoder
        self.attention = tf.keras.layers.Attention()
        # --- </ATTN STUFF> ---

    def train_step(self, data):
        """
        Overwrite built-in train_step method

        Args:
            data (tuple): The example (ten `words`), and the label (one `word`)

        Returns:
            Metric results for all passed metrics
        """
        # Split data into example (x) and label (y)
        x, y = data[0], data[1]

        # Vectorize the example words (x)
        x = self.vectorize_layer(x)

        # Vectorize the labels
        # This will by default pad the output to 10 ... but we only need the
        # first entry (the true label, not the useless padding)
        y = self.vectorize_layer(y)[:, 0]

        # Convert our label into a one-hot encoding based on the max number of
        # features that we will be using for our model
        y_one_hot = tf.one_hot(y, self.max_features)

        # Everything within GradientTape is recorded
        # for later automatic differentiation
        with tf.GradientTape() as tape:
            # --- <ENCODER STUFF> ---
            # Transform the example utilizing the encoder embedding
            inputs = self.encoder_embedding(x)

            # Get the encoder outputs and state by
            # utilizing the encoder (lstm_layer)
            # - encoder_outputs : [max_time, batch_size, num_units]
            # - encoder_state : [state_h, state_c]
            #   * state_h --- The Hidden State
            #   * state_c --- The Cell State
            encoder_outputs, state_h, state_c = self.lstm_layer(inputs)
            # --- </ENCODER STUFF> ---

            # --- <ATTN STUFF> ---
            # Pass the encoder outputs and hidden state allowing us
            # to track the intermediate state coming out of the encoder layers
            attn_output = self.attention([encoder_outputs, state_h])
            attn_output = tf.expand_dims(attn_output, axis=1)
            # --- </ATTN STUFF> ---

            # --- <DECODER STUFF> ---
            # ??? Create an empty embedding ???
            targets = self.decoder_embedding(tf.zeros_like(y))

            # Concat the output of the attention layer to the last axis
            # of the empty targets embedding
            concat_output = tf.concat([targets, attn_output], axis=-1)

            # Predict the targets using the state from the encoder
            outputs, _, _ = \
                self.decoder(concat_output,
                             initial_state=[state_h, state_c])
            # --- </DECODER STUFF> ---

        # Automatically differentiate utilizing the loss and trainable variables
        gradients = tape.gradient(loss, trainable_variables)

        # Collect the outputs so that they can be optimized
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

        # Update the metric state prior to return
        self.compiled_metrics.update_state(y_one_hot, y_pred)

        return {m.name: m.result() for m in self.metrics}


model = EncoderDecoder()

model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer="adam",
              metrics=["accuracy"])

model.vectorize_layer.adapt(lines.batch(256))

# ERROR OCCURS ON THIS LINE
model.fit(data.batch(256),
          epochs=45,
          callbacks=[tf.keras.callbacks.ModelCheckpoint(filepath='text_gen')])
```
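Note: I realize that, as written, train_step references loss, trainable_variables, and y_pred without ever defining them. My best guess at the missing tail (assuming the self.compiled_loss API, which only exists from TF 2.2 onward, and treating the logits of the last decoder step as the prediction; neither detail is from the talk) is something like:

```python
# My best guess at the missing end of train_step -- not code from the talk.
with tf.GradientTape() as tape:
    # ... encoder / attention / decoder code exactly as above ...
    outputs, _, _ = \
        self.decoder(concat_output,
                     initial_state=[state_h, state_c])

    # BasicDecoder returns a BasicDecoderOutput; its rnn_output field
    # holds the projection layer's logits (assumed shape handling here)
    y_pred = outputs.rnn_output[:, -1, :]

    # compiled_loss wraps the loss passed to compile() (TF 2.2+ API)
    loss = self.compiled_loss(y_one_hot, y_pred)

# Differentiate the loss w.r.t. the model's own trainable variables
gradients = tape.gradient(loss, self.trainable_variables)
self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
```

Since projection_layer has no activation, this would presumably also require compiling with tf.keras.losses.CategoricalCrossentropy(from_logits=True).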
Detailed Error
```
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-40-779906f7f617> in <module>()
      1 model.fit(data.batch(256),
      2           epochs=45,
----> 3           callbacks=[tf.keras.callbacks.ModelCheckpoint(filepath='text_gen')])

8 frames
/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    233           max_queue_size=max_queue_size,
    234           workers=workers,
--> 235           use_multiprocessing=use_multiprocessing)
    236 
    237       total_samples = _get_total_number_of_samples(training_data_adapter)

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    591         max_queue_size=max_queue_size,
    592         workers=workers,
--> 593         use_multiprocessing=use_multiprocessing)
    594     val_adapter = None
    595     if validation_data:

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    704       max_queue_size=max_queue_size,
    705       workers=workers,
--> 706       use_multiprocessing=use_multiprocessing)
    707 
    708   return adapter

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/data_adapter.py in __init__(self, x, y, sample_weights, standardize_function, **kwargs)
    700 
    701     if standardize_function is not None:
--> 702       x = standardize_function(x)
    703 
    704     # Note that the dataset instance is immutable, its fine to reusing the user

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_v2.py in standardize_function(dataset)
    658           model.sample_weight_mode = getattr(model, 'sample_weight_mode', None)
    659 
--> 660           standardize(dataset, extract_tensors_from_dataset=False)
    661 
    662           # Then we map using only the tensor standardization portion.

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
   2358     is_compile_called = False
   2359     if not self._is_compiled and self.optimizer:
-> 2360       self._compile_from_inputs(all_inputs, y_input, x, y)
   2361       is_compile_called = True
   2362 

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training.py in _compile_from_inputs(self, all_inputs, target, orig_inputs, orig_target)
   2578       if training_utils.has_tensors(target):
   2579         target = training_utils.cast_if_floating_dtype_and_mismatch(
-> 2580             target, self.outputs)
   2581       training_utils.validate_input_types(target, orig_target,
   2582                                           allow_dict=False, field_name='target')

/tensorflow-2.1.0/python3.6/tensorflow_core/python/keras/engine/training_utils.py in cast_if_floating_dtype_and_mismatch(targets, outputs)
   1334   if tensor_util.is_tensor(targets):
   1335     # There is one target, so output[0] should be the only output.
-> 1336     return cast_single_tensor(targets, dtype=outputs[0].dtype)
   1337   new_targets = []
   1338   for target, out in zip(targets, outputs):

AttributeError: 'NoneType' object has no attribute 'dtype'
```
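For what it's worth, the failing frame is reading outputs[0].dtype from self.outputs, which suggests the model's outputs were never built. A quick check on a freshly compiled instance:

```python
# Sanity check: a subclassed model that has never been called has no
# concrete outputs for fit() to inspect
model = EncoderDecoder()
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer="adam",
              metrics=["accuracy"])
print(model.outputs)  # expected: None (or a list containing None)
```

Possibly related: as far as I can tell, fit() only honors an overridden train_step from TF 2.2 onward, and the model above also defines no call() method, so on TF 2.1 fit() falls back to the built-in training loop and tries to infer the outputs itself.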
How to get the data and lines variables for replication
Get the data
```
>>> wget http://www.thespermwhale.com/jaseweston/babi/CBTest.tgz
>>> tar zxvf CBTest.tgz
>>> rm -rf CBTest.tgz
```
Preprocess the data
```python
# Load the data as a dataset of lines from the text file
lines = tf.data.TextLineDataset("<path-to>/cbt_train.txt")

# First filter out the title lines
# This simple function is not included in this StackOverflow code
lines = lines.filter(lambda x: not is_title(x))

# Next we remove all punctuation
# This simple function is not included in this StackOverflow code
lines = lines.map(lambda x: remove_punc(x))

# Then we remove any extra spaces created by the previous function
# This simple function is not included in this StackOverflow code
lines = lines.map(lambda x: remove_extra_spaces(x))

# Then we convert all uppercase characters to lowercase
# This simple function is not included in this StackOverflow code
lines = lines.map(lambda x: make_lower(x))

# Get the words from the lines
words = lines.map(tf.strings.split)
words = words.unbatch()

# Get the wordsets
wordsets = words.batch(11)

# get_example_label is a simple function that splits a wordset into an
# example and a label: the first ten words are the example,
# the last word is the label
data = wordsets.map(get_example_label)

# Shuffle
data = data.shuffle(1024)
```
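If you need stand-ins for the omitted helpers, something along these lines should work. These are simplified guesses, not my originals; in particular, the _BOOK_TITLE_ pattern is an assumption about the raw CBT file format:

```python
# Simplified stand-ins for the helper functions referenced above -- my
# guesses, not the originals omitted from this post.

def is_title(line):
    # Assumption: title lines in the raw CBT files carry a _BOOK_TITLE_ tag
    return tf.strings.regex_full_match(line, r"_BOOK_TITLE_.*")

def remove_punc(line):
    # Replace punctuation with spaces (RE2 regex syntax)
    return tf.strings.regex_replace(line, r"[^\w\s]", " ")

def remove_extra_spaces(line):
    # Collapse the runs of whitespace left behind by remove_punc
    return tf.strings.regex_replace(line, r"\s+", " ")

def make_lower(line):
    return tf.strings.lower(line)

def get_example_label(wordset):
    # First ten words are the example, the eleventh is the label
    return wordset[:10], wordset[10]
```

One caveat: these stand-ins return tensors, so the filter step above would need tf.logical_not(is_title(x)) rather than Python's not, which can't be applied to a tensor inside a tf.data pipeline.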
Thanks in advance!!
Answer:
UPDATE
It looks like TensorFlow has released tutorials covering all of the demos from the Summit. As a result, you can inspect the actual code and work out where it differs from yours. I won't post the diffs here, because they turned out to be far larger than I initially expected.
Additional Resources
When I contacted TensorFlow, they also suggested reviewing the Transformer tutorial, which details how to implement sophisticated encoder-decoder and self-attention networks.
- They additionally pointed me to their TF2 implementation of BERT, so I'm including that link as well.
I hope these resources help!