我一直在尝试使用Colab提供的TPU,因为据说它速度很快,但似乎无法成功。我使用的是TensorFlow 2.4.1。我尝试按照这个https://www.tensorflow.org/guide/tpu进行操作,但没有成功。这里是我的代码https://colab.research.google.com/drive/1GGtwBicZF0qtp57ioD7g0JdE1iBXL85J?usp=sharing
%tensorflow_version 2.xfrom __future__ import absolute_import, division, print_function, unicode_literalsimport osfrom pathlib import Pathimport tensorflow as tfimport numpy as npimport pandas as pdimport tensorflow_datasets as tfdsCSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth', 'Species']SPECIES = ['Setosa', 'Versicolor', 'Virginica']train_path = tf.keras.utils.get_file( "iris_training.csv", "https://storage.googleapis.com/download.tensorflow.org/data/iris_training.csv")train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)train_y = train.pop('Species')nb_classes=3 # we have three types of flowersX=np.array(train)Y=np.eye(nb_classes)[np.array(train_y)]clf = tf.keras.models.Sequential([ tf.keras.layers.Dense(30, activation='relu'), tf.keras.layers.Dense(10, activation='relu'), tf.keras.layers.Dense(3, activation='softmax'),])clf.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])history = clf.fit(X,Y, batch_size=32,epochs=10, validation_split=0.1)clf.save("numeric_values-model.h5")
这是我尝试转换它的代码
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')tf.config.experimental_connect_to_cluster(resolver)# This is the TPU initialization code that has to be at the beginning.tf.tpu.experimental.initialize_tpu_system(resolver)print("All devices: ", tf.config.list_logical_devices('TPU'))a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])with tf.device('/TPU:0'): c = tf.matmul(a, b)print("c device: ", c.device)print(c)strategy = tf.distribute.TPUStrategy(resolver)@tf.functiondef matmul_fn(x, y): z = tf.matmul(x, y) return zz = strategy.run(matmul_fn, args=(a, b))print(z)@tf.functiondef matmul_fn(x, y): z = tf.matmul(x, y) return zz = strategy.run(matmul_fn, args=(a, b))print(z)def create_model(): return tf.keras.models.Sequential([ tf.keras.layers.Dense(30, activation='relu'), tf.keras.layers.Dense(10, activation='relu'), tf.keras.layers.Dense(3, activation='softmax')])def get_dataset(batch_size, is_training=True): split = 'train' if is_training else 'test' dataset, info = tfds.load(name='mnist', split=split, with_info=True, as_supervised=True, try_gcs=True) # Only shuffle and repeat the dataset in training. The advantage to have a # infinite dataset for training is to avoid the potential last partial batch # in each epoch, so users don't need to think about scaling the gradients # based on the actual batch size. if is_training: dataset = dataset.shuffle(10000) dataset = dataset.repeat() dataset = dataset.batch(batch_size) return datasetwith strategy.scope(): model = create_model() model.compile(optimizer='adam', loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), metrics=['sparse_categorical_accuracy'])batch_size = 200steps_per_epoch = 60000 // batch_sizevalidation_steps = 10000 // batch_sizetrain_dataset = get_dataset(batch_size, is_training=True)test_dataset = get_dataset(batch_size, is_training=False)model.fit(train_dataset, epochs=5, steps_per_epoch=steps_per_epoch, validation_data=test_dataset, validation_steps=validation_steps)
回答:
您提到的代码片段出现了以下错误信息:
(0) Invalid argument: {{function_node __inference_train_function_10150}} Compilation failure: Incompatible shapes: [25,1] vs. [25,28,28]
这意味着在您的输入数据和模型之间存在形状不匹配的问题。
这里最简单的解决方案是参考TensorFlow官方模型园区的MNIST模型(它也使用了TFDS对MNIST进行简单的模型处理),找出您的代码与之有何不同之处。