My implementation is based on TensorFlow: Custom training walkthrough and TensorFlow: Introduction to modules, layers, and models.
I built a simple neural network with a custom dense layer, almost identical to the one described in the tutorials I linked. The problem is that it does not learn at all. What am I doing wrong?
import tensorflow as tf
import tensorflow_datasets as tfds

# Iris: 150 examples, 4 features, 3 classes; 125 for training, 25 for testing
ds = tfds.load('iris', split='train', as_supervised=True)
train_ds = ds.take(125).shuffle(125).batch(1)
test_ds = ds.skip(125).take(25).shuffle(25).batch(1)

class Dense(tf.Module):
    def __init__(self, in_features, out_features, activation, name=None):
        super().__init__(name=name)
        self.activation = activation
        # Weights drawn uniformly from [0, 1)
        self.w = tf.Variable(
            tf.random.uniform([in_features, out_features]), name='w')
        self.b = tf.Variable(tf.zeros([out_features]), name='b')

    def __call__(self, x):
        y = tf.matmul(x, self.w) + self.b
        return self.activation(y)

class SequentialModule(tf.Module):
    def __init__(self, name):
        super().__init__(name=name)
        self.dense1 = Dense(in_features=4, out_features=16, activation=tf.nn.relu)
        self.dense2 = Dense(in_features=16, out_features=32, activation=tf.nn.relu)
        self.dense3 = Dense(in_features=32, out_features=3, activation=tf.nn.softmax)

    def __call__(self, x):
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return x

my_model = SequentialModule(name="model")
loss_object = tf.losses.sparse_categorical_crossentropy

def compute_loss(model, x, y):
    out = model(x)
    # The model already outputs probabilities, hence from_logits=False
    loss = loss_object(y, out, from_logits=False)
    return out, loss

def compute_gradients(model, x, y):
    with tf.GradientTape() as tape:
        out, loss_value = compute_loss(model, x, y)
    gradients = tape.gradient(loss_value, model.trainable_variables)
    return out, loss_value, gradients

optimizer = tf.optimizers.Adam(learning_rate=0.001)

for epoch in range(1, 5 + 1):
    # Fresh metrics each epoch
    train_loss = tf.metrics.Mean(name='train_loss')
    test_loss = tf.metrics.Mean(name='test_loss')
    train_acc = tf.metrics.SparseCategoricalAccuracy()
    test_acc = tf.metrics.SparseCategoricalAccuracy()

    for input_batch, label_batch in train_ds:
        output, loss_value, gradients = compute_gradients(my_model, input_batch, label_batch)
        optimizer.apply_gradients(zip(gradients, my_model.trainable_variables))
        train_loss.update_state(loss_value)
        train_acc.update_state(label_batch, output)

    for input_batch, label_batch in test_ds:
        output, loss_value = compute_loss(my_model, input_batch, label_batch)
        test_loss.update_state(loss_value)
        test_acc.update_state(label_batch, output)

    print(f'Epoch {epoch:03d} Loss {train_loss.result():=5.3f} Acc {train_acc.result():=5.3f} '
          f'TLoss {test_loss.result():=5.3f} TAcc {test_acc.result():=5.3f}')
Epoch 001 Loss 10.445 Acc 0.352 TLoss 12.250 TAcc 0.240
Epoch 002 Loss 10.445 Acc 0.352 TLoss 12.250 TAcc 0.240
Epoch 003 Loss 10.445 Acc 0.352 TLoss 12.250 TAcc 0.240
Epoch 004 Loss 10.445 Acc 0.352 TLoss 12.250 TAcc 0.240
Epoch 005 Loss 10.445 Acc 0.352 TLoss 12.250 TAcc 0.240
Answer:
So it turns out the problem is purely statistical: the weight initialization. tf.random.uniform draws from [0, 1), so every weight starts out positive and fairly large; the pre-activations then grow layer by layer, the final softmax saturates, and the clipped cross-entropy yields effectively zero gradients, which is why the loss never moves (see the short sketch after the fix below). In contrast to the linked tutorials, if you use tf.initializers.GlorotUniform
as the initializer, it learns properly.
self.w = tf.Variable(
    tf.initializers.GlorotUniform()([in_features, out_features]),
    name='w')
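To see how different the two initializations actually are, here is a minimal sketch (the 4x16 shape simply matches the first layer of the model above; the seed is only there to make the sketch reproducible):

import tensorflow as tf

tf.random.set_seed(0)  # only for a reproducible sketch

# What the original code draws: uniform over [0, 1), strictly positive
w_uniform = tf.random.uniform([4, 16])

# Glorot uniform: symmetric around zero, limit = sqrt(6 / (fan_in + fan_out))
w_glorot = tf.initializers.GlorotUniform()([4, 16])

for name, w in [('uniform', w_uniform), ('glorot', w_glorot)]:
    print(f'{name:7s} min {float(tf.reduce_min(w)):+.3f} '
          f'max {float(tf.reduce_max(w)):+.3f} '
          f'mean {float(tf.reduce_mean(w)):+.3f}')

With only that one-line change, the same training loop converges: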
Epoch 1 Loss 0.515 Acc 0.776 TLoss 0.537 TAcc 0.720
Epoch 2 Loss 0.186 Acc 0.928 TLoss 0.136 TAcc 0.920
Epoch 3 Loss 0.171 Acc 0.944 TLoss 0.104 TAcc 0.920
Epoch 4 Loss 0.230 Acc 0.920 TLoss 0.268 TAcc 0.880
Epoch 5 Loss 0.177 Acc 0.928 TLoss 0.284 TAcc 0.880
Epoch 6 Loss 0.144 Acc 0.944 TLoss 0.111 TAcc 0.920
Epoch 7 Loss 0.151 Acc 0.952 TLoss 0.137 TAcc 0.920
Epoch 8 Loss 0.192 Acc 0.952 TLoss 0.111 TAcc 0.960
Epoch 9 Loss 0.081 Acc 0.968 TLoss 0.074 TAcc 0.960
Epoch 10 Loss 0.222 Acc 0.920 TLoss 0.097 TAcc 1.000
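As a side note, tf.keras.layers.Dense uses kernel_initializer='glorot_uniform' by default, which is one reason the usual Keras version of such a model trains out of the box. A hypothetical Keras equivalent of the custom module above would look like this:

import tensorflow as tf

# Hypothetical Keras equivalent of SequentialModule; Dense already
# defaults to kernel_initializer='glorot_uniform'
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax'),
])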