I have a DCGAN that closely follows the example in the TensorFlow documentation.
Here is the tutorial: https://www.tensorflow.org/tutorials/generative/dcgan
The tutorial trains on grayscale sample data. I'd like to start training on color data rather than just black and white.
I assume the shape of the training data needs to change, but does the shape of the generator model also need to change?
How can I convert this code to an RGB implementation?
```python
from google.colab import drive
drive.mount('/content/drive')

import tensorflow as tf
import glob
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
from tensorflow.keras import layers
import time

from IPython import display

train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/drive/MyDrive/birds",
    seed=123,
    validation_split=0,
    image_size=(112, 112),
    color_mode="grayscale",
    shuffle=True,
    batch_size=1)

train_images_array = []
for images, _ in train_dataset:
    for i in range(len(images)):
        train_images_array.append(images[i])

train_images = np.array(train_images_array)
train_images = train_images.reshape(train_images.shape[0], 112, 112, 1).astype('float32')
train_images = (train_images - 127.5) / 127.5  # Normalize the images to [-1, 1]

BUFFER_SIZE = 60000
BATCH_SIZE = 8

# Batch and shuffle the data
dataset_ = tf.data.Dataset.from_tensor_slices(train_images).shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

def make_generator_model():
    model = tf.keras.Sequential()
    model.add(layers.Dense(7*7*256, use_bias=False, input_shape=(100,)))
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Reshape((7, 7, 256)))
    assert model.output_shape == (None, 7, 7, 256)  # Note: None is the batch size

    model.add(layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False))
    assert model.output_shape == (None, 7, 7, 128)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False))
    assert model.output_shape == (None, 14, 14, 64)
    model.add(layers.BatchNormalization())
    model.add(layers.LeakyReLU())

    model.add(layers.Conv2DTranspose(1, (20, 20), strides=(8, 8), padding='same', use_bias=False, activation='tanh'))
    assert model.output_shape == (None, 112, 112, 1)

    return model

generator = make_generator_model()

noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)

plt.imshow(generated_image[0, :, :, 0], cmap='gray')

def make_discriminator_model():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(64, (10, 10), strides=(2, 2), padding='same',
                            input_shape=[112, 112, 1]))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'))
    model.add(layers.LeakyReLU())
    model.add(layers.Dropout(0.3))

    model.add(layers.Flatten())
    model.add(layers.Dense(1))

    return model

discriminator = make_discriminator_model()
decision = discriminator(generated_image)
print(decision)

# This method returns a helper function to compute cross entropy loss
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def discriminator_loss(real_output, fake_output):
    real_loss = cross_entropy(tf.ones_like(real_output), real_output)
    fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output)
    total_loss = real_loss + fake_loss
    return total_loss

def generator_loss(fake_output):
    return cross_entropy(tf.ones_like(fake_output), fake_output)

generator_optimizer = tf.keras.optimizers.Adam(1e-4)
discriminator_optimizer = tf.keras.optimizers.Adam(1e-4)

checkpoint_dir = '/content/drive/MyDrive/training_checkpoints11'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=generator_optimizer,
                                 discriminator_optimizer=discriminator_optimizer,
                                 generator=generator,
                                 discriminator=discriminator)

EPOCHS = 50
noise_dim = 100
num_examples_to_generate = 16

# You will reuse this seed over time (so it's easier
# to visualize progress in the animated GIF)
seed = tf.random.normal([num_examples_to_generate, noise_dim])

def generate_and_save_images(model, epoch, test_input):
    # Notice `training` is set to False.
    # This is so all layers run in inference mode (batchnorm).
    predictions = model(test_input, training=False)

    fig = plt.figure(figsize=(4, 4))
    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i+1)
        plt.imshow(predictions[i, :, :, 0] * 127.5 + 127.5, cmap='gray')
        plt.axis('off')

    plt.savefig('image_at_epoch_{:04d}.png'.format(epoch))
    plt.show()

# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
@tf.function
def train_step(images):
    noise = tf.random.normal([BATCH_SIZE, noise_dim])

    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = generator(noise, training=True)

        real_output = discriminator(images, training=True)
        fake_output = discriminator(generated_images, training=True)

        gen_loss = generator_loss(fake_output)
        disc_loss = discriminator_loss(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminator.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, discriminator.trainable_variables))

def train(dataset, epochs):
    for epoch in range(epochs):
        start = time.time()

        for image_batch in dataset:
            train_step(image_batch)

        # Produce images for the GIF as you go
        display.clear_output(wait=True)
        generate_and_save_images(generator, epoch + 1, seed)

        # Save the model every 8 epochs
        if (epoch + 1) % 8 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('Time for epoch {} is {} sec'.format(epoch + 1, time.time() - start))

    # Generate after the final epoch
    display.clear_output(wait=True)
    generate_and_save_images(generator, epochs, seed)
    return

train(dataset_, 128)

noise = tf.random.normal([1, 100])
generated_image = generator(noise, training=False)
print(generated_image.shape)

plt.imshow(generated_image[0, :, :, 0], cmap='gray')

checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
```
Answer:
Yes, the generator also needs to change. A grayscale image has a single channel, whereas you need three.
So you need to change
```python
model.add(layers.Conv2DTranspose(1, (20, 20), strides=(8, 8), padding='same', use_bias=False, activation='tanh'))
assert model.output_shape == (None, 112, 112, 1)
```
to
```python
model.add(layers.Conv2DTranspose(3, (20, 20), strides=(8, 8), padding='same', use_bias=False, activation='tanh'))
assert model.output_shape == (None, 112, 112, 3)
```
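That fixes the generator's output, but the channel count also appears in your data pipeline, the discriminator, and the plotting code. Here's a minimal sketch of the remaining changes, assuming the same 112×112 image size and directory layout as in your code:

```python
# Load the images in color. color_mode="rgb" is the default for
# image_dataset_from_directory, but being explicit makes the intent clear.
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    "/content/drive/MyDrive/birds",
    seed=123,
    image_size=(112, 112),
    color_mode="rgb",
    shuffle=True,
    batch_size=1)

# Reshape to three channels instead of one.
train_images = train_images.reshape(train_images.shape[0], 112, 112, 3).astype('float32')

# The discriminator's first layer must accept three-channel input.
model.add(layers.Conv2D(64, (10, 10), strides=(2, 2), padding='same',
                        input_shape=[112, 112, 3]))

# When plotting, keep all three channels, rescale the tanh output from
# [-1, 1] to [0, 1], and drop cmap='gray' -- matplotlib treats a trailing
# dimension of 3 as RGB. For example, in generate_and_save_images:
plt.imshow((predictions[i, :, :, :] + 1) / 2)
```

The same rescaling applies to the standalone `plt.imshow(generated_image[0, ...])` calls. The general rule is that every hard-coded 1 in an image shape (including the `assert` statements) becomes a 3; the noise dimension, losses, and training loop are unaffected.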