I reimplemented the Keras MNIST CNN example using the Sequential, Functional, and model-subclassing syntaxes.
- https://keras.io/examples/mnist_cnn/
- https://github.com/JamesMcGuigan/kaggle-digit-recognizer/tree/master/src/keras/examples
Everything compiles and runs fine, but I noticed a large difference in validation accuracy with the subclassing syntax (35%) compared to the Sequential/Functional syntax (75%). The model architectures should be identical, which is what confuses me.
ClassCNN.py

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

# BUG: ClassCNN is only 36% accurate vs 75% for SequentialCNN / FunctionalCNN
# SequentialCNN validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN      validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
class ClassCNN(tf.keras.Model):
    def __init__(self, input_shape, output_shape, **kwargs):
        super(ClassCNN, self).__init__()
        self._input_shape  = input_shape   # = (28, 28, 1)
        self._output_shape = output_shape  # = 10

        self.conv1      = Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu)
        self.conv2      = Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu)
        self.maxpool    = MaxPooling2D(pool_size=(2, 2))
        self.dropout1   = Dropout(0.25, name='dropout1')
        self.flatten    = Flatten()
        self.dense1     = Dense(128, activation=tf.nn.relu)
        self.dropout2   = Dropout(0.5, name='dropout2')
        self.activation = Dense(self._output_shape, activation=tf.nn.relu)

        self.conv1.build(    (None,) + input_shape )
        self.conv2.build(    (None,) + tuple(np.subtract(input_shape[:-1], 2)) + (32,) )
        self.maxpool.build(  (None,) + tuple(np.subtract(input_shape[:-1], 4)) + (64,) )
        self.dropout1.build( tuple(np.floor_divide(np.subtract(input_shape[:-1], 4), 2)) + (64,) )
        self.dropout2.build( 128 )
        self.build( (None,) + input_shape )

    def call(self, x, training=False, **kwargs):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        if training: x = self.dropout1(x)
        x = self.flatten(x)
        x = self.dense1(x)
        if training: x = self.dropout2(x)
        x = self.activation(x)
        return x
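As a side note on the subclassing API: a Keras Dropout layer is already inactive unless it is called with training=True, so the explicit if training: branches and the manual build() calls above are optional rather than required. A tiny standalone demo of that behaviour:

import numpy as np
from tensorflow.keras.layers import Dropout

# Demo: Dropout only drops units when called with training=True, so Keras can
# be left to route the training flag instead of branching manually in call().
layer = Dropout(0.5)
x = np.ones((1, 4), dtype=np.float32)
print(layer(x, training=False).numpy())  # [[1. 1. 1. 1.]]  (identity at inference time)
print(layer(x, training=True).numpy())   # roughly half the entries zeroed, the rest scaled to 2.0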
FunctionalCNN.py

import os
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model

def FunctionalCNN(input_shape, output_shape):
    inputs = Input(shape=input_shape)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(output_shape, activation='softmax')(x)

    model = Model(inputs, x, name="FunctionalCNN")
    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "FunctionalCNN.png"))
    return model
SequentialCNN.py

import os
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model

def SequentialCNN(input_shape, output_shape):
    model = Sequential()
    model.add( Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape) )
    model.add( Conv2D(64, (3, 3), activation='relu') )
    model.add( MaxPooling2D(pool_size=(2, 2)) )
    model.add( Dropout(0.25) )
    model.add( Flatten() )
    model.add( Dense(128, activation='relu') )
    model.add( Dropout(0.5) )
    model.add( Dense(output_shape, activation='softmax') )

    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "SequentialCNN.png"))
    return model
main.py
#!/usr/bin/env python3
import multiprocessing
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # 0, 1, 2, 3  # silence TensorFlow logging
os.chdir( os.path.dirname( os.path.abspath(__file__) ) )

import tensorflow as tf
import tensorflow.keras as keras
import time

from src.dataset import DataSet
from src.keras.examples.ClassCNN import ClassCNN
from src.keras.examples.ClassNN import ClassNN
from src.keras.examples.FunctionalCNN import FunctionalCNN
from src.keras.examples.SequentialCNN import SequentialCNN
from src.utils.csv import predict_to_csv

tf.random.set_seed(42)

timer_start = time.time()
dataset = DataSet()
config  = {
    "verbose":      False,
    "epochs":       12,
    "batch_size":   128,
    "input_shape":  dataset.input_shape(),
    "output_shape": dataset.output_shape(),
}
print("config", config)

# BUG: ClassCNN is only 36% accurate vs 75% for SequentialCNN / FunctionalCNN
# SequentialCNN validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN      validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
models = {
    "SequentialCNN": SequentialCNN( input_shape=dataset.input_shape(), output_shape=dataset.output_shape() ),
    "FunctionalCNN": FunctionalCNN( input_shape=dataset.input_shape(), output_shape=dataset.output_shape() ),
    "ClassCNN":      ClassCNN(      input_shape=dataset.input_shape(), output_shape=dataset.output_shape() ),
}

for model_name, model in models.items():
    print(model_name)
    model.compile(
        loss=keras.losses.categorical_crossentropy,
        optimizer=keras.optimizers.Adadelta(),
        metrics=['accuracy']
    )
    model.summary()
    model.fit(
        dataset.data['train_X'], dataset.data['train_Y'],
        batch_size          = config["batch_size"],
        epochs              = config["epochs"],
        verbose             = config["verbose"],
        validation_data     = (dataset.data["valid_X"], dataset.data["valid_Y"]),
        use_multiprocessing = True,
        workers             = multiprocessing.cpu_count()
    )

for model_name, model in models.items():
    score = model.evaluate(dataset.data['valid_X'], dataset.data['valid_Y'], verbose=config["verbose"])
    print(model_name.ljust(15), "validation:", '| loss:', score[0], '| accuracy:', score[1])

for model_name, model in models.items():
    predict_to_csv(
        model.predict(dataset.data['test_X']),
        f'../../../submissions/keras-examples/keras-examples-{model_name}.csv'
    )

print("time:", int(time.time() - timer_start), "s")
Output:
./src/keras/examples/main.py
config {'verbose': False, 'epochs': 12, 'batch_size': 128, 'input_shape': (28, 28, 1), 'output_shape': 10}
SequentialCNN
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d (Conv2D)              (None, 26, 26, 32)        320
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0
_________________________________________________________________
dense (Dense)                (None, 128)               1179776
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
FunctionalCNN
Model: "FunctionalCNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
input_1 (InputLayer)         [(None, 28, 28, 1)]       0
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
ClassCNN
Model: "class_cnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
conv2d_4 (Conv2D)            multiple                  320
_________________________________________________________________
conv2d_5 (Conv2D)            multiple                  18496
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 multiple                  0
_________________________________________________________________
dropout1 (Dropout)           multiple                  0
_________________________________________________________________
flatten_2 (Flatten)          multiple                  0
_________________________________________________________________
dense_4 (Dense)              multiple                  1179776
_________________________________________________________________
dropout2 (Dropout)           multiple                  0
_________________________________________________________________
dense_5 (Dense)              multiple                  1290
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
SequentialCNN   validation: | loss: 1.370523907570612  | accuracy: 0.74964285
FunctionalCNN   validation: | loss: 1.4270000725700742 | accuracy: 0.78511906
ClassCNN        validation: | loss: 2.028766530354818  | accuracy: 0.35630953
SequentialCNN and FunctionalCNN both produce similar accuracies (75%), while ClassCNN's accuracy (35%) is radically different. Visually the models look identical; a programmatic check of the layer configs is sketched below.
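To compare the models beyond model.summary() (which does not display activation functions), something like this minimal sketch could be used; models is assumed to be the dict built in main.py:

def print_layer_activations(models: dict):
    """Print each layer's name and activation for a dict of built Keras models."""
    for model_name, model in models.items():
        print(model_name)
        for layer in model.layers:
            config = layer.get_config()  # serialized layer settings; includes 'activation' where applicable
            print("   ", config.get("name"), "| activation:", config.get("activation"))

# usage (hypothetical): print_layer_activations(models)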
Can anyone explain this?
Answer:
I think the problem is that the activation function on the last layer of ClassCNN is 'relu'; it should be 'softmax', as in the other models... It was just a human error... Thanks...
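For completeness, that means changing the last layer in ClassCNN.__init__ to Dense(self._output_shape, activation=tf.nn.softmax), matching SequentialCNN and FunctionalCNN. A tiny standalone illustration of why the output activation matters when training with categorical_crossentropy:

import tensorflow as tf

# relu leaves the outputs as unnormalized (possibly all-zero) scores, while
# softmax always produces a probability distribution over the classes.
scores = tf.constant([[-1.0, 2.0, 0.5]])
print(tf.nn.relu(scores).numpy())     # [[0.  2.  0.5]]           -> does not sum to 1
print(tf.nn.softmax(scores).numpy())  # approx [[0.04 0.79 0.18]] -> sums to 1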