这是一段使用 Keras 的 Python 代码。在 Linux Mint 上运行以下代码时,训练无法跑完全部批次,总是停在第 32 个批次,如下所示:
  1/100 .............. ETA 30:00 Loss ..
  2/100 =........... ETA 29:59 Loss ..
  3/100 ==......... ETA 29:58 Loss ..
  ...
 32/100 ==....... ETA 25:00 Loss ..
(训练因不明原因停止)
但是当在Windows上运行这段代码时,训练可以正常进行直到第100批次。是什么导致了这种行为?
# Train a small dogs-vs-cats CNN with Keras, resuming from previously saved
# architecture/weights when they exist, then save the architecture (JSON and
# YAML) and the final weights.
from __future__ import print_function
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.layers.noise import AlphaDropout
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
from keras.layers import Dense, Dropout, Activation
from keras.preprocessing.text import Tokenizer
import keras.backend.tensorflow_backend as KTF
import tensorflow as tf
import os.path

# Output locations and file names.
f_log = './log'
f_model = './model/dogvscat'
model_yaml = 'dogvscat_model.yaml'
model_filename = 'dogvscat_model.json'
weights_filename = 'dogvscat_model_weights.hdf5'

# Training hyper-parameters.
batch_size = 64
epochs = 15
nb_validation_samples = 100

print('Building model...')
if os.path.isfile(os.path.join(f_model, model_filename)):
    # Resume: rebuild the architecture from JSON, then load the saved weights.
    print('Saved parameters found. I will use this file...')
    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(f_model, model_filename)) as json_file:
        json_string = json_file.read()
    model = model_from_json(json_string)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.load_weights(os.path.join(f_model, weights_filename))
else:
    # No saved model: build a two-stage conv net with SELU activations.
    print('Saved parameters Not found. Creating new model...')
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=(128, 128, 3)))
    model.add(Activation('selu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('selu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(64, kernel_initializer='lecun_normal'))
    model.add(Activation('selu'))
    model.add(AlphaDropout(0.1))
    model.add(Dense(2))
    model.add(Activation('softmax'))
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

# Training images are rescaled and augmented; validation images are only
# rescaled.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

train_generator = train_datagen.flow_from_directory(
    'data/train',
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True)
validation_generator = test_datagen.flow_from_directory(
    'data/validation',
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True)

# TensorBoard logging, plus a checkpoint that keeps only the weights with the
# best validation loss seen so far.
tb_cb = keras.callbacks.TensorBoard(log_dir=f_log, histogram_freq=0)
cp_cb = keras.callbacks.ModelCheckpoint(
    filepath=os.path.join(f_model, weights_filename),
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='auto')
cbks = [tb_cb, cp_cb]

# NOTE(review): steps_per_epoch / validation_steps (and the second argument of
# evaluate_generator) count *batches*, not samples, yet a variable named
# nb_validation_samples is passed for all three. With batch_size = 64 each of
# these 100 steps requests a full batch. The values are intentionally left as
# written because they are the behavior under discussion.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=nb_validation_samples,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=nb_validation_samples,
    callbacks=cbks)

score = model.evaluate_generator(validation_generator, nb_validation_samples)
print('')
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Persist the architecture in both JSON and YAML form, then the final weights.
json_string = model.to_json()
with open(os.path.join(f_model, model_filename), 'w') as json_file:
    json_file.write(json_string)
yaml_string = model.to_yaml()
with open(os.path.join(f_model, model_yaml), 'w') as yaml_file:
    yaml_file.write(yaml_string)
print('save weights')
model.save_weights(os.path.join(f_model, weights_filename))
回答:
你的代码中有一个错误:你应该修改 steps_per_epoch 和 validation_steps,如下所示。
# One step consumes one batch, so an epoch needs ceil(samples / batch_size)
# steps. float() forces true division (under Python 2, int / int truncates
# before ceil is applied) and int() converts np.ceil's float result into an
# integer step count.
# NOTE(review): nb_training_samples is not defined in the script shown; set it
# to the number of training images (presumably available as
# train_generator.samples - verify against the installed Keras version).
history = model.fit_generator(
    train_generator,
    steps_per_epoch=int(np.ceil(nb_training_samples / float(batch_size))),
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=int(np.ceil(nb_validation_samples / float(batch_size))),
    callbacks=cbks)
每一步(step)都会消耗 batch_size 个样本,因此完成一个 epoch 需要 np.ceil(nb_training_samples/batch_size) 步。