I am trying to implement K-fold cross-validation for my binary image classifier, but I am struggling with how to handle the data. The code I had before attempting K-fold cross-validation (very long and messy, sorry) performed very poorly. My code is below; any advice or help would be greatly appreciated. I think K-fold cross-validation is the right approach here, but if it isn't, please let me know. Thank you very much!
I would like to know how to reformat my data to create the different folds, because almost every tutorial uses a .csv file; however, I only have two folders of images, which are either split into two separate classes (for the training data) or kept as a single class (for the test data).
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.regularizers import l2
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
import numpy as np
import matplotlib.pyplot as plt

classifier = Sequential()
classifier.add(Conv2D(32, (3, 3), input_shape=(256, 256, 3), activation='relu', kernel_regularizer=l2(0.01)))
classifier.add(MaxPooling2D(pool_size=(2, 2)))
classifier.add(Flatten())
classifier.add(Dense(units=128, activation='relu'))
classifier.add(Dropout(0.5))
classifier.add(Dense(units=1, activation='sigmoid'))
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

train_datagen = ImageDataGenerator(rescale=1./255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   validation_split=0.2)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    'train',
    target_size=(256, 256),
    batch_size=32,
    class_mode='binary',
    subset='training')  # set as training data

validation_generator = train_datagen.flow_from_directory(
    'train',  # same directory as training data
    target_size=(256, 256),
    batch_size=32,
    class_mode='binary',
    subset='validation')

test_set = test_datagen.flow_from_directory('test',
                                            target_size=(256, 256),
                                            batch_size=10,
                                            class_mode='binary')

history = classifier.fit_generator(train_generator,
                                   steps_per_epoch=40,
                                   epochs=100,
                                   validation_data=validation_generator)

classifier.save('50epochmodel')

test_images = np.array(list(next(test_set)[:1]))[0]
probabilities = classifier.predict(test_images)
Answer:
For more flexibility, you can use a simple file-loading function instead of the Keras generators. You can then iterate over the lists of files, training on some folds and testing against the remaining one.
import os
os.chdir(r'catsanddogs')
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras import Sequential
from collections import deque
from glob2 import glob
import numpy as np

files = glob('*\\*\\*.jpg')
files = files[:len(files) - len(files) % 3]  # dataset is now divisible by 3
indices = np.random.permutation(len(files)).reshape(3, -1)

imsize = 64


def load(file_path):
    # read the image, scale it to [0, 1] and resize it
    img = tf.io.read_file(file_path)
    img = tf.image.decode_png(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, size=(imsize, imsize))
    # the label comes from the parent folder name, e.g. train\dogs\001.jpg -> 'dogs'
    label = tf.strings.split(file_path, os.sep)[1]
    label = tf.cast(tf.equal(label, 'dogs'), tf.int32)
    return img, label


accuracies_on_test_set = {}

for i in range(len(indices)):
    # rotate the three folds so each one takes a turn as the test set
    d = deque(np.array(files)[indices].tolist())
    d.rotate(-i)
    train1, train2, test1 = d
    train_ds = tf.data.Dataset.from_tensor_slices(train1 + train2).\
        shuffle(len(train1) + len(train2)).map(load).batch(4)
    test_ds = tf.data.Dataset.from_tensor_slices(test1).\
        shuffle(len(test1)).map(load).batch(4)

    classifier = Sequential()
    classifier.add(Conv2D(8, (3, 3), input_shape=(imsize, imsize, 3), activation='relu'))
    classifier.add(MaxPooling2D(pool_size=(2, 2)))
    classifier.add(Flatten())
    classifier.add(Dense(units=32, activation='relu'))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(units=1, activation='sigmoid'))
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    classifier.fit(train_ds, validation_data=test_ds, epochs=2, verbose=0)

    loss, accuracy = classifier.evaluate(test_ds, verbose=0)
    accuracies_on_test_set[f'epoch_{i + 1}_accuracy'] = accuracy

print(accuracies_on_test_set)
{'epoch_1_accuracy': 0.8235, 'epoch_2_accuracy': 0.7765, 'epoch_3_accuracy': 0.736}
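The per-fold accuracies can then be combined into a single cross-validation estimate. A minimal sketch, assuming the accuracies_on_test_set dictionary produced by the loop above:

import numpy as np

# average the per-fold test accuracies to get the overall CV estimate
cv_accuracy = np.mean(list(accuracies_on_test_set.values()))
print(f'mean cross-validation accuracy: {cv_accuracy:.4f}')  # ~0.779 for the run above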
Here is how the datasets are rotated:
from collections import deque

groups = ['group1', 'group2', 'group3']
for i in range(3):
    d = deque(groups)
    d.rotate(-i)
    print(list(d))
['group1', 'group2', 'group3']
['group2', 'group3', 'group1']
['group3', 'group1', 'group2']
Each group takes its turn being last in the deque and is then picked as the test set, with all the remaining groups used for training:
train1, train2, test1 = d
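The same rotation idea extends to more than three folds: after each rotation the last group becomes the test fold and the rest are pooled for training. A minimal sketch of that generalization (the value k = 5 and the group names are hypothetical, not part of the answer above):

from collections import deque

k = 5  # hypothetical number of folds
groups = [f'group{j + 1}' for j in range(k)]

for i in range(k):
    d = deque(groups)
    d.rotate(-i)
    *train_groups, test_group = d  # last element is the test fold
    print('train:', train_groups, '| test:', test_group)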