I successfully trained a Keras model using the Theano backend on a 13-inch MacBook Pro, albeit slowly. But when I train on the same data with the same model on a more powerful computer (32 GB RAM, an 8 GB Nvidia Quadro GPU, 8 CPU cores) running Ubuntu with the TensorFlow backend, I get the following error:
Here is the script I used:
from keras import backend as K
from keras.callbacks import Callback
from keras.constraints import maxnorm
from keras.models import Sequential, load_model
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import MaxPooling3D
from keras.optimizers import Nadam
from keras.preprocessing.image import random_rotation, random_shift, random_shear, random_zoom
from keras.utils import np_utils
from keras.utils.io_utils import HDF5Matrix
from pprint import pprint
from random import shuffle
from sklearn.utils import shuffle  # NOTE: this shadows random.shuffle imported on the previous line
K.set_image_dim_ordering("th")

import cv2
import h5py
import json
import os
import sys
import numpy as np

class OpticalSpeechRecognizer(object):
    def __init__(self, rows, columns, frames_per_sequence, samples_generated_per_sample,
                 config_file, training_save_fn, osr_save_fn):
        self.rows = rows
        self.columns = columns
        self.frames_per_sequence = frames_per_sequence
        self.samples_generated_per_sample = samples_generated_per_sample
        self.config_file = config_file
        self.training_save_fn = training_save_fn
        self.osr_save_fn = osr_save_fn
        self.osr = None

    def save_osr_model(self):
        """ Save the OSR model to an HDF5 file """
        # delete the file if it already exists
        try:
            print "File \"{0}\" already exists! Overwriting previous saved file.\n".format(self.osr_save_fn)
            os.remove(self.osr_save_fn)
        except OSError:
            pass
        print "Saving OSR model to \"{0}\"".format(self.osr_save_fn)
        self.osr.save(self.osr_save_fn)

    def load_osr_model(self):
        """ Load the OSR model from an HDF5 file """
        print "Loading OSR model from \"{0}\"".format(self.osr_save_fn)
        self.osr = load_model(self.osr_save_fn)

    def train_osr_model(self):
        """ Train the optical speech recognizer """
        print "\nTraining OSR"
        validation_ratio = 0.3
        batch_size = 25
        training_sequence_generator = self.generate_training_sequences(batch_size=batch_size)
        validation_sequence_generator = self.generate_training_sequences(batch_size=batch_size,
                                                                         validation_ratio=validation_ratio)
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            sample_count = training_save_file.attrs["sample_count"]
            pbi = ProgressDisplay()
            self.osr.fit_generator(generator=training_sequence_generator,
                                   validation_data=validation_sequence_generator,
                                   samples_per_epoch=sample_count,
                                   nb_val_samples=int(round(validation_ratio*sample_count)),
                                   nb_epoch=10,
                                   max_q_size=1,
                                   verbose=2,
                                   callbacks=[pbi],
                                   class_weight=None,
                                   nb_worker=1)

    def generate_training_sequences(self, batch_size, validation_ratio=0):
        """ Generate training sequences from the HDF5 file on demand """
        while True:
            with h5py.File(self.training_save_fn, "r") as training_save_file:
                sample_count = int(training_save_file.attrs["sample_count"])
                sample_idxs = range(0, sample_count)
                # NOTE: sklearn.utils.shuffle returns a shuffled copy, so as written
                # this call does not actually reorder sample_idxs in place
                shuffle(sample_idxs)
                training_sample_idxs = sample_idxs[0:int((1-validation_ratio)*sample_count)]
                validation_sample_idxs = sample_idxs[int((1-validation_ratio)*sample_count):]
                # generate sequences for validation
                if validation_ratio:
                    validation_sample_count = len(validation_sample_idxs)
                    batches = int(validation_sample_count/batch_size)
                    remainder_samples = validation_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][validation_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send the remainder samples as one batch, if any remain
                    if remainder_samples:
                        X = training_save_file["X"][validation_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][validation_sample_idxs[-remainder_samples:]]
                        yield (X, Y)
                # generate sequences for training
                else:
                    training_sample_count = len(training_sample_idxs)
                    batches = int(training_sample_count/batch_size)
                    remainder_samples = training_sample_count%batch_size
                    # generate batches of samples
                    for idx in xrange(0, batches):
                        X = training_save_file["X"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        Y = training_save_file["Y"][training_sample_idxs[idx*batch_size:idx*batch_size+batch_size]]
                        yield (X, Y)
                    # send the remainder samples as one batch, if any remain
                    if remainder_samples:
                        X = training_save_file["X"][training_sample_idxs[-remainder_samples:]]
                        Y = training_save_file["Y"][training_sample_idxs[-remainder_samples:]]
                        yield (X, Y)

    def print_osr_summary(self):
        """ Print a summary representation of the OSR model """
        print "\n*** MODEL SUMMARY ***"
        self.osr.summary()

    def generate_osr_model(self):
        """ Build the optical speech recognizer model """
        print "".join(["\nGenerating OSR model\n", "-"*40])
        with h5py.File(self.training_save_fn, "r") as training_save_file:
            class_count = len(training_save_file.attrs["training_classes"].split(","))
        osr = Sequential()
        print " - Adding convolution layers"
        osr.add(Convolution3D(nb_filter=32,
                              kernel_dim1=3, kernel_dim2=3, kernel_dim3=3,
                              border_mode="same",
                              input_shape=(1, self.frames_per_sequence, self.rows, self.columns),
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=64,
                              kernel_dim1=3, kernel_dim2=3, kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Convolution3D(nb_filter=128,
                              kernel_dim1=3, kernel_dim2=3, kernel_dim3=3,
                              border_mode="same",
                              activation="relu"))
        osr.add(MaxPooling3D(pool_size=(3, 3, 3)))
        osr.add(Dropout(0.2))
        osr.add(Flatten())
        print " - Adding fully connected layers"
        osr.add(Dense(output_dim=128, init="normal", activation="relu"))
        osr.add(Dense(output_dim=128, init="normal", activation="relu"))
        osr.add(Dense(output_dim=128, init="normal", activation="relu"))
        osr.add(Dropout(0.2))
        osr.add(Dense(output_dim=class_count, init="normal", activation="softmax"))
        print " - Compiling model"
        optimizer = Nadam(lr=0.002,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          schedule_decay=0.004)
        osr.compile(loss="categorical_crossentropy",
                    optimizer=optimizer,
                    metrics=["categorical_accuracy"])
        self.osr = osr
        print " * OSR MODEL GENERATED * "

    def process_training_data(self):
        """ Preprocess the training data and save it to an HDF5 file """
        # load training metadata from the config file
        training_metadata = {}
        training_classes = []
        with open(self.config_file) as training_config:
            training_metadata = json.load(training_config)
            training_classes = sorted(list(training_metadata.keys()))

        print "".join(["\n",
                       "Found {0} training classes!\n".format(len(training_classes)),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])
        print ""

        # count the number of samples
        sample_count = 0
        sample_count_by_class = [0]*len(training_classes)
        for class_label, training_class in enumerate(training_classes):
            # get the training class sequence paths
            training_class_data_path = training_metadata[training_class]
            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                             for file_name in os.listdir(training_class_data_path)
                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                 and ".mov" in file_name)]
            # update the sample count
            sample_count += len(training_class_sequence_paths)
            sample_count_by_class[class_label] = len(training_class_sequence_paths)

        print "".join(["\n",
                       "Found {0} training samples!\n".format(sample_count),
                       "-"*40])
        for class_label, training_class in enumerate(training_classes):
            print "{0:<4d} {1:<10s} {2:<6d}".format(class_label, training_class, sample_count_by_class[class_label])
        print ""

        # initialize the HDF5 save file, clearing an older duplicate file first if one exists
        try:
            print "File \"{0}\" already exists! Overwriting previous saved file.\n".format(self.training_save_fn)
            os.remove(self.training_save_fn)
        except OSError:
            pass

        # process the training data and save it to the HDF5 file
        print "Generating {0} samples from {1} samples via data augmentation\n".format(sample_count*self.samples_generated_per_sample,
                                                                                       sample_count)
        sample_count = sample_count*self.samples_generated_per_sample
        with h5py.File(self.training_save_fn, "w") as training_save_file:
            training_save_file.attrs["training_classes"] = np.string_(",".join(training_classes))
            training_save_file.attrs["sample_count"] = sample_count
            x_training_dataset = training_save_file.create_dataset("X",
                                                                   shape=(sample_count, 1, self.frames_per_sequence, self.rows, self.columns),
                                                                   dtype="f")
            y_training_dataset = training_save_file.create_dataset("Y",
                                                                   shape=(sample_count, len(training_classes)),
                                                                   dtype="i")

            # iterate through each class's data
            sample_idx = 0
            for class_label, training_class in enumerate(training_classes):
                # get the training class sequence paths
                training_class_data_path = training_metadata[training_class]
                training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)
                                                 for file_name in os.listdir(training_class_data_path)
                                                 if (os.path.isfile(os.path.join(training_class_data_path, file_name))
                                                     and ".mov" in file_name)]
                # iterate through each sequence
                for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):
                    sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r"
                                     .format(training_class, idx+1, len(training_class_sequence_paths)))
                    sys.stdout.flush()

                    # accumulate samples and labels
                    samples_batch = self.process_frames(training_class_sequence_path)
                    label = [0]*len(training_classes)
                    label[class_label] = 1
                    for sample in samples_batch:
                        x_training_dataset[sample_idx] = sample
                        y_training_dataset[sample_idx] = label

                        # update the sample index
                        sample_idx += 1

                print "\n"

            training_save_file.close()  # redundant: the with-statement already closes the file

        print "Training data processed and saved to {0}".format(self.training_save_fn)

    def process_frames(self, video_file_path):
        """ Preprocess the frames of a sequence """
        # Haar cascades for localizing the mouth region
        face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
        mouth_cascade = cv2.CascadeClassifier('haarcascade_mcs_mouth.xml')

        video = cv2.VideoCapture(video_file_path)
        success, frame = video.read()

        frames = []
        success = True

        # convert frames to grayscale, localize the mouth region,
        # resize the frames, and accumulate the valid ones
        while success:
            success, frame = video.read()
            if success:
                # convert to grayscale
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # localize a single facial region
                faces_coords = face_cascade.detectMultiScale(frame, 1.3, 5)
                if len(faces_coords) == 1:
                    face_x, face_y, face_w, face_h = faces_coords[0]
                    frame = frame[face_y:face_y + face_h, face_x:face_x + face_w]

                    # localize the mouth region
                    mouth_coords = mouth_cascade.detectMultiScale(frame, 1.3, 5)
                    threshold = 0
                    for (mouth_x, mouth_y, mouth_w, mouth_h) in mouth_coords:
                        if (mouth_y > threshold):
                            threshold = mouth_y
                            valid_mouth_coords = (mouth_x, mouth_y, mouth_w, mouth_h)
                        else:
                            pass
                    mouth_x, mouth_y, mouth_w, mouth_h = valid_mouth_coords
                    frame = frame[mouth_y:mouth_y + mouth_h, mouth_x:mouth_x + mouth_w]

                    # resize the frame
                    frame = cv2.resize(frame, (self.columns, self.rows)).astype('float32')

                    # accumulate frames
                    frames.append(frame)

                # ignore detections of multiple facial regions
                else:
                    pass

        # equalize the sequence length by left-padding with the first frame
        if len(frames) < self.frames_per_sequence:
            frames = [frames[0]]*(self.frames_per_sequence - len(frames)) + frames
        frames = np.asarray(frames[0:self.frames_per_sequence])

        # pixel normalizer
        pix_norm = lambda frame: frame / 255.0

        samples_batch = [[map(pix_norm, frames)]]
        # random transformations for data augmentation
        for _ in xrange(0, self.samples_generated_per_sample-1):
            rotated_frames = random_rotation(frames, rg=45)
            shifted_frames = random_shift(rotated_frames, wrg=0.25, hrg=0.25)
            sheared_frames = random_shear(shifted_frames, intensity=0.79)
            zoomed_frames = random_zoom(sheared_frames, zoom_range=(1.25, 1.25))
            samples_batch.append([map(pix_norm, zoomed_frames)])

        return samples_batch

class ProgressDisplay(Callback):
    """ Progress display callback """
    def on_batch_end(self, epoch, logs={}):
        print "    Batch {0:<4d} => Accuracy: {1:>8.4f} | Loss: {2:>8.4f} | Size: {3:>4d}".format(
            int(logs["batch"])+1,
            float(logs["categorical_accuracy"]),
            float(logs["loss"]),
            int(logs["size"]))

if __name__ == "__main__":
    # example usage
    osr = OpticalSpeechRecognizer(rows=100,
                                  columns=150,
                                  frames_per_sequence=45,
                                  samples_generated_per_sample=10,
                                  config_file="training_config.json",
                                  training_save_fn="training_data.h5",
                                  osr_save_fn="osr_model.h5")
    osr.process_training_data()
    osr.generate_osr_model()
    osr.print_osr_summary()
    osr.train_osr_model()
    osr.save_osr_model()
    osr.load_osr_model()
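Since the identical script trains under Theano and fails only under TensorFlow, a quick way to narrow the problem down is to take the HDF5 generator out of the picture and push one random batch through the same channels-first 3D convolution stack under the TensorFlow backend. Below is a minimal isolation sketch, not a fix: it assumes the same Keras 1.x API the script uses, the (1, 45, 100, 150) input shape is copied from the __main__ block, and the two-sample random batch and the 10-class output layer are arbitrary placeholders.

import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D

# Confirm which backend and image dimension ordering are actually active
# (Keras reads them from ~/.keras/keras.json unless overridden in code).
print "Backend:", K.backend()
print "Dim ordering:", K.image_dim_ordering()

K.set_image_dim_ordering("th")  # mirror the script's channels-first setting

# Same front end as generate_osr_model(), with the dimensions from __main__:
# 45 frames of 100x150 grayscale pixels, one channel.
model = Sequential()
model.add(Convolution3D(nb_filter=32, kernel_dim1=3, kernel_dim2=3, kernel_dim3=3,
                        border_mode="same",
                        input_shape=(1, 45, 100, 150),
                        activation="relu"))
model.add(MaxPooling3D(pool_size=(3, 3, 3)))
model.add(Flatten())
model.add(Dense(output_dim=10, activation="softmax"))  # placeholder class count
model.compile(loss="categorical_crossentropy", optimizer="adam")

# Two random samples shaped like the real data. If this predict() call
# already fails under TensorFlow, the problem is in the model/backend
# configuration; if it passes, the data pipeline is the next suspect.
x = np.random.rand(2, 1, 45, 100, 150).astype("float32")
print model.predict(x).shape  # expect (2, 10)

If the isolation test runs cleanly, attention shifts to generate_training_sequences(), for example whether reading from the h5py file inside a generator that fit_generator consumes from a background thread behaves the same under both backends.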
Answer: