系统的最终目标是对视频输入进行分类。训练数据由1到2秒的1080×1920视频序列组成,这些序列通过将RGB帧转换为灰度并通过填充空白帧来确保视频序列具有相同长度进行预处理。使用了一系列3D卷积层连接到全连接网络。当通过调用fit方法训练模型时,返回了“Killed: 9”,程序停止执行。以下是使用的代码:
from keras.constraints import maxnormfrom keras.models import Sequentialfrom keras.layers import Densefrom keras.layers import Dropoutfrom keras.layers import Flattenfrom keras.layers.convolutional import Convolution3Dfrom keras.layers.convolutional import MaxPooling3Dfrom keras.optimizers import SGDfrom keras.utils import np_utilsfrom pprint import pprintfrom sklearn.utils import shuffleimport jsonimport osimport cv2import sysimport numpy as npclass VideoClassifier(object): def __init__(self, rows, columns, frames_per_sequence): self.rows = rows self.columns = columns self.frames_per_sequence = frames_per_sequence self.X = [] self.Y = [] self.training_classes = [] self.vc = None def train_vc_model(self): """ 训练视频分类器 """ # 将数据分成训练和测试数据 print "Splitting data into training and test sets" validation_split = 0.2 sample_count = len(self.X) X_train = self.X[0:int(round(sample_count*(1-validation_split)))] Y_train = self.Y[0:int(round(sample_count*(1-validation_split)))] X_test = self.X[int(round(sample_count*(1-validation_split))):-1] Y_test = self.Y[int(round(sample_count*(1-validation_split))):-1] # 增量训练模型 print "Training video classifier" self.vc.fit(x=X_train, y=Y_train, validation_data=(X_test, Y_test), batch_size=10, nb_epoch=10, verbose=2) score = self.vc.evaluate(x=X_test, y=Y_test, batch_size=10) print "Accuracy: {0:.2%}".format(scores[1]*100) def generate_vc_model(self): """ 构建视频分类器模型 """ print "".join(["Generating video classifier model\n", "-"*40]) vc = Sequential() print " - Adding convolution layers" vc.add(Convolution3D(nb_filter=32, kernel_dim1=2, kernel_dim2=2, kernel_dim3=2, border_mode="same", input_shape=(1, self.frames_per_sequence, self.rows, self.columns), activation="relu")) vc.add(MaxPooling3D(pool_size=(2, 2, 2))) vc.add(Dropout(0.5)) vc.add(Convolution3D(nb_filter=32, kernel_dim1=2, kernel_dim2=2, kernel_dim3=2, border_mode="same", activation="relu")) vc.add(MaxPooling3D(pool_size=(2, 2, 2))) vc.add(Dropout(0.5)) vc.add(Flatten()) print " - 
Adding fully connected layers" vc.add(Dense(output_dim=32, init="normal", activation="relu")) vc.add(Dense(output_dim=32, init="normal", activation="relu")) vc.add(Dense(output_dim=3, init="normal", activation="softmax")) print " - Compiling model" sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) vc.compile(loss="categorical_crossentropy", optimizer=sgd) self.vc = vc print " * VIDEO CLASSIFIER MODEL GENERATED * " def load_training_data(self): """ 加载并预处理训练数据 """ # 从配置文件中加载训练元数据 training_metadata = {} training_classes = [] with open("training_config.json") as training_config: training_metadata = json.load(training_config) training_classes = sorted(list(training_metadata.keys())) print "".join(["\n", "Found {0} training classes!\n".format(len(training_classes)), "-"*40]) for class_label, training_class in enumerate(training_classes): print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class]) print "" X, Y = self.preprocess_training_data(training_metadata, training_classes) X, Y = shuffle(X, Y) self.X = X self.Y = Y self.training_classes = training_classes def preprocess_training_data(self, training_metadata, training_classes): """ 预处理训练数据以便加载 """ # 处理训练数据 X = [] Y = [] # 迭代每个类的数据 for class_label, training_class in enumerate(training_classes): # 获取训练类序列路径 training_class_data_path = training_metadata[training_class] training_class_sequence_paths = [os.path.join(training_class_data_path, file_name) for file_name in os.listdir(training_class_data_path) if (os.path.isfile(os.path.join(training_class_data_path, file_name)) and ".mov" in file_name)] # 迭代每个序列 for idx, training_class_sequence_path in enumerate(training_class_sequence_paths): sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r".format(training_class, idx+1, len(training_class_sequence_paths))) sys.stdout.flush() # 获取帧 frames = self.split_frames(training_class_sequence_path) # 使序列长度一致 if len(frames) < self.frames_per_sequence: 
blank_frame = np.zeros((self.rows, self.columns)) for _ in xrange(self.frames_per_sequence - len(frames)): frames.append(blank_frame) frames = frames[0:self.frames_per_sequence] frames = map(lambda frame: frame.astype("float32") / 255.0, frames) # 添加帧和类标签到训练数据中 X.append([frames]) Y.append(class_label) print "\n" # 标签进行独热编码 Y = np_utils.to_categorical(Y) return X, Y def split_frames(self, video_file_path): """ 将视频序列分割成帧 """ video = cv2.VideoCapture(video_file_path) success, frame = video.read() frames = [] success = True while success: success, frame = video.read() if success: frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)) return framesif __name__ == "__main__": vc = VideoClassifier(1080, 1920, 30) vc.load_training_data() vc.generate_osr_model() vc.train_osr_model()
据我所知,"Killed: 9"表示程序消耗了过多的内存,我的13英寸Macbook Pro只有8 GB的RAM。我已经尝试将批量大小减少到1,但即便如此也无法解决问题。还有其他方法可以训练Keras模型来应对这种内存限制吗?
回答:
这些图像对于分类来说太大了,近期的研究从未超过512×512的图像(主要用于对象检测)。对于分类,典型的大小约为256×256或224×224。
所以您的唯一选择是将图像缩小到合理的大小。通常,减少批量大小也有助于降低内存使用,但在这种情况下不起作用——因为单个1080×1920的样本本身就已经太大,即使批量大小为1也会耗尽内存。