在 MacBook Pro 13 上使用视频输入训练 Keras 模型导致 "Killed: 9" 状态

系统的最终目标是对视频输入进行分类。训练数据由 1 到 2 秒的 1080×1920 视频序列组成。这些序列经过如下预处理:先将 RGB 帧转换为灰度,再通过填充空白帧使所有序列长度一致。模型使用一系列 3D 卷积层连接到全连接网络。当调用 fit 方法训练模型时,程序输出 "Killed: 9" 并停止执行。以下是使用的代码:

from keras.constraints import maxnormfrom keras.models import Sequentialfrom keras.layers import Densefrom keras.layers import Dropoutfrom keras.layers import Flattenfrom keras.layers.convolutional import Convolution3Dfrom keras.layers.convolutional import MaxPooling3Dfrom keras.optimizers import SGDfrom keras.utils import np_utilsfrom pprint import pprintfrom sklearn.utils import shuffleimport jsonimport osimport cv2import sysimport numpy as npclass VideoClassifier(object):    def __init__(self, rows, columns, frames_per_sequence):        self.rows = rows        self.columns = columns        self.frames_per_sequence = frames_per_sequence        self.X = []        self.Y = []        self.training_classes = []        self.vc = None    def train_vc_model(self):        """ 训练视频分类器        """        # 将数据分成训练和测试数据        print "Splitting data into training and test sets"        validation_split = 0.2        sample_count = len(self.X)        X_train = self.X[0:int(round(sample_count*(1-validation_split)))]        Y_train = self.Y[0:int(round(sample_count*(1-validation_split)))]        X_test = self.X[int(round(sample_count*(1-validation_split))):-1]        Y_test = self.Y[int(round(sample_count*(1-validation_split))):-1]        # 增量训练模型        print "Training video classifier"        self.vc.fit(x=X_train,                    y=Y_train,                    validation_data=(X_test, Y_test),                    batch_size=10,                    nb_epoch=10,                    verbose=2)        score = self.vc.evaluate(x=X_test,                                 y=Y_test,                                 batch_size=10)        print "Accuracy: {0:.2%}".format(scores[1]*100)    def generate_vc_model(self):        """ 构建视频分类器模型        """        print "".join(["Generating video classifier model\n",                       "-"*40])        vc = Sequential()        print " - Adding convolution layers"        vc.add(Convolution3D(nb_filter=32,                             kernel_dim1=2,   
                          kernel_dim2=2,                             kernel_dim3=2,                             border_mode="same",                             input_shape=(1, self.frames_per_sequence, self.rows, self.columns),                              activation="relu"))        vc.add(MaxPooling3D(pool_size=(2, 2, 2)))        vc.add(Dropout(0.5))        vc.add(Convolution3D(nb_filter=32,                             kernel_dim1=2,                             kernel_dim2=2,                             kernel_dim3=2,                             border_mode="same",                             activation="relu"))        vc.add(MaxPooling3D(pool_size=(2, 2, 2)))        vc.add(Dropout(0.5))        vc.add(Flatten())        print " - Adding fully connected layers"        vc.add(Dense(output_dim=32,                     init="normal",                     activation="relu"))        vc.add(Dense(output_dim=32,                     init="normal",                     activation="relu"))        vc.add(Dense(output_dim=3,                     init="normal",                     activation="softmax"))        print " - Compiling model"        sgd = SGD(lr=0.01,                  decay=1e-6,                  momentum=0.9,                  nesterov=True)        vc.compile(loss="categorical_crossentropy",                    optimizer=sgd)        self.vc = vc        print " * VIDEO CLASSIFIER MODEL GENERATED * "    def load_training_data(self):        """ 加载并预处理训练数据        """        # 从配置文件中加载训练元数据        training_metadata = {}        training_classes = []        with open("training_config.json") as training_config:            training_metadata = json.load(training_config)            training_classes = sorted(list(training_metadata.keys()))            print "".join(["\n",                           "Found {0} training classes!\n".format(len(training_classes)),                           "-"*40])            for class_label, training_class in enumerate(training_classes):                
print "{0:<4d} {1:<10s} {2:<30s}".format(class_label, training_class, training_metadata[training_class])            print ""        X, Y = self.preprocess_training_data(training_metadata, training_classes)        X, Y = shuffle(X, Y)        self.X = X        self.Y = Y        self.training_classes = training_classes    def preprocess_training_data(self, training_metadata, training_classes):        """ 预处理训练数据以便加载        """        # 处理训练数据        X = []        Y = []        # 迭代每个类的数据        for class_label, training_class in enumerate(training_classes):            # 获取训练类序列路径            training_class_data_path = training_metadata[training_class]            training_class_sequence_paths = [os.path.join(training_class_data_path, file_name)                                             for file_name in os.listdir(training_class_data_path)                                             if (os.path.isfile(os.path.join(training_class_data_path, file_name))                                                 and ".mov" in file_name)]            # 迭代每个序列            for idx, training_class_sequence_path in enumerate(training_class_sequence_paths):                sys.stdout.write("Processing training data for class \"{0}\": {1}/{2} sequences\r".format(training_class, idx+1, len(training_class_sequence_paths)))                sys.stdout.flush()                # 获取帧                frames = self.split_frames(training_class_sequence_path)                # 使序列长度一致                if len(frames) < self.frames_per_sequence:                    blank_frame = np.zeros((self.rows, self.columns))                    for _ in xrange(self.frames_per_sequence - len(frames)):                        frames.append(blank_frame)                frames = frames[0:self.frames_per_sequence]                frames = map(lambda frame: frame.astype("float32") / 255.0, frames)                # 添加帧和类标签到训练数据中                X.append([frames])                Y.append(class_label)            print "\n"        # 
标签进行独热编码        Y = np_utils.to_categorical(Y)        return X, Y    def split_frames(self, video_file_path):        """ 将视频序列分割成帧        """        video = cv2.VideoCapture(video_file_path)        success, frame = video.read()        frames = []        success = True        while success:          success, frame = video.read()          if success:            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))        return framesif __name__ == "__main__":    vc = VideoClassifier(1080, 1920, 30)    vc.load_training_data()    vc.generate_osr_model()    vc.train_osr_model()

据我所知,”Killed: 9″表示程序消耗了过多的内存,我的13英寸Macbook Pro只有8 GB的RAM。我已经尝试将批量大小减少到1,但即便如此也无法解决问题。还有其他方法可以训练Keras模型来应对这种内存限制吗?


回答:

这些图像对于分类来说太大了,近期的研究从未超过512×512的图像(主要用于对象检测)。对于分类,典型的大小约为256×256或224×224。

所以您唯一的选择是将图像缩小到合理的尺寸。减小批量大小通常也有助于降低内存占用,但对于如此巨大的单帧图像来说,即使批量为 1 也不足以解决问题。

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注