Keras fit takes too much time

I have recently been learning deep reinforcement learning and wanted to apply it to a gym problem using Keras.

During training I noticed it was running far too slowly; when I looked into the cause, I found that the "fit" function was consuming a large amount of time.

Each episode takes 3-4 minutes to run.

Is there something wrong with what I am doing, or can you suggest some improvements?

import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.optimizers import Adam
from collections import deque
import random
import gym
import datetime

class DQN():
    def __init__(self, env):
        self.env = env
        self.memory = deque(maxlen=2000)
        self.gamma = 0.98
        self.epsilon = 1
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.998
        self.learning_rate = 0.001
        self.model = self.create_model()
        self.target_model = self.create_model()

    def create_model(self):
        model = keras.Sequential()
        state_shape = self.env.observation_space.shape
        model.add(keras.layers.Dense(48, activation="relu", input_dim=state_shape[0]))
        model.add(keras.layers.Dense(24, activation="relu"))
        model.add(keras.layers.Dense(self.env.action_space.n, activation="relu"))
        model.compile(loss="mse", optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, new_state, done):
        self.memory.append([state, action, reward, new_state, done])

    def replay(self):
        batch_size = 32
        if len(self.memory) < batch_size:
            return

        samples = random.sample(self.memory, batch_size)
        # states, actions, rewards, states_, dones = samples
        # targets = self.target_model.predict(states)
        # _states = [i for i in range(len(samples))]
        # targets = [[0 for j in range(self.env.action_space.n)] for i in range(len(samples))]
        _states = np.zeros((len(samples), 8))
        targets = np.zeros((len(samples), self.env.action_space.n))
        for i, sample in enumerate(samples):
            state, action, reward, new_state, done = sample
            _states[i] = state
            # target = self.target_model.predict(state)
            if done:
                targets[i][action] = reward
            else:
                Q_future = max(self.target_model.predict(new_state)[0])
                targets[i][action] = reward + Q_future*self.gamma
        self.model.fit(_states, targets, epochs=1, verbose=0)

        # for sample in samples:
        #     state, action, reward, new_state, done = sample
        #     target = self.target_model.predict(state)
        #     if done:
        #         target[0][action] = reward
        #     else:
        #         Q_future = max(self.target_model.predict(new_state)[0])
        #         target[0][action] = reward + Q_future*self.gamma
        #         start_time = datetime.datetime.now()
        #         self.model.fit(state, target, epochs=1, verbose=0)
        #         end_time = datetime.datetime.now()
        #         print("--fit--")
        #         print(end_time-start_time)

    def target_train(self):
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i]
        self.target_model.set_weights(target_weights)

    def act(self, state):
        self.epsilon *= self.epsilon_decay
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        return np.argmax(self.model.predict(state)[0])

    def save_model(self, fn):
        self.model.save(fn)

    def act_eval(self, state):
        return np.argmax(self.model.predict(state)[0])

    def evaluation(self, n_eval=10):
        total_reward = 0
        for _ in range(n_eval):
            self.env.reset()
            cur_state = self.env.reset().reshape(1,8)
            done = False
            while not done:
                action = self.act_eval(cur_state)
                new_state, reward, done, _ = self.env.step(action)
                total_reward += reward
                cur_state = new_state.reshape(1,8)
                return total_reward / n_eval

def main():
    save_path = "policies/"
    env = gym.make("LunarLander-v2")

    trials = 2000
    trial_len = 500
    update_target_network = 500
    agent = DQN(env=env)
    for trial in range(trials):
        cur_state = env.reset().reshape(1,8)
        time_step_cntr = 0
        # check execution durations
        dur_replay = 0
        dur_step = 0
        dur_act = 0
        for step in range(trial_len):
            print("Trial {0}, step {1}".format(trial, step))
            action = agent.act(cur_state) #
            new_state, reward, done, _ = env.step(action) #
            new_state = new_state.reshape(1,8)
            agent.remember(cur_state, action, reward, new_state, done)
            # learn from experience
            agent.replay() #
            # after "update_target_network" steps, update target network
            if time_step_cntr % update_target_network == 0:
                agent.target_train()
            time_step_cntr += 1
            cur_state = new_state
            if done:
                break
                # print("Duration replay {0}, duration act {1}, duration step {2}".format(dur_replay, dur_act, dur_step))

        # at each N steps, evaluate
        print("Evaluation over 10 episodes", agent.evaluation())

        print("Trial #{0} completed.".format(trial))
        # # print the progress
        # if trial % 100 == 0:
        #     print("Trial #{0} completed.".format(trial))
        # save the model
        # if trial % 20 == 0:
        agent.save_model(save_path + str(trial) + "__.model")
    agent.save_model(save_path + "_final" + "__.model")

if __name__ == "__main__":
    main()
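
For reference, the commented-out datetime measurements inside replay() above are how the fit duration was being checked. Below is a minimal standalone sketch of that kind of per-call timing (a sketch only; it assumes a model built by create_model() above, and uses the 8-dimensional observation and 4 actions of LunarLander-v2):

import datetime
import numpy as np

# Hypothetical timing check: measure a single predict and a single fit call
# on a batch of size 1, mirroring the commented-out measurements in replay().
state = np.zeros((1, 8))    # one LunarLander-v2 observation
target = np.zeros((1, 4))   # one row of Q-value targets (4 actions)

start = datetime.datetime.now()
model.predict(state)
print("predict:", datetime.datetime.now() - start)

start = datetime.datetime.now()
model.fit(state, target, epochs=1, verbose=0)
print("fit:", datetime.datetime.now() - start)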

Answer:

Your problem is not the fit call itself, but the loop inside your replay() method. In situations like this, try to replace loops with numpy operations; they are more flexible and far more efficient.

Replace your replay method with the one below and see whether it runs faster:

def replay(self):
    batch_size = 32
    if len(self.memory) >= batch_size:
        # Draw a sample
        samples = random.sample(self.memory, batch_size)

        # Prepare the batch
        state, action, reward, new_state, done = zip(*samples)
        next_state = np.concatenate(new_state)
        done = np.array(done)[:,None]
        state = np.concatenate(state)
        reward = np.array(reward)[:,None]
        q_future = self.target_model.predict(next_state)
        targets = reward + self.gamma*np.max(q_future, axis=1, keepdims=True)*(1-done)

        # Fit the model
        self.model.fit(state, targets, epochs=1, verbose=0)
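
The key change is that the 32 separate target_model.predict(new_state) calls (one per sampled transition) are replaced by a single batched predict over all next states, and fit is called once on the whole batch; in Keras, the fixed per-call overhead of predict/fit dominates at batch size 1, so batching is where the speedup comes from. If you want to see that effect in isolation, a rough comparison along these lines (a sketch only; it assumes agent is an instance of the DQN class from the question) should make it visible:

import datetime
import numpy as np

# Hypothetical comparison: 32 single-state predict calls vs. one batched call.
states = np.random.rand(32, 8)

start = datetime.datetime.now()
for s in states:
    agent.target_model.predict(s.reshape(1, 8))
print("32 separate predict calls:", datetime.datetime.now() - start)

start = datetime.datetime.now()
agent.target_model.predict(states)
print("one batched predict call: ", datetime.datetime.now() - start)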
