lstm:输入批次大小100与隐藏[0]批次大小1不匹配

我正在尝试向之前运行正常的AI模型中添加一个LSTM层。添加该层之后,在按批次训练时出现了下面这个错误。

之前没有LSTM时没有这个错误,一切正常。

输入批次大小100与隐藏[0]批次大小1不匹配。

我使用的是nn.LSTMCell

请问有人能帮我检查一下我是否遗漏了初始化我的lstmcell的某些参数,以便它也可以接受批次输入吗?

以下是我的代码…

# TD3 (actor / twin-critic) training script with an nn.LSTMCell placed in front
# of each network. The line structure below was restored from a paste whose
# newlines had been stripped; the code tokens themselves are unchanged.
import os
import time
import random
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from random import random as rndm
from torch.autograd import Variable
from collections import deque

# Module-level side effects: change the working directory (machine-specific
# Windows path) and make float64 the default tensor type.
os.chdir("C:\\Users\\granthjain\\Desktop\\startup_code")
torch.set_default_tensor_type('torch.DoubleTensor')


class ReplayBuffer(object):
  """List-backed store of transitions with a write pointer that wraps at max_size."""

  def __init__(self, max_size=1e6):
    self.storage = []
    # NOTE(review): the default 1e6 is a float, so `self.ptr` becomes a float
    # after the modulo in add(); that is why add() casts it with int().
    self.max_size = max_size
    self.ptr = 0

  def add(self, transition):
    """Append `transition`, or overwrite the slot at `ptr` once storage holds max_size items."""
    if len(self.storage) == self.max_size:
      self.storage[int(self.ptr)] = transition
    else:
      self.storage.append(transition)
    self.ptr = (self.ptr + 1) % self.max_size

  def sample(self, batch_size):
    """Draw `batch_size` random indices in [0, ptr) and return five float arrays.

    The arrays are (states, next_states, actions, rewards, dones). A transition
    with any None field is skipped, so the returned batch can be smaller than
    `batch_size`.
    """
    # Indices are drawn below the write pointer, not below len(self.storage).
    ind = np.random.randint(0, self.ptr, size=batch_size)
    batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = [], [], [], [], []
    for i in ind:
      state, next_state, action, reward, done = self.storage[i]

      # Skip incomplete transitions.
      if state is None:
          continue
      elif next_state is None:
          continue
      elif action is None:
          continue
      elif reward is None:
          continue
      elif done is None:
          continue

      batch_states.append(np.array(state, copy=False))
      batch_next_states.append(np.array(next_state, copy=False))
      batch_actions.append(np.array(action, copy=False))
      batch_rewards.append(np.array(reward, copy=False))
      batch_dones.append(np.array(done, copy=False))
    return np.array(batch_states,dtype=object).astype(float), np.array(batch_next_states,dtype=object).astype(float), np.array(batch_actions,dtype=object).astype(float), np.array(batch_rewards,dtype=object).astype(float), np.array(batch_dones,dtype=object).astype(float)


class Actor(nn.Module):
  """Policy network: LSTMCell(state_dim, 256) -> Linear 256 -> 400 -> 300 -> action_dim."""

  def __init__(self, state_dim, action_dim, max_action):
    super(Actor, self).__init__()
    self.lstm = nn.LSTMCell(state_dim, 256)
    self.layer_1 = nn.Linear(256, 400)
    self.layer_2 = nn.Linear(400, 300)
    self.layer_3 = nn.Linear(300, action_dim)
    # NOTE(review): the hidden/cell state is fixed to batch size 1 here, but
    # nn.LSTMCell needs (batch, hidden_size) states matching the input batch.
    # Feeding a training batch therefore raises the reported "input batch size
    # ... doesn't match hidden[0] batch size 1" error.
    # NOTE(review): these are plain tensor attributes, not registered buffers,
    # so presumably `.to(device)` does not move them - confirm.
    self.hx = torch.zeros(1,256)
    self.cx = torch.zeros(1,256)
    self.max_action = max_action

  def forward(self, x):
    """Advance the LSTM cell one step on `x` and map the new hidden state to an action.

    The updated (hx, cx) pair is stored on the module, so state carries over
    between calls until it is reset externally.
    """
    self.hx, self.cx = self.lstm(x, (self.hx, self.cx))
    x = F.relu(self.layer_1(self.hx))
    x = F.relu(self.layer_2(x))
    # tanh bounds the output to [-1, 1]; scaling gives [-max_action, max_action].
    x = self.max_action * torch.tanh(self.layer_3(x))
    return x


class Critic(nn.Module):
  """Twin Q-networks, each an LSTMCell over concat(state, action) followed by three Linear layers."""

  def __init__(self, state_dim, action_dim):
    super(Critic, self).__init__()
    # Defining the first Critic neural network
    self.lstm1 = nn.LSTMCell(state_dim + action_dim, 256)
    self.layer_1 = nn.Linear(256, 400)
    self.layer_2 = nn.Linear(400, 300)
    self.layer_3 = nn.Linear(300, 1)
    # Defining the second Critic neural network
    self.lstm2 = nn.LSTMCell(state_dim + action_dim, 256)
    self.layer_4 = nn.Linear(256, 400)
    self.layer_5 = nn.Linear(400, 300)
    self.layer_6 = nn.Linear(300, 1)
    # NOTE(review): same batch-size-1 state initialisation as in Actor.
    self.hx1 = torch.zeros(1,256)
    self.cx1 = torch.zeros(1,256)
    self.hx2 = torch.zeros(1,256)
    self.cx2 = torch.zeros(1,256)

  def forward(self, x, u):
    """Return (Q1, Q2) for state `x` and action `u`, concatenated along dim 1."""
    xu = torch.cat([x, u], 1)
    # Forward-Propagation on the first Critic Neural Network
    # NOTE(review): `self.lstm` is never defined on Critic (only `lstm1` and
    # `lstm2` are), so this line would raise AttributeError when reached.
    self.hx1,self.cx1 = self.lstm(xu, (self.hx1, self.cx1))
    x1 = F.relu(self.layer_1(self.hx1))
    x1 = F.relu(self.layer_2(x1))
    x1 = self.layer_3(x1)
    # Forward-Propagation on the second Critic Neural Network
    # NOTE(review): same undefined `self.lstm` attribute as above.
    self.hx2,self.cx2 = self.lstm(xu, (self.hx2, self.cx2))
    x2 = F.relu(self.layer_4(self.hx2))
    x2 = F.relu(self.layer_5(x2))
    x2 = self.layer_6(x2)
    return x1, x2

  def Q1(self, x, u):
    """Return only the first critic's Q-value for (x, u); used for the actor loss."""
    xu = torch.cat([x, u], 1)
    # NOTE(review): same undefined `self.lstm` attribute as in forward().
    self.hx1,self.cx1 = self.lstm(xu, (self.hx1, self.cx1))
    x1 = F.relu(self.layer_1(self.hx1))
    x1 = F.relu(self.layer_2(x1))
    x1 = self.layer_3(x1)
    return x1


# Selecting the device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Building the whole Training Process into a class
class TD3(object):
  """Bundles actor, critic, their target copies and Adam optimizers, plus the training step."""

  def __init__(self, state_dim, action_dim, max_action):
    self.actor = Actor(state_dim, action_dim, max_action).to(device)
    self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
    # Targets start as exact copies of the online networks.
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.actor_optimizer = torch.optim.Adam(self.actor.parameters())
    self.critic = Critic(state_dim, action_dim).to(device)
    self.critic_target = Critic(state_dim, action_dim).to(device)
    self.critic_target.load_state_dict(self.critic.state_dict())
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters())
    self.max_action = max_action

  def reset_hxcx(self):
    """Reset every network's LSTM hidden/cell state to zeros of shape (1, 256)."""
    self.actor.cx = torch.zeros(1,256)
    self.actor.hx = torch.zeros(1,256)
    self.actor_target.cx = torch.zeros(1,256)
    self.actor_target.hx = torch.zeros(1,256)
    self.critic.cx1 = torch.zeros(1,256)
    self.critic.cx2 = torch.zeros(1,256)
    self.critic.hx1 = torch.zeros(1,256)
    self.critic.hx2 = torch.zeros(1,256)

    self.critic_target.cx1 = torch.zeros(1,256)
    self.critic_target.cx2 = torch.zeros(1,256)
    self.critic_target.hx1 = torch.zeros(1,256)
    self.critic_target.hx2 = torch.zeros(1,256)

  def select_action(self, state):
    """Run the actor on `state` and return its output as a flattened NumPy array."""
    print("state =", type(state))
    return self.actor(state).cpu().data.numpy().flatten()

  def train(self, replay_buffer, iterations, batch_size=50, discount=0.99, tau=0.005, policy_noise=0.2, noise_clip=0.5, policy_freq=2):
    """Run `iterations` TD3 updates on batches sampled from `replay_buffer`.

    The critics are updated every iteration; the actor and both target
    networks are updated every `policy_freq` iterations.
    """
    for it in range(iterations):

      # Step 4: We sample a batch of transitions (s, s', a, r) from the memory
      batch_states, batch_next_states, batch_actions, batch_rewards, batch_dones = replay_buffer.sample(batch_size)

      batch_states=batch_states.astype(float)
      batch_next_states=batch_next_states.astype(float)
      batch_actions=batch_actions.astype(float)
      batch_rewards=batch_rewards.astype(float)
      batch_dones=batch_dones.astype(float)

      # NOTE(review): the tensors are not moved to `device` here - confirm this
      # is intended when CUDA is available.
      state = torch.from_numpy(batch_states)
      next_state = torch.from_numpy(batch_next_states)
      action = torch.from_numpy(batch_actions)
      reward = torch.from_numpy(batch_rewards)
      done = torch.from_numpy(batch_dones)

      # Debug prints left disabled by the author:
      #      print("actor cx:",self.actor.cx)
      #      print("actor hx:",self.actor.hx)
      #      print("actor_target cx:",self.actor_target.cx)
      #      print("actor_target cx:",self.actor_target.cx)
      #      print("self.critic.cx1:",self.critic.cx1)
      #      print("self.critic.cx2",self.critic.cx2)
      #      print("self.critic.hx1:",self.critic.hx1)
      #      print("self.critic.hx2:",self.critic.hx2)
      #      print("self.critic_target.cx1:",self.critic_target.cx1)
      #      print("self.critic_target.hx1",self.critic_target.hx1)
      #      print("self.critic_target.cx2:",self.critic_target.cx2)
      #      print("self.critic_target.hx2:",self.critic_target.hx2)

      # Step 5: From the next state s', the Actor target plays the next action a'
      # This is the first call that feeds a whole batch into an LSTMCell whose
      # stored state has batch size 1.
      next_action = self.actor_target(next_state)

      # Step 6: We add Gaussian noise to this next action a' and we clamp it in a range of values supported by the environment
      noise = torch.Tensor(batch_actions).data.normal_(0, policy_noise).to(device)
      noise = noise.clamp(-noise_clip, noise_clip)
      next_action = (next_action + noise).clamp(-self.max_action, self.max_action)

      # Step 7: The two Critic targets take each the couple (s', a') as input and return two Q-values Qt1(s',a') and Qt2(s',a') as outputs
      target_Q1, target_Q2 = self.critic_target(next_state, next_action)

      # Step 8: We keep the minimum of these two Q-values: min(Qt1, Qt2)
      target_Q = torch.min(target_Q1, target_Q2).double()

      # Step 9: We get the final target of the two Critic models, which is: Qt = r + gamma * min(Qt1, Qt2), where gamma is the discount factor
      # Reshape done/reward to column vectors (in place) so they broadcast against target_Q.
      done = done.resize_((done.shape[0],1))
      reward = reward.resize_((reward.shape[0],1))
      target_Q = reward + ((1 - done) * discount * target_Q).detach()

      # Step 10: The two Critic models take each the couple (s, a) as input and return two Q-values Q1(s,a) and Q2(s,a) as outputs
      current_Q1, current_Q2 = self.critic(state, action)

      # Step 11: We compute the loss coming from the two Critic models: Critic Loss = MSE_Loss(Q1(s,a), Qt) + MSE_Loss(Q2(s,a), Qt)
      critic_loss = F.mse_loss(current_Q1, target_Q) + F.mse_loss(current_Q2, target_Q)

      # Step 12: We backpropagate this Critic loss and update the parameters of the two Critic models with the critic optimizer (Adam, see __init__)
      self.critic_optimizer.zero_grad()
      critic_loss.backward()
      self.critic_optimizer.step()

      # Step 13: Once every `policy_freq` iterations, we update our Actor model by performing gradient ascent on the output of the first Critic model
      if it % policy_freq == 0:
        actor_loss = -self.critic.Q1(state, self.actor(state)).mean()
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Step 14: On the same schedule, we update the weights of the Actor target by polyak averaging
        for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
          target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

        # Step 15: On the same schedule, we update the weights of the Critic target by polyak averaging
        for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
          target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)

  # Making a save method to save a trained model
  def save(self, filename, directory):
    """Write the actor and critic state dicts to `<directory>/<filename>_{actor,critic}.pth`."""
    torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))
    torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))

  # Making a load method to load a pre-trained model
  def load(self, filename, directory):
    """Load the actor and critic state dicts written by save(); target networks are not reloaded."""
    self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename)))
    self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename)))


#set the parameters
# NOTE(review): start_timesteps, eval_freq and save_models are not referenced
# anywhere in the visible script.
start_timesteps = 1e3 # Number of iterations/timesteps before which the model randomly chooses an action, and after which it starts to use the policy network
eval_freq = 5e1 # How often the evaluation step is performed (after how many timesteps)
max_timesteps = 5e3 # Total number of iterations/timesteps (overwritten below with the number of data rows)
save_models = True # Boolean checker whether or not to save the pre-trained model
expl_noise = 0.1 # Exploration noise - STD value of exploration Gaussian noise
batch_size = 100 # Size of the batch
discount = 0.99 # Discount factor gamma, used in the calculation of the total discounted reward
tau = 0.005 # Target network update rate
policy_noise = 0.2 # STD of Gaussian noise added to the actions for the exploration purposes
noise_clip = 0.5 # Maximum value of the Gaussian noise added to the actions (policy)
policy_freq = 2 # Number of iterations to wait before the policy network (Actor model) is updated
state_dim = 3
action_dim = 3
max_action = 1
idx = 0


class env1:
    """Minimal environment stepping through the rows of `data`.

    The state is a 3-element tensor. Slot 1 is filled from `data` and the last
    slot starts at 1000.0; presumably the slots are (holdings, price, cash) -
    TODO confirm with the author.
    """

    def __init__(self,state_dim,action_dim,data):
        self.state_dim = state_dim
        self.state = torch.zeros(self.state_dim)
        self.state[state_dim-1]=1000.0
        self.next_state = torch.zeros(self.state_dim)
        self.next_state[state_dim-1] = 1000.0
        self.action_dim = action_dim
        self.data = data
        self.idx = 0
        self.count = 0
        self._max_episode_steps = 200
        self.state[1] = self.data[self.idx]
        self.next_state[1] = self.data[self.idx]

    def reset(self):
        """Re-create state/next_state at the current data index and return a copy of the state.

        `self.idx` is not reset, so successive episodes continue through `data`.
        """
        # NOTE(review): `state_dim` here is the module-level global, not
        # `self.state_dim`.
        self.next_state = torch.zeros(self.state_dim)
        self.next_state[state_dim-1]=1000.0
        self.state = torch.zeros(self.state_dim)
        self.state[state_dim-1]=1000.0
        self.state[1] = self.data[self.idx]
        self.next_state[1] = self.data[self.idx]
        self.count = 0
        ch = self.state[0]
        cp = self.state[1]
        cc = self.state[2]
        st = torch.tensor([ch,cp,cc])
        return st

    def step(self,action):
        """Apply the arg-max component of `action` and return (state, reward, done).

        `done` becomes True on every 100th call since the last reset.
        """
        done = False
        act_t = torch.argmax(action)
        # NOTE(review): no bounds check on self.idx against len(self.data).
        self.idx += 1
        if(act_t==0):
            # Convert all of slot 2 into whole units of slot 0 at the slot-1 rate.
            num_s = int(self.state[2]/self.state[1])
            self.next_state[0] += num_s
            self.next_state[2] = self.state[2]%self.state[1]
            self.next_state[1] = self.data[self.idx]
        elif(act_t==1):
            # Only advance slot 1 to the next data row.
            self.next_state[1] = self.data[self.idx]
        elif(act_t==2):
            # Convert all of slot 0 back into slot 2 at the slot-1 rate.
            self.next_state[2] = self.state[2]+ self.state[1]*self.state[0]
            self.next_state[0] = 0
            self.next_state[1] = self.data[self.idx]

        # Change in (slot 2 + slot 1 * slot 0) between state and next_state, minus 1 per step.
        reward = self.next_state[2] - self.state[2] + self.next_state[1]*self.next_state[0] - self.state[1]*self.state[0] -1

        self.state[0] = self.next_state[0]
        self.state[1] = self.next_state[1]
        self.state[2] = self.next_state[2]

        ch = self.state[0]
        cp = self.state[1]
        cc = self.state[2]

        st = torch.tensor([ch,cp,cc])
        self.count = (self.count + 1)%100
        if(self.count==0):
            done = True
        return st, reward, done


policy = TD3(state_dim, action_dim, max_action)

#Create the environment
data = pd.read_csv('PAGEIND.csv')
data = data['Close']
data = np.array(data).reshape(-1,1)
max_timesteps = data.shape[0]
sc = StandardScaler()
data = sc.fit_transform(data)
data = torch.DoubleTensor(data)
env = env1(state_dim,action_dim,data)
replay_buffer = ReplayBuffer()

#init training variables
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
done = True
t0 = time.time()

# We start the main loop over `max_timesteps` timesteps (the number of data rows)
while total_timesteps < max_timesteps:

  # If the episode is done
  if done:

    # If we are not at the very beginning, we start the training process of the model
    if total_timesteps != 0:
      print("Total Timesteps: {} Episode Num: {} Reward: {}".format(total_timesteps, episode_num, episode_reward))
      policy.train(replay_buffer, episode_timesteps, batch_size, discount, tau, policy_noise, noise_clip, policy_freq)

    # When the training step is done, we reset the state of the environment
    obs = env.reset()
    policy.reset_hxcx()
    # Set the Done to False
    done = False

    # Set rewards and episode timesteps to zero
    episode_reward = 0
    episode_timesteps = 0
    episode_num += 1

  # For the first 80% of max_timesteps, we play random one-hot actions
  if total_timesteps < 0.8*max_timesteps:#random action
      actn = torch.randn(action_dim)
      action = torch.zeros(action_dim)
      action[torch.argmax(actn)] = 1

  else: # After that, we switch to the model
    action = policy.select_action(torch.tensor(obs))
    # If the explore_noise parameter is not 0, we add noise to the action and take the arg-max as a one-hot action
    if expl_noise != 0:
      print("policy action:",action)
      actn = (action + torch.randn(action_dim))
      action = torch.zeros(action_dim)
      action[torch.argmax(actn)] = 1

  # The agent performs the action in the environment, then reaches the next state and receives the reward
  new_obs, reward, done = env.step(action)

  # We check if the episode is done
  done_bool = 0 if episode_timesteps + 1 == env._max_episode_steps else float(done)

  # We increase the total reward
  episode_reward += reward

  # We store the new transition into the Experience Replay memory (ReplayBuffer)
  replay_buffer.add((obs, new_obs, action, reward, done_bool))
  # We update the state, the episode timestep, the total timesteps, and the timesteps since the evaluation of the policy
  obs = new_obs
  episode_timesteps += 1
  total_timesteps += 1
  timesteps_since_eval += 1

回答:

如果你只是想用零来初始化单元状态和隐藏状态,那么根本不需要显式传入它们:不传入时,nn.LSTMCell默认就会使用全零状态(请参见文档)。然而,如果你决定自己初始化,就必须始终考虑批次大小(每次迭代的批次大小可能不同)。

最后,nn.LSTMCell的单元状态和隐藏状态的形状为(batch_size, hidden_size),而你在构造函数中只初始化了一次,形状为(1, hidden_size)。你需要将初始化移动到forward()中,并且每次调用时从x中获取批次大小,应该只是x.shape[0]


顺便提一下,你使用的是nn.LSTMCell,它只执行单个时间步的计算。每次前向传播只调用一次意义不大,请确认这确实符合你的需求;否则可以考虑改用nn.LSTM。

Related Posts

L1-L2正则化的不同系数

我想对网络的权重同时应用L1和L2正则化。然而,我找不…

使用scikit-learn的无监督方法将列表分类成不同组别,有没有办法?

我有一系列实例,每个实例都有一份列表,代表它所遵循的不…

f1_score metric in lightgbm

我想使用自定义指标f1_score来训练一个lgb模型…

通过相关系数矩阵进行特征选择

我在测试不同的算法时,如逻辑回归、高斯朴素贝叶斯、随机…

可以将机器学习库用于流式输入和输出吗?

已关闭。此问题需要更加聚焦。目前不接受回答。 想要改进…

在TensorFlow中,queue.dequeue_up_to()方法的用途是什么?

我对这个方法感到非常困惑,特别是当我发现这个令人费解的…

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注