I am currently building an LSTM network in PyTorch to forecast time-series data. Following Roman's blog post, I implemented a simple LSTM for univariate time-series data; the class definition is below. However, I have been stuck for several days trying to add more features to the input data, such as hour of day, day of week, week of year, and so on.
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # Reset the state of the LSTM;
        # the state is kept until the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs


class Optimization:
    """A helper class to train, test and diagnose the LSTM"""

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, int(seq_len / 2))
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
Below are a few helper functions that split and format the data so it can be fed into my LSTM network.
def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})

def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df

def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather the input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

def train_val_test_split_new(df, test_ratio=0.2, seq_len=100):
    y = df['value']
    X = df.drop(columns=['value'])
    train_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)
    return X_train, y_train, X_val, y_val, X_test, y_test
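For reference, this is roughly how I call split_sequences (the input array below is made up purely for illustration): it windows the flat table into overlapping sequences of n_steps rows, with the last column serving as the target.

# Rough usage sketch (array contents are illustrative only): the last column
# is the target value, the preceding columns are the calendar features.
import numpy as np

data = np.random.rand(200, 4)                # e.g. weekday, monthday, hour, value
X_data, y_data = split_sequences(data, n_steps=100)
print(X_data.shape)                          # (101, 100, 3)
print(y_data.shape)                          # (101,)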
I use the following DataFrames to train my model.
# df_train
                     value  weekday  monthday  hour
timestamp
2014-07-01 00:00:00  10844        1         1     0
2014-07-01 00:30:00   8127        1         1     0
2014-07-01 01:00:00   6210        1         1     1
2014-07-01 01:30:00   4656        1         1     1
2014-07-01 02:00:00   3820        1         1     2
...                    ...      ...       ...   ...
2015-01-31 21:30:00  24670        5        31    21
2015-01-31 22:00:00  25721        5        31    22
2015-01-31 22:30:00  27309        5        31    22
2015-01-31 23:00:00  26591        5        31    23
2015-01-31 23:30:00  26288        5        31    23

10320 rows × 4 columns

# x_train
                     weekday  monthday  hour
timestamp
2014-08-26 16:30:00        1        26    16
2014-08-18 16:30:00        0        18    16
2014-10-22 20:00:00        2        22    20
2014-12-10 08:00:00        2        10     8
2014-07-27 22:00:00        6        27    22
...                      ...       ...   ...
2014-08-24 05:30:00        6        24     5
2014-11-24 12:00:00        0        24    12
2014-12-18 06:00:00        3        18     6
2014-07-27 17:00:00        6        27    17
2014-12-05 21:00:00        4         5    21

6192 rows × 3 columns

# y_train
timestamp
2014-08-26 16:30:00    14083
2014-08-18 16:30:00    14465
2014-10-22 20:00:00    25195
2014-12-10 08:00:00    21348
2014-07-27 22:00:00    16356
                       ...
2014-08-24 05:30:00     2948
2014-11-24 12:00:00    16292
2014-12-18 06:00:00     7029
2014-07-27 17:00:00    18883
2014-12-05 21:00:00    26284
Name: value, Length: 6192, dtype: int64
After transforming and splitting the time-series data into smaller batches, the X and y training sets look like this:
X_data shape is (6093, 100, 3)
y_data shape is (6093,)

tensor([[[-1.0097,  1.1510,  0.6508],
         [-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         ...,
         [ 1.5044, -1.4417, -1.6413],
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087]],

        [[-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         ...,
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076]],

        [[-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         [ 1.5044,  1.2637,  1.5104],
         ...,
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076],
         [ 0.4988,  0.5874, -0.6385]],

        ...,

        [[ 1.0016,  0.9255, -1.2115],
         [-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         ...,
         [ 1.5044,  0.9255, -0.9250],
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818]],

        [[-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         ...,
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941]],

        [[-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         [-0.0041, -1.6672, -0.4952],
         ...,
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941],
         [ 0.4988, -1.2163,  1.3671]]])

tensor([ 0.4424,  0.1169,  0.0148,  ..., -1.1653,  0.5394,  1.6037])
Finally, to check that the dimensions of all the training, validation, and test sets are correct, I print out their shapes.
train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])
When I try to build the model as follows, I end up with a RuntimeError pointing to an inconsistent input size.
model_params = {'train_ratio': 0.8,
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
                }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']

model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dropout=dropout_rate,
                   do_teacher_forcing=teacher_forcing)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
      6
      7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val,
      9                    batch_size=batch_size,
     10                    n_epochs=n_epochs,

<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
     68             train_loss = 0
     69             for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70                 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
     71                 self.optimizer.zero_grad()
     72                 loss = self.loss_fn(y_pred, y_batch)

<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
     93         else:
     94             future = 0
---> 95             y_pred = self.model(x_batch)
     96         self.futures.append(future)
     97         return y_pred

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-189-c18d20430910> in forward(self, input, future, y)
     17
     18         for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19             h_t, c_t = self.lstm(input_t, (h_t, c_t))
     20             output = self.linear(h_t)
     21             outputs += [output]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    963
    964     def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965         self.check_forward_input(input)
    966         if hx is None:
    967             zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
    789     def check_forward_input(self, input: Tensor) -> None:
    790         if input.size(1) != self.input_size:
--> 791             raise RuntimeError(
    792                 "input has inconsistent input_size: got {}, expected {}".format(
    793                     input.size(1), self.input_size))

RuntimeError: input has inconsistent input_size: got 1, expected 3
I suspect that my current LSTM model class does not support data with multiple features. I have tried several different approaches recently, but without success so far. Feel free to share your thoughts or point me in a direction that would solve this problem.
As suggested by @stackoverflowuser2010, I printed the shapes of the tensors input_t, h_t and c_t that are fed into the forward step right before the error is thrown.
input_t  torch.Size([100, 1, 3])
h_t      torch.Size([100, 24])
c_t      torch.Size([100, 24])
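To illustrate what I think is going on (a standalone sketch, not taken from my actual pipeline): nn.LSTMCell expects a 2-D input of shape (batch, input_size), but chunking the batch along dim=1 leaves a middle dimension of size 1, so the cell sees size 1 where it expects 3.

# Minimal reproduction of the mismatch as I understand it.
import torch
import torch.nn as nn

cell = nn.LSTMCell(3, 24)                        # input_size=3, hidden_size=24
x = torch.randn(100, 100, 3)                     # (batch, seq_len, n_features)
input_t = x.chunk(x.size(1), dim=1)[0]           # shape: (100, 1, 3)
h_t = torch.zeros(100, 24)
c_t = torch.zeros(100, 24)
# cell(input_t, (h_t, c_t))                      # RuntimeError: got 1, expected 3
h_t, c_t = cell(input_t.squeeze(1), (h_t, c_t))  # works once the extra dim is removed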
Answer:
After a few weeks of poking around, I solved the problem. It has been a fruitful journey for me, so I'd like to share what I found. If you would like a complete walkthrough with code, take a look at my Medium article on this problem.
Just as with Pandas, I found that things tend to go faster and more smoothly when I stick to the PyTorch way of doing things. Both libraries rely on NumPy, and I'm sure one could carry out practically all table and matrix operations explicitly with NumPy arrays and functions. However, doing so would throw away all the nice abstractions and performance improvements these libraries offer and turn every step into a computer-science exercise. That's fun until it isn't.
Rather than manually massaging all the training and validation sets to pass to the model, PyTorch's TensorDataset and DataLoader classes helped me greatly. After scaling the feature and target sets for training and validation, we end up with NumPy arrays. We can convert those arrays to tensors and use them to create our TensorDataset, or a custom Dataset depending on your requirements. Finally, DataLoaders let us iterate over these datasets much more easily, since they already provide built-in options for batching, shuffling, and dropping the last incomplete batch.
train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)
val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)
Having turned our data into iterable datasets, we can later use them for mini-batch training. Instead of explicitly defining batches or wrestling with matrix operations, we simply iterate over them via the DataLoaders, as shown below.
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)

    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
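The make_train_step factory used above is not reproduced in this answer (the full version is in the article). As a rough idea, it can be a small closure that performs a single optimization step, along these lines:

# Illustrative sketch of a make_train_step factory (one possible version,
# not necessarily identical to the one used above): it returns a closure
# that performs a single training step and reports the loss.
def make_train_step(model, criterion, optimizer):
    def train_step(x, y):
        model.train()              # put the model in training mode
        yhat = model(x)            # forward pass
        loss = criterion(y, yhat)  # compute the loss
        loss.backward()            # backpropagate
        optimizer.step()           # update the parameters
        optimizer.zero_grad()      # clear gradients for the next step
        return loss.item()
    return train_step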
Another cool feature PyTorch offers is the view() function, which allows faster and more memory-efficient reshaping of tensors. Since I defined my LSTM model with batch_first=True, the batch tensor for the feature set must have the shape (batch size, time steps, number of features). The line x_batch = x_batch.view([batch_size, -1, n_features]).to(device) in the code above does exactly that.
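The LSTMModel class itself is also not reproduced here (see the article for the full version), but to make the shape discussion concrete, a minimal sketch of an nn.LSTM-based model with batch_first=True could look like the following; the constructor argument names follow the call above, everything else is illustrative:

# Minimal sketch of an LSTM regressor that expects (batch, seq, features) input.
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True -> inputs/outputs are (batch, time steps, features)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Zero-initialize the hidden and cell states for each batch
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Use the output of the last time step for the prediction
        return self.fc(out[:, -1, :])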
I hope this answer helps people dealing with similar problems, or at least gives them an idea of which direction to take. I changed a great deal compared to the code shared in the original post, but for simplicity's sake I won't list it all here. Feel free to check out the rest of it in my other Stack Overflow post here.