I am currently building an LSTM network in PyTorch to forecast time-series data. Following Roman's blog post, I implemented a simple LSTM for univariate time-series data; the class definition is below. However, I have been stuck for several days trying to add more features to the input data, such as hour of day, day of week, week of year, and so on.
class Model(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.lstm = nn.LSTMCell(self.input_size, self.hidden_size)
        self.linear = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, future=0, y=None):
        outputs = []

        # Reset the state of the LSTM;
        # the state is kept until the end of the sequence
        h_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)
        c_t = torch.zeros(input.size(0), self.hidden_size, dtype=torch.float32)

        for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
            h_t, c_t = self.lstm(input_t, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        for i in range(future):
            if y is not None and random.random() > 0.5:
                output = y[:, [i]]  # teacher forcing
            h_t, c_t = self.lstm(output, (h_t, c_t))
            output = self.linear(h_t)
            outputs += [output]

        outputs = torch.stack(outputs, 1).squeeze(2)
        return outputs


class Optimization:
    """A helper class to train, test and diagnose the LSTM"""

    def __init__(self, model, loss_fn, optimizer, scheduler):
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.train_losses = []
        self.val_losses = []
        self.futures = []

    @staticmethod
    def generate_batch_data(x, y, batch_size):
        for batch, i in enumerate(range(0, len(x) - batch_size, batch_size)):
            x_batch = x[i : i + batch_size]
            y_batch = y[i : i + batch_size]
            yield x_batch, y_batch, batch

    def train(
        self,
        x_train,
        y_train,
        x_val=None,
        y_val=None,
        batch_size=100,
        n_epochs=20,
        dropout=0.2,
        do_teacher_forcing=None,
    ):
        seq_len = x_train.shape[1]
        for epoch in range(n_epochs):
            start_time = time.time()
            self.futures = []

            train_loss = 0
            for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
                y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
                self.optimizer.zero_grad()
                loss = self.loss_fn(y_pred, y_batch)
                loss.backward()
                self.optimizer.step()
                train_loss += loss.item()
            self.scheduler.step()
            train_loss /= batch
            self.train_losses.append(train_loss)

            self._validation(x_val, y_val, batch_size)

            elapsed = time.time() - start_time
            print(
                "Epoch %d Train loss: %.2f. Validation loss: %.2f. Avg future: %.2f. Elapsed time: %.2fs."
                % (epoch + 1, train_loss, self.val_losses[-1], np.average(self.futures), elapsed)
            )

    def _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing):
        if do_teacher_forcing:
            future = random.randint(1, int(seq_len / 2))
            limit = x_batch.size(1) - future
            y_pred = self.model(x_batch[:, :limit], future=future, y=y_batch[:, limit:])
        else:
            future = 0
            y_pred = self.model(x_batch)
        self.futures.append(future)
        return y_pred

    def _validation(self, x_val, y_val, batch_size):
        if x_val is None or y_val is None:
            return
        with torch.no_grad():
            val_loss = 0
            batch = 1
            for x_batch, y_batch, batch in self.generate_batch_data(x_val, y_val, batch_size):
                y_pred = self.model(x_batch)
                loss = self.loss_fn(y_pred, y_batch)
                val_loss += loss.item()
            val_loss /= batch
            self.val_losses.append(val_loss)

    def evaluate(self, x_test, y_test, batch_size, future=1):
        with torch.no_grad():
            test_loss = 0
            actual, predicted = [], []
            for x_batch, y_batch, batch in self.generate_batch_data(x_test, y_test, batch_size):
                y_pred = self.model(x_batch, future=future)
                y_pred = (
                    y_pred[:, -len(y_batch) :] if y_pred.shape[1] > y_batch.shape[1] else y_pred
                )
                loss = self.loss_fn(y_pred, y_batch)
                test_loss += loss.item()
                actual += torch.squeeze(y_batch[:, -1]).data.cpu().numpy().tolist()
                predicted += torch.squeeze(y_pred[:, -1]).data.cpu().numpy().tolist()
            test_loss /= batch
            return actual, predicted, test_loss

    def plot_losses(self):
        plt.plot(self.train_losses, label="Training loss")
        plt.plot(self.val_losses, label="Validation loss")
        plt.legend()
        plt.title("Losses")
Below are a few helper functions that split and format the data so it can be fed into my LSTM network.
def to_dataframe(actual, predicted):
    return pd.DataFrame({"value": actual, "prediction": predicted})

def inverse_transform(scaler, df, columns):
    for col in columns:
        df[col] = scaler.inverse_transform(df[col])
    return df

def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather the input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)

def train_val_test_split_new(df, test_ratio=0.2, seq_len=100):
    y = df['value']
    X = df.drop(columns=['value'])
    train_ratio = 1 - test_ratio
    val_ratio = 1 - ((train_ratio - test_ratio) / train_ratio)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_ratio)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=val_ratio)
    return X_train, y_train, X_val, y_val, X_test, y_test
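For reference, this is roughly how I call split_sequences (the input array below is made up purely for illustration): it windows the flat table into overlapping sequences of n_steps rows, with the last column serving as the target.

# Rough usage sketch (array contents are illustrative only): the last column
# is the target value, the preceding columns are the calendar features.
import numpy as np

data = np.random.rand(200, 4)                # e.g. weekday, monthday, hour, value
X_data, y_data = split_sequences(data, n_steps=100)
print(X_data.shape)                          # (101, 100, 3)
print(y_data.shape)                          # (101,)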
I use the following DataFrames to train my model.
# df_train
                     value  weekday  monthday  hour
timestamp
2014-07-01 00:00:00  10844        1         1     0
2014-07-01 00:30:00   8127        1         1     0
2014-07-01 01:00:00   6210        1         1     1
2014-07-01 01:30:00   4656        1         1     1
2014-07-01 02:00:00   3820        1         1     2
...                    ...      ...       ...   ...
2015-01-31 21:30:00  24670        5        31    21
2015-01-31 22:00:00  25721        5        31    22
2015-01-31 22:30:00  27309        5        31    22
2015-01-31 23:00:00  26591        5        31    23
2015-01-31 23:30:00  26288        5        31    23

10320 rows × 4 columns

# x_train
                     weekday  monthday  hour
timestamp
2014-08-26 16:30:00        1        26    16
2014-08-18 16:30:00        0        18    16
2014-10-22 20:00:00        2        22    20
2014-12-10 08:00:00        2        10     8
2014-07-27 22:00:00        6        27    22
...                      ...       ...   ...
2014-08-24 05:30:00        6        24     5
2014-11-24 12:00:00        0        24    12
2014-12-18 06:00:00        3        18     6
2014-07-27 17:00:00        6        27    17
2014-12-05 21:00:00        4         5    21

6192 rows × 3 columns

# y_train
timestamp
2014-08-26 16:30:00    14083
2014-08-18 16:30:00    14465
2014-10-22 20:00:00    25195
2014-12-10 08:00:00    21348
2014-07-27 22:00:00    16356
                       ...
2014-08-24 05:30:00     2948
2014-11-24 12:00:00    16292
2014-12-18 06:00:00     7029
2014-07-27 17:00:00    18883
2014-12-05 21:00:00    26284
Name: value, Length: 6192, dtype: int64
After transforming and splitting the time-series data into smaller batches, the X and y training sets look like this:
X_data shape is (6093, 100, 3)
y_data shape is (6093,)

tensor([[[-1.0097,  1.1510,  0.6508],
         [-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         ...,
         [ 1.5044, -1.4417, -1.6413],
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087]],

        [[-1.5126,  0.2492,  0.6508],
         [-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         ...,
         [ 1.0016, -0.0890,  0.7941],
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076]],

        [[-0.5069,  0.7001,  1.2238],
         [-0.5069, -0.6526, -0.4952],
         [ 1.5044,  1.2637,  1.5104],
         ...,
         [ 1.5044, -0.9908, -0.2087],
         [ 0.4988,  0.5874,  0.5076],
         [ 0.4988,  0.5874, -0.6385]],

        ...,

        [[ 1.0016,  0.9255, -1.2115],
         [-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         ...,
         [ 1.5044,  0.9255, -0.9250],
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818]],

        [[-1.0097, -0.9908,  1.0806],
         [-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         ...,
         [-1.5126,  0.9255,  0.0778],
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941]],

        [[-0.0041,  0.8128,  0.3643],
         [-0.5069,  1.3765, -0.0655],
         [-0.0041, -1.6672, -0.4952],
         ...,
         [-0.0041,  0.2492, -0.7818],
         [ 1.5044,  1.2637,  0.7941],
         [ 0.4988, -1.2163,  1.3671]]])

tensor([ 0.4424,  0.1169,  0.0148,  ..., -1.1653,  0.5394,  1.6037])
Finally, to check that the dimensions of all the training, validation, and test sets are correct, I print out their shapes.
train shape is: torch.Size([6093, 100, 3])
train label shape is: torch.Size([6093])
val shape is: torch.Size([1965, 100, 3])
val label shape is: torch.Size([1965])
test shape is: torch.Size([1965, 100, 3])
test label shape is: torch.Size([1965])
When I try to build the model as follows, I end up with a RuntimeError pointing to an inconsistent input size.
model_params = {'train_ratio': 0.8,
                'validation_ratio': 0.2,
                'sequence_length': 100,
                'teacher_forcing': False,
                'dropout_rate': 0.2,
                'batch_size': 100,
                'num_of_epochs': 5,
                'hidden_size': 24,
                'n_features': 3,
                'learning_rate': 1e-3
                }

train_ratio = model_params['train_ratio']
val_ratio = model_params['validation_ratio']
seq_len = model_params['sequence_length']
teacher_forcing = model_params['teacher_forcing']
dropout_rate = model_params['dropout_rate']
batch_size = model_params['batch_size']
n_epochs = model_params['num_of_epochs']
hidden_size = model_params['hidden_size']
n_features = model_params['n_features']
lr = model_params['learning_rate']

model = Model(input_size=n_features, hidden_size=hidden_size, output_size=1)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
optimization = Optimization(model, loss_fn, optimizer, scheduler)

start_time = datetime.now()
optimization.train(x_train, y_train, x_val, y_val,
                   batch_size=batch_size,
                   n_epochs=n_epochs,
                   dropout=dropout_rate,
                   do_teacher_forcing=teacher_forcing)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-192-6fc406c0113d> in <module>
      6
      7 start_time = datetime.now()
----> 8 optimization.train(x_train, y_train, x_val, y_val,
      9                    batch_size=batch_size,
     10                    n_epochs=n_epochs,

<ipython-input-189-c18d20430910> in train(self, x_train, y_train, x_val, y_val, batch_size, n_epochs, dropout, do_teacher_forcing)
     68             train_loss = 0
     69             for x_batch, y_batch, batch in self.generate_batch_data(x_train, y_train, batch_size):
---> 70                 y_pred = self._predict(x_batch, y_batch, seq_len, do_teacher_forcing)
     71                 self.optimizer.zero_grad()
     72                 loss = self.loss_fn(y_pred, y_batch)

<ipython-input-189-c18d20430910> in _predict(self, x_batch, y_batch, seq_len, do_teacher_forcing)
     93         else:
     94             future = 0
---> 95             y_pred = self.model(x_batch)
     96         self.futures.append(future)
     97         return y_pred

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

<ipython-input-189-c18d20430910> in forward(self, input, future, y)
     17
     18         for i, input_t in enumerate(input.chunk(input.size(1), dim=1)):
---> 19             h_t, c_t = self.lstm(input_t, (h_t, c_t))
     20             output = self.linear(h_t)
     21             outputs += [output]

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
    963
    964     def forward(self, input: Tensor, hx: Optional[Tuple[Tensor, Tensor]] = None) -> Tuple[Tensor, Tensor]:
--> 965         self.check_forward_input(input)
    966         if hx is None:
    967             zeros = torch.zeros(input.size(0), self.hidden_size, dtype=input.dtype, device=input.device)

~\Anaconda3\lib\site-packages\torch\nn\modules\rnn.py in check_forward_input(self, input)
    789     def check_forward_input(self, input: Tensor) -> None:
    790         if input.size(1) != self.input_size:
--> 791             raise RuntimeError(
    792                 "input has inconsistent input_size: got {}, expected {}".format(
    793                     input.size(1), self.input_size))

RuntimeError: input has inconsistent input_size: got 1, expected 3
I suspect that my current LSTM model class does not support data with multiple features. I have tried several different approaches recently, but without success so far. Feel free to share your thoughts or point me in a direction that would solve this problem.
As suggested by @stackoverflowuser2010, I printed the shapes of the tensors input_t, h_t and c_t that are fed into the forward step right before the error is thrown.
input_t  torch.Size([100, 1, 3])
h_t      torch.Size([100, 24])
c_t      torch.Size([100, 24])
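To illustrate what I think is going on (a standalone sketch, not taken from my actual pipeline): nn.LSTMCell expects a 2-D input of shape (batch, input_size), but chunking the batch along dim=1 leaves a middle dimension of size 1, so the cell sees size 1 where it expects 3.

# Minimal reproduction of the mismatch as I understand it.
import torch
import torch.nn as nn

cell = nn.LSTMCell(3, 24)                        # input_size=3, hidden_size=24
x = torch.randn(100, 100, 3)                     # (batch, seq_len, n_features)
input_t = x.chunk(x.size(1), dim=1)[0]           # shape: (100, 1, 3)
h_t = torch.zeros(100, 24)
c_t = torch.zeros(100, 24)
# cell(input_t, (h_t, c_t))                      # RuntimeError: got 1, expected 3
h_t, c_t = cell(input_t.squeeze(1), (h_t, c_t))  # works once the extra dim is removed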
Answer:
After a few weeks of poking around, I solved the problem. It has been a fruitful journey for me, so I'd like to share what I found. If you would like a complete walkthrough with code, take a look at my Medium article on this problem.
Just as with Pandas, I found that things tend to go faster and more smoothly when I stick to the PyTorch way of doing things. Both libraries rely on NumPy, and I'm sure one could carry out practically all table and matrix operations explicitly with NumPy arrays and functions. However, doing so would throw away all the nice abstractions and performance improvements these libraries offer and turn every step into a computer-science exercise. That's fun until it isn't.
Rather than manually massaging all the training and validation sets to pass to the model, PyTorch's TensorDataset and DataLoader classes helped me greatly. After scaling the feature and target sets for training and validation, we end up with NumPy arrays. We can convert those arrays to tensors and use them to create our TensorDataset, or a custom Dataset depending on your requirements. Finally, DataLoaders let us iterate over these datasets much more easily, since they already provide built-in options for batching, shuffling, and dropping the last incomplete batch.
train_features = torch.Tensor(X_train_arr)
train_targets = torch.Tensor(y_train_arr)
val_features = torch.Tensor(X_val_arr)
val_targets = torch.Tensor(y_val_arr)

train = TensorDataset(train_features, train_targets)
train_loader = DataLoader(train, batch_size=64, shuffle=False, drop_last=True)
val = TensorDataset(val_features, val_targets)
val_loader = DataLoader(val, batch_size=64, shuffle=False, drop_last=True)
Having turned our data into iterable datasets, we can later use them for mini-batch training. Instead of explicitly defining batches or wrestling with matrix operations, we simply iterate over them via the DataLoaders, as shown below.
model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.Adam(model.parameters(), lr=1e-2)

train_losses = []
val_losses = []
train_step = make_train_step(model, criterion, optimizer)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for epoch in range(n_epochs):
    batch_losses = []
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.view([batch_size, -1, n_features]).to(device)
        y_batch = y_batch.to(device)
        loss = train_step(x_batch, y_batch)
        batch_losses.append(loss)
    training_loss = np.mean(batch_losses)
    train_losses.append(training_loss)

    with torch.no_grad():
        batch_val_losses = []
        for x_val, y_val in val_loader:
            x_val = x_val.view([batch_size, -1, n_features]).to(device)
            y_val = y_val.to(device)
            model.eval()
            yhat = model(x_val)
            val_loss = criterion(y_val, yhat).item()
            batch_val_losses.append(val_loss)
        validation_loss = np.mean(batch_val_losses)
        val_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.4f}\t Validation loss: {validation_loss:.4f}")
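The make_train_step factory used above is not reproduced in this answer (the full version is in the article). As a rough idea, it can be a small closure that performs a single optimization step, along these lines:

# Illustrative sketch of a make_train_step factory (one possible version,
# not necessarily identical to the one used above): it returns a closure
# that performs a single training step and reports the loss.
def make_train_step(model, criterion, optimizer):
    def train_step(x, y):
        model.train()              # put the model in training mode
        yhat = model(x)            # forward pass
        loss = criterion(y, yhat)  # compute the loss
        loss.backward()            # backpropagate
        optimizer.step()           # update the parameters
        optimizer.zero_grad()      # clear gradients for the next step
        return loss.item()
    return train_step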
Another cool feature PyTorch offers is the view() function, which allows faster and more memory-efficient reshaping of tensors. Since I defined my LSTM model with batch_first=True, the batch tensor for the feature set must have the shape (batch size, time steps, number of features). The line x_batch = x_batch.view([batch_size, -1, n_features]).to(device) in the code above does exactly that.
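The LSTMModel class itself is also not reproduced here (see the article for the full version), but to make the shape discussion concrete, a minimal sketch of an nn.LSTM-based model with batch_first=True could look like the following; the constructor argument names follow the call above, everything else is illustrative:

# Minimal sketch of an LSTM regressor that expects (batch, seq, features) input.
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        # batch_first=True -> inputs/outputs are (batch, time steps, features)
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Zero-initialize the hidden and cell states for each batch
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Use the output of the last time step for the prediction
        return self.fc(out[:, -1, :])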
I hope this answer helps people dealing with similar problems, or at least gives them an idea of which direction to take. I changed a great deal compared to the code shared in the original post, but for simplicity's sake I won't list it all here. Feel free to check out the rest of it in my other Stack Overflow post here.