I am training a neural network with PyTorch to predict house prices on the Boston housing dataset. The network structure is as follows:
from sklearn.datasets import load_boston
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
import torch.nn as nn
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(13, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        x = F.relu(x)
        return self.fc5(x)
And the data loader:
class BostonData(Dataset):
    __xs = []
    __ys = []

    def __init__(self, train=True):
        df = load_boston()
        index = int(len(df["data"]) * 0.7)
        if train:
            self.__xs = df["data"][0:index]
            self.__ys = df["target"][0:index]
        else:
            self.__xs = df["data"][index:]
            self.__ys = df["target"][index:]

    def __getitem__(self, index):
        return self.__xs[index], self.__ys[index]

    def __len__(self):
        return len(self.__xs)
In my first attempt I did not include any ReLU units, but after some research I found that adding them is common practice. It made no difference for me, though.
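For what it's worth, the forward above still applies fc2 directly after fc1 with no activation in between; a variant with a ReLU after every hidden layer (just a sketch of the usual pattern, not something I know fixes the issue) would replace forward in Net with:

def forward(self, x):
    # ReLU after every hidden layer; only the output layer stays linear
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    x = F.relu(self.fc4(x))
    return self.fc5(x)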
Here is the training code:
dset_train = BostonData(train=True)
dset_test = BostonData(train=False)

train_loader = DataLoader(dset_train, batch_size=30, shuffle=True)
test_loader = DataLoader(dset_test, batch_size=30, shuffle=True)

net = Net()
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

EPOCHS = 10000
lloss = []
for epoch in range(EPOCHS):
    for trainbatch in train_loader:
        X, y = trainbatch
        net.zero_grad()
        output = net(X.float())
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
    lloss.append(loss)
    print(loss)
After 10000 epochs, the loss plot looks like this:
I don't see any clear downward trend, and I don't know whether the problem is torch.nn.MSELoss(), the optimizer, or the network topology, so any help would be appreciated.
Edit: changing the learning rate and normalizing the data did not work for me. I added the line self.__xs = (self.__xs - self.__xs.mean()) / self.__xs.std() and changed the learning rate to lr = 0.01. The loss plot looks very similar to the first one.
The same plot, with lr = 0.01 and normalized data, after 1000 epochs:
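For reference, the line I added uses a single global mean and standard deviation across all 13 columns; a per-feature standardization inside __init__ (a sketch, assuming self.__xs is the NumPy array returned by load_boston) would look like this:

# per-column standardization: each of the 13 features gets its own mean/std
self.__xs = (self.__xs - self.__xs.mean(axis=0)) / self.__xs.std(axis=0)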
Answer:
You are right to append to lloss once per epoch, but you are appending loss, which reflects only the last batch of that epoch; you should append avg_train_loss instead.
Try something like this:
for epoch in range(EPOCHS):
    avg_train_loss = 0
    for trainbatch in train_loader:
        X, y = trainbatch
        net.zero_grad()
        output = net(X.float())
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()
        avg_train_loss += loss.item() / len(train_loader)
    lloss.append(avg_train_loss)
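With lloss now holding one averaged value per epoch, the curve can be plotted directly; a minimal sketch, assuming matplotlib is what you use for the plots:

import matplotlib.pyplot as plt

# one averaged training MSE per epoch
plt.plot(lloss)
plt.xlabel("epoch")
plt.ylabel("average training MSE")
plt.show()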