I have designed the following model to classify the Fashion-MNIST data.
class CNN(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.conv1 = nn.Conv2d(784, 64, 2, 1, padding=5)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(64, 128, 2, 2, padding=0)
        self.conv2_bn = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.dense = nn.Linear(1, 128)
        self.softmax = nn.Softmax()

    def forward(self, x):  # you can add any additional parameters you want
        x = self.conv1(x)
        x = F.max_pool2d(F.relu(x), kernel_size=2)
        x = self.conv2(x)
        x = self.conv2_bn(x)
        x = F.max_pool2d(F.relu(x), kernel_size=2)
        print(x.shape)
        x = self.dense(x)
        x = F.relu(x)
        return F.log_softmax(x)
This is where I run the code:
for epoch in range(max_epoch):
    print('EPOCH='+str(epoch))
    correct = 0
    total = 0
    running_loss = 0
    for data, label in tzip(TRAX, TRAY):
        #train = data.view(64,1,2,2)
        DAAA = data.view(1,784,1,1)

        #zeroing the parameter
        optimizer.zero_grad()
        label = torch.tensor([label]).type(torch.LongTensor)

        #forwards prop
        outputs = model2(DAAA)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        '========================================'
        _, predicted = torch.max(outputs.data, 1)
        total += label.size(0)
        correct += (predicted == label).sum().item()
        '========================================'

    print('\n')
    print('Accuracy of the network on the 10000 test images: %d %%' % (
        100 * correct / total))
    print('\n')
    print(str(epoch)+'loss= '+str(running_loss))
    lossjournal.append(running_loss)
    accjournal.append(100 * correct / total)

print('Finished Training')
<ipython-input-378-27ce013b2c10> in <module>
     55         #forwards prop
     56         outputs = model2(DAAA)
---> 57         loss = criterion(outputs, label)
     58         loss.backward()
     59         optimizer.step()

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
    887             result = self._slow_forward(*input, **kwargs)
    888         else:
--> 889             result = self.forward(*input, **kwargs)
    890         for hook in itertools.chain(
    891                 _global_forward_hooks.values(),

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/torch/nn/modules/loss.py in forward(self, input, target)
   1045     def forward(self, input: Tensor, target: Tensor) -> Tensor:
   1046         assert self.weight is None or isinstance(self.weight, Tensor)
-> 1047         return F.cross_entropy(input, target, weight=self.weight,
   1048                                ignore_index=self.ignore_index, reduction=self.reduction)
   1049 

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction)
   2691     if size_average is not None or reduce is not None:
   2692         reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 2693     return nll_loss(log_softmax(input, 1), target, weight, None, ignore_index, None, reduction)
   2694 
   2695 

/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/torch/nn/functional.py in nll_loss(input, target, weight, size_average, ignore_index, reduce, reduction)
   2388         ret = torch._C._nn.nll_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2389     elif dim == 4:
-> 2390         ret = torch._C._nn.nll_loss2d(input, target, weight, _Reduction.get_enum(reduction), ignore_index)
   2391     else:
   2392         # dim == 3 or dim > 4

RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of dimension: 1
When I run my model I get this error, and I don't know what to do next. What adjustments should I make to this model to get it working? I know the problem is with the criterion, but is that because the model's output shape is [1, 128, 1, 128]?
Answer:
MNIST has 10 classes, so your output should have size [batch_size, 10]. Change the last linear layer to self.dense = nn.Linear(128, 10). Then, since your labels have size [batch_size, 1], you should use torch.nn.CrossEntropyLoss as the criterion. A small sketch of these two changes follows.
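Here is a minimal sketch of those two changes, assuming (based on your conv/pool stack and the shape you printed) that the feature map entering the classifier is [batch_size, 128, 1, 1] and gets flattened to 128 features per example; the tensors below are just stand-ins, not your actual data:

import torch
import torch.nn as nn

dense = nn.Linear(128, 10)          # last layer now maps to the 10 classes
criterion = nn.CrossEntropyLoss()   # expects raw logits and integer class labels

features = torch.randn(1, 128, 1, 1)        # stand-in for the conv/pool output
logits = dense(torch.flatten(features, 1))  # flatten to [1, 128], then [1, 10]
label = torch.tensor([3])                   # class indices, shape [batch_size]
loss = criterion(logits, label)
loss.backward()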
Also, you do not need the final softmax layer during training, because the loss function above applies softmax internally when it is computed. You can use softmax or argmax at inference time only.
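As a rough sketch of that training/inference split (the tiny linear classifier and random batch below are placeholders, not your actual network):

import torch
import torch.nn as nn

# Placeholder classifier and fake batch, just to show the flow.
model = nn.Linear(784, 10)
criterion = nn.CrossEntropyLoss()
x = torch.randn(4, 784)
labels = torch.randint(0, 10, (4,))

# Training: feed raw logits straight into CrossEntropyLoss (no softmax layer).
logits = model(x)                 # shape [4, 10]
loss = criterion(logits, labels)
loss.backward()

# Inference: apply softmax/argmax only when you need probabilities or predictions.
with torch.no_grad():
    probs = torch.softmax(model(x), dim=1)
    predicted = probs.argmax(dim=1)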