我是深度学习的新手,我创建了一个模型来对我的图像进行分类。
目前,这个项目在Google Colab或Kaggle上(使用CPU和GPU)会引发错误,但在我的个人电脑上(使用CPU)不会出现这个问题。
模型:
class CNN(nn.Module):
    """Convolutional classifier that returns one sigmoid score per class.

    ``network1`` stacks six 3x3 convolutions (3 -> 32 -> 64 -> 128 -> 128 ->
    256 -> 256 channels) with two 2x2 max-pools and ends in an adaptive
    max-pool to 4x4, so the flattened feature size is ``256 * 4 * 4`` for
    any input resolution the pooling layers accept. ``network2`` is a
    three-layer fully connected head and ``sigm`` maps its output into the
    0-1 range.

    Args:
        num_classes: Number of output scores. When ``None`` (the default)
            the module-level ``n_classes`` value is used, which is what the
            class always did before this parameter existed.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default: fall back to the global that the
            # original constructor read unconditionally.
            num_classes = n_classes

        self.network1 = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            # Fixed 4x4 spatial output keeps the Linear input size constant.
            nn.AdaptiveMaxPool2d((4, 4)),
        )
        self.network2 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )
        self.sigm = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of images, or a single unbatched image.

        Args:
            x: Float tensor of shape ``(N, 3, H, W)``, or one image of shape
                ``(3, H, W)``.

        Returns:
            Tensor of shape ``(N, num_classes)`` for batched input, or
            ``(num_classes,)`` for an unbatched image.
        """
        # nn.Flatten keeps dim 0, so a bare (C, H, W) image would reach the
        # first Linear layer as (256, 16) and fail with "mat1 and mat2 shapes
        # cannot be multiplied (256x16 and 4096x1024)". Add the missing batch
        # dimension here and strip it again on the way out.
        unbatched = x.dim() == 3
        if unbatched:
            x = x.unsqueeze(0)

        x = self.network1(x)
        x = self.network2(x)
        x = self.sigm(x)

        return x.squeeze(0) if unbatched else x
训练:
# Training script: optimizes `model` with binary cross-entropy over `data`,
# printing the mean batch loss after every epoch and checkpointing every
# SAVE_FREQ epochs. `model`, `optimizer`, `data`, `device`, `SAVE_FREQ`,
# `EPOCHS` and `checkpoint_save` are expected to be defined earlier.
epoch = 0
# Number of batches processed so far. The progress line below formats this
# value, but the snippet never assigned or advanced it.
iteration = 0
model.train()
# BCELoss expects inputs already in the 0-1 range (presumably the model ends
# in a sigmoid - confirm against the model definition).
criterion = nn.BCELoss()

while True:
    batch_losses = []
    for imgs, labels in data:
        imgs, labels = imgs.float().to(device), labels.to(device)

        optimizer.zero_grad()
        model_result = model(imgs)
        # Targets must be floating point for BCELoss.
        loss = criterion(model_result, labels.type(torch.float))
        batch_loss_value = loss.item()
        loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss_value)
        iteration += 1

    loss_value = np.mean(batch_losses)
    print("epoch:{:2d} iter:{:3d} train: loss:{:.3f}".format(epoch, iteration, loss_value))

    if epoch % SAVE_FREQ == 0:
        checkpoint_save(model, epoch)

    epoch += 1
    # NOTE(review): this stops only once epoch exceeds EPOCHS, so epochs
    # 0..EPOCHS inclusive are run (EPOCHS + 1 passes) - confirm intended.
    if EPOCHS < epoch:
        break
错误:
RuntimeError Traceback (most recent call last)/tmp/ipykernel_33/872363799.py in <module> 14 optimizer.zero_grad() 15 ---> 16 model_result = model(imgs) 17 loss = criterion(model_result, labels.type(torch.float)) 18 /opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks):-> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], []/tmp/ipykernel_33/1050848783.py in forward(self, x) 32 self.sigm = nn.Sigmoid() 33 def forward(self,x):---> 34 x = self.network(x) 35 return self.sigm(x)/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks):-> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], []/opt/conda/lib/python3.7/site-packages/torch/nn/modules/container.py in forward(self, input) 139 def forward(self, input): 140 for module in self:--> 141 input = module(input) 142 return input 143 /opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs) 1108 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks 1109 or _global_forward_hooks or _global_forward_pre_hooks):-> 1110 return forward_call(*input, **kwargs) 1111 # Do not call functions when jit is used 1112 full_backward_hooks, non_full_backward_hooks = [], []/opt/conda/lib/python3.7/site-packages/torch/nn/modules/linear.py in forward(self, input) 101 102 def forward(self, input: Tensor) -> Tensor:--> 103 return 
F.linear(input, self.weight, self.bias) 104 105 def extra_repr(self) -> str:RuntimeError: mat1 and mat2 shapes cannot be multiplied (256x16 and 4096x1024)
输入形状为(3, 406, 565),该模型在此形状下的摘要如下:
---------------------------------------------------------------- Layer (type) Output Shape Param #================================================================ Conv2d-1 [-1, 32, 406, 565] 896 ReLU-2 [-1, 32, 406, 565] 0 Conv2d-3 [-1, 64, 406, 565] 18,496 ReLU-4 [-1, 64, 406, 565] 0 MaxPool2d-5 [-1, 64, 203, 282] 0 Conv2d-6 [-1, 128, 203, 282] 73,856 ReLU-7 [-1, 128, 203, 282] 0 Conv2d-8 [-1, 128, 203, 282] 147,584 ReLU-9 [-1, 128, 203, 282] 0 MaxPool2d-10 [-1, 128, 101, 141] 0 Conv2d-11 [-1, 256, 101, 141] 295,168 ReLU-12 [-1, 256, 101, 141] 0 Conv2d-13 [-1, 256, 101, 141] 590,080 ReLU-14 [-1, 256, 101, 141] 0AdaptiveAvgPool2d-15 [-1, 256, 4, 4] 0 Flatten-16 [-1, 4096] 0 Linear-17 [-1, 1024] 4,195,328 ReLU-18 [-1, 1024] 0 Linear-19 [-1, 512] 524,800 ReLU-20 [-1, 512] 0 Linear-21 [-1, 18] 9,234 Sigmoid-22 [-1, 18] 0================================================================Total params: 5,855,442Trainable params: 5,855,442Non-trainable params: 0----------------------------------------------------------------Input size (MB): 2.63Forward/backward pass size (MB): 712.84Params size (MB): 22.34Estimated Total Size (MB): 737.80----------------------------------------------------------------
解决方案
在我的情况下,问题在于我没有考虑图像形状中的批次大小,并且我的数据集中有少量灰度图像和带有Alpha通道的图像。
回答:
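在我的情况下,问题有两个。首先,我没有考虑图像形状中的批次维度:模型期望的输入形状是 (N, 3, H, W),而传入不带批次维度的 (3, H, W) 张量时,nn.Flatten 会从第 1 维开始展平,把 (256, 4, 4) 变成 (256, 16),这正是报错信息中的“256x16 and 4096x1024”;对单张图像先调用 unsqueeze(0) 补上批次维度即可。其次,我的数据集中有少量灰度图像和带有Alpha通道的图像,它们的通道数不是 3,需要在加载时统一转换为RGB(例如使用 PIL 的 Image.open(path).convert("RGB"))。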