我无法解决这个错误。这段代码来自https://becominghuman.ai/extract-a-feature-vector-for-any-image-with-pytorch-9717561d1d4c
import torchimport torch.nn as nnimport torchvision.models as modelsimport torchvision.transforms as transformsfrom torch.autograd import Variablefrom PIL import Imagepic_one = '/content/drive/My Drive/Video_Recommender/zframe1.jpg'pic_two = '/content/drive/My Drive/Video_Recommender/zframe2.jpg'model = models.resnet18(pretrained=True)layer = model._modules.get('avgpool')scaler = transforms.Scale((224, 224))normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])to_tensor = transforms.ToTensor()def get_vector(image_name): # 1. 使用Pillow库加载图像 img = Image.open(image_name) # 2. 使用转换后的图像创建一个PyTorch变量 t_img = Variable(normalize(to_tensor(scaler(img))).unsqueeze(0)) # 3. 创建一个保存特征向量的零向量 # 'avgpool'层的输出大小为512 my_embedding = torch.zeros(512) # 4. 定义一个复制层输出的函数 def copy_data(m, i, o): my_embedding.copy_(o.data) # 5. 将该函数附加到我们选择的层上 h = layer.register_forward_hook(copy_data) # 6. 在我们转换后的图像上运行模型 model(t_img) # 7. 从层中分离我们的复制函数 h.remove() # 8. 返回特征向量 return my_embeddingpic_one_vector = get_vector(pic_one)pic_two_vector = get_vector(pic_two)
错误信息如下:
---------------------------------------------------------------------------RuntimeError Traceback (most recent call last)<ipython-input-41-ca2d66de2d9c> in <module>()----> 1 pic_one_vector = get_vector(pic_one) 2 pic_two_vector = get_vector(pic_two)5 frames<ipython-input-40-a45affe9d8f7> in get_vector(image_name) 13 h = layer.register_forward_hook(copy_data) 14 # 6. 在我们转换后的图像上运行模型---> 15 model(t_img) 16 # 7. 从层中分离我们的复制函数 17 h.remove()/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs) 548 result = self._slow_forward(*input, **kwargs) 549 else:--> 550 result = self.forward(*input, **kwargs) 551 for hook in self._forward_hooks.values(): 552 hook_result = hook(self, input, result)/usr/local/lib/python3.6/dist-packages/torchvision/models/resnet.py in forward(self, x) 218 219 def forward(self, x):--> 220 return self._forward_impl(x) 221 222 /usr/local/lib/python3.6/dist-packages/torchvision/models/resnet.py in _forward_impl(self, x) 211 x = self.layer4(x) 212 --> 213 x = self.avgpool(x) 214 x = torch.flatten(x, 1) 215 x = self.fc(x)/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs) 550 result = self.forward(*input, **kwargs) 551 for hook in self._forward_hooks.values():--> 552 hook_result = hook(self, input, result) 553 if hook_result is not None: 554 result = hook_result<ipython-input-40-a45affe9d8f7> in copy_data(m, i, o) 9 # 4. 定义一个复制层输出的函数 10 def copy_data(m, i, o):---> 11 my_embedding.copy_(o.data) 12 # 5. 将该函数附加到我们选择的层上 13 h = layer.register_forward_hook(copy_data)RuntimeError: output with shape [512] doesn't match the broadcast shape [1, 512, 1, 512]
我实际上想做的是从图像中提取特征向量,以便进一步用于构建推荐系统。如果有其他可用的替代方案,请告知我。提前感谢!!!
回答:
在avgpool之后,您需要重塑输出数据:
def copy_data(m, i, o): my_embedding.copy_(o.data.reshape(o.data.size(1)))