我正在实现 FCN-8 解码器(这是 deeplearning.ai 的 TensorFlow 高级技术专项课程中,计算机视觉课程第 3 周语义分割部分的作业)。
我实现了下面的代码,我怀疑存在一些维度问题:运行测试时,它在以下行失败:
o = tf.keras.layers.Add()([o, o2])
报错信息为:ValueError: Operands could not be broadcast together with shapes (8, 12, 11) (4, 6, 11),因此我猜测我在尝试把形状不同的张量相加。
我还附上了测试部分的代码;其中调用的 FCN8() 方法在我看来没有问题。
你有什么提示吗?
def fcn8_decoder(convs, n_classes):
    """Build the FCN-8 decoder on top of the encoder feature maps.

    Args:
        convs: Sequence of three encoder outputs, unpacked as
            ``(f3, f4, f5)``.
        n_classes: Number of filters used by the scoring convolutions and
            the transposed convolutions, i.e. the channel count of the
            returned tensor.

    Returns:
        The output of the final sigmoid activation layer.
    """
    # features from the encoder stage
    f3, f4, f5 = convs

    # number of filters
    n = 512

    # add convolutional layers on top of the CNN extractor.
    o = tf.keras.layers.Conv2D(
        n, (7, 7), activation='relu', padding='same',
        name="conv6", data_format=IMAGE_ORDERING)(f5)
    o = tf.keras.layers.Dropout(0.5)(o)
    o = tf.keras.layers.Conv2D(
        n, (1, 1), activation='relu', padding='same',
        name="conv7", data_format=IMAGE_ORDERING)(o)
    o = tf.keras.layers.Dropout(0.5)(o)
    o = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same',
        data_format=IMAGE_ORDERING)(o)

    # Upsample `o` above and crop any extra pixels introduced
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(4, 4), strides=(2, 2), use_bias=False)(f5)
    o = tf.keras.layers.Cropping2D(cropping=(1, 1))(o)

    # load the pool 4 prediction and do a 1x1 convolution to reshape it to
    # the same shape of `o` above
    o2 = f4
    o2 = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same')(o2)

    # add the results of the upsampling and pool 4 prediction
    o = tf.keras.layers.Add()([o, o2])

    # upsample the resulting tensor of the operation you just did
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(4, 4), strides=(2, 2), use_bias=False)(o)
    o = tf.keras.layers.Cropping2D(cropping=(1, 1))(o)

    # load the pool 3 prediction and do a 1x1 convolution to reshape it to
    # the same shape of `o` above
    # NOTE(review): `o2` is not reassigned from `f3` here, so it is still the
    # pool 4 projection and `f3` is never used. Its spatial size therefore no
    # longer matches the upsampled `o`, which is consistent with the reported
    # (8, 12, 11) vs (4, 6, 11) broadcast error raised by the Add below.
    o2 = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same')(o2)

    # add the results of the upsampling and pool 3 prediction
    o = tf.keras.layers.Add()([o, o2])

    # upsample up to the size of the original image
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(8, 8), strides=(8, 8), use_bias=False)(o)
    # hard-coded crop: removes 96 - 84 = 12 columns from the end of the
    # second spatial axis (presumably the input is 84 wide - confirm)
    o = tf.keras.layers.Cropping2D(((0, 0), (0, 96 - 84)))(o)

    # append a sigmoid activation
    o = (tf.keras.layers.Activation('sigmoid'))(o)

    return o
测试代码
# TEST CODE
# Build the encoder, run the decoder on its feature maps with 11 output
# channels, and print the resulting tensor shape.
test_convs, test_img_input = FCN8()
test_fcn8_decoder = fcn8_decoder(test_convs, 11)
print(test_fcn8_decoder.shape)

# drop the temporary test objects
del test_convs, test_img_input, test_fcn8_decoder
回答:
在对池3应用 1×1 卷积之前,必须先加载池3的预测(即先执行 o2 = f3)。否则 o2 仍然是池4的结果,其形状与上采样后的 o 不一致,Add 就会报出上面的形状错误。修正后的代码如下:
def fcn8_decoder(convs, n_classes):
    """Build the FCN-8 decoder on top of the encoder feature maps.

    The decoder upsamples by 2, adds a 1x1-convolved `f4`, upsamples by 2
    again, adds a 1x1-convolved `f3`, then upsamples by 8, crops, and applies
    a sigmoid activation.

    Args:
        convs: Sequence of three encoder outputs, unpacked as
            ``(f3, f4, f5)``.
        n_classes: Number of filters used by the scoring convolutions and
            the transposed convolutions, i.e. the channel count of the
            returned tensor.

    Returns:
        The output of the final sigmoid activation layer.
    """
    # features from the encoder stage
    f3, f4, f5 = convs

    # number of filters
    n = 512

    # add convolutional layers on top of the CNN extractor
    o = tf.keras.layers.Conv2D(
        n, (7, 7), activation='relu', padding='same',
        name="conv6", data_format=IMAGE_ORDERING)(f5)
    o = tf.keras.layers.Dropout(0.5)(o)
    o = tf.keras.layers.Conv2D(
        n, (1, 1), activation='relu', padding='same',
        name="conv7", data_format=IMAGE_ORDERING)(o)
    o = tf.keras.layers.Dropout(0.5)(o)
    o = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same',
        data_format=IMAGE_ORDERING)(o)

    # upsample `o` above and crop any extra pixels introduced
    # NOTE(review): the transposed convolution is applied to `f5`, not to the
    # `o` computed above, so the conv6/conv7 stack does not feed the output.
    # Kept as given in the answer; confirm whether `(o)` was intended.
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(4, 4), strides=(2, 2), use_bias=False)(f5)
    o = tf.keras.layers.Cropping2D(cropping=(1, 1))(o)

    # load the pool 4 prediction and do a 1x1 convolution to reshape it to
    # the same shape as `o` above
    o2 = f4
    o2 = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same')(o2)

    # add the results of the upsampling and pool 4 prediction
    o = tf.keras.layers.Add()([o, o2])

    # upsample the output tensor of the operation you just did
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(4, 4), strides=(2, 2), use_bias=False)(o)
    o = tf.keras.layers.Cropping2D(cropping=(1, 1))(o)

    # load the pool 3 prediction and do a 1x1 convolution to reshape it to
    # the same shape as `o` above; reassigning `o2` from `f3` first is what
    # keeps the two operands of the next Add the same size
    o2 = f3
    o2 = tf.keras.layers.Conv2D(
        n_classes, (1, 1), activation='relu', padding='same')(o2)

    # add the results of the upsampling and pool 3 prediction
    o = tf.keras.layers.Add()([o, o2])

    # upsample up to the size of the original image
    o = tf.keras.layers.Conv2DTranspose(
        n_classes, kernel_size=(8, 8), strides=(8, 8), use_bias=False)(o)
    # hard-coded crop: removes 96 - 84 = 12 columns from the end of the
    # second spatial axis (presumably the input is 84 wide - confirm)
    o = tf.keras.layers.Cropping2D(((0, 0), (0, 96 - 84)))(o)

    # append a sigmoid activation
    o = (tf.keras.layers.Activation('sigmoid'))(o)

    return o