Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug: Incorrect Implementation of Inception_v4 Net in .prototxt file! #81

Open
dprosperino opened this issue Jun 7, 2019 · 0 comments
Open

Comments

@dprosperino
Copy link

Hey Guys,

It seems you have made a mistake in the design of the Inception-C block in the Inception-v4 network. The attached picture should explain the mistake clearly: a 3x1 convolutional layer was mixed up with a 1x3 convolutional layer.

Inception-C_block_mistake

This should not be too big of a deal in terms of performance or functionality, however, it could lead to bugs, if you build your network from scratch and use the pretrained weights linked on this site.

Tips for other developers,

If you use this .prototxt file and these pretrained weights, you will be fine and your CNN will work perfectly. However, if you build your Inception-v4 network from scratch according to the paper and then use these pretrained weights, caffe will not be able to match the weights and will throw the following error:
Cannot copy param 0 weights from layer 'inception_c1_3x1_2'; shape mismatch. Source param shape is 448 384 3 1 (516096); target param shape is 512 448 3 1 (688128). To learn this layer's parameters from scratch rather than copying from a saved net, rename the layer.

To fix this problem and still be able to use the pretrained weights, simply build the Inception-C part of your Inception-v4 network according to the picture on this post.

I hope I was able to save someone a few hours of debugging.

Cheers,
d3lt4-papa

Click to view actual code

Code of one Inception-C block in Inception-v4 according to original paper

# Inception-C block of Inception-v4 as specified in the original paper
# (Szegedy et al., "Inception-v4, Inception-ResNet and the Impact of
# Residual Connections on Learning", 2016).
# Four parallel branches off "reduction_b_concat"; every convolution is
# bias-free and followed by BatchNorm + Scale (bias lives in the Scale
# layer) + ReLU.

# --- Branch 1: 3x3 average pool (stride 1, pad 1) -> 1x1 conv, 256 outputs ---
layer {
  name: "inception_c1_pool_ave"
  type: "Pooling"
  bottom: "reduction_b_concat"
  top: "inception_c1_pool_ave"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
layer {
  name: "inception_c1_1x1"
  type: "Convolution"
  bottom: "inception_c1_pool_ave"
  top: "inception_c1_1x1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_scale"
  type: "Scale"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
}
# --- Branch 2: plain 1x1 conv, 256 outputs ---
layer {
  name: "inception_c1_1x1_2"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
}
# --- Branch 3: 1x1 conv (384) -> parallel 1x3 (256) and 3x1 (256) ---
layer {
  name: "inception_c1_1x1_3"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
}
# Branch 3a: asymmetric 1x3 conv, 384 -> 256 (pad 0x1 keeps spatial size)
layer {
  name: "inception_c1_1x3"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
}
# Branch 3b: asymmetric 3x1 conv, 384 -> 256 (pad 1x0 keeps spatial size)
layer {
  name: "inception_c1_3x1"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_3x1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_scale"
  type: "Scale"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
}
# --- Branch 4 (paper order): 1x1 (384) -> 1x3 (448) -> 3x1 (512)
#     -> parallel 1x3 (256) and 3x1 (256) ---
layer {
  name: "inception_c1_1x1_4"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_4"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_4_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_4_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_4_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
}
# 1x3 conv, 384 -> 448: the paper applies 1x3 first, then 3x1.
# (The repository's prototxt / pretrained weights use the opposite order.)
layer {
  name: "inception_c1_1x3_2"
  type: "Convolution"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x3_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 448
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
}
# 3x1 conv, 448 -> 512
layer {
  name: "inception_c1_3x1_2"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_3x1_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 512
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
}
# Branch 4a: 1x3 conv, 512 -> 256
layer {
  name: "inception_c1_1x3_3"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_1x3_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
}
# Branch 4b: 3x1 conv, 512 -> 256
layer {
  name: "inception_c1_3x1_3"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
}
# Concatenate all six 256-channel branch outputs -> 1536 channels total.
layer {
  name: "inception_c1_concat"
  type: "Concat"
  bottom: "inception_c1_1x1"
  bottom: "inception_c1_1x1_2"
  bottom: "inception_c1_1x3"
  bottom: "inception_c1_3x1"
  bottom: "inception_c1_1x3_3"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_concat"
}

Code of one Inception-C block, if you want to be able to use these pretrained weights

# Inception-C block arranged to match the repository's pretrained weights.
# Identical to the paper version except in branch 4, where the 3x1 and 1x3
# convolutions are applied in the OPPOSITE order (3x1 first, then 1x3) —
# this is the mismatch reported in this issue. Use this layout if you want
# the pretrained .caffemodel to load without shape errors.

# --- Branch 1: 3x3 average pool (stride 1, pad 1) -> 1x1 conv, 256 outputs ---
layer {
  name: "inception_c1_pool_ave"
  type: "Pooling"
  bottom: "reduction_b_concat"
  top: "inception_c1_pool_ave"
  pooling_param {
    pool: AVE
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
layer {
  name: "inception_c1_1x1"
  type: "Convolution"
  bottom: "inception_c1_pool_ave"
  top: "inception_c1_1x1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_scale"
  type: "Scale"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1"
  top: "inception_c1_1x1"
}
# --- Branch 2: plain 1x1 conv, 256 outputs ---
layer {
  name: "inception_c1_1x1_2"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_2"
  top: "inception_c1_1x1_2"
}
# --- Branch 3: 1x1 conv (384) -> parallel 1x3 (256) and 3x1 (256) ---
layer {
  name: "inception_c1_1x1_3"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x1_3"
}
# Branch 3a: asymmetric 1x3 conv, 384 -> 256 (pad 0x1 keeps spatial size)
layer {
  name: "inception_c1_1x3"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_1x3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3"
  top: "inception_c1_1x3"
}
# Branch 3b: asymmetric 3x1 conv, 384 -> 256 (pad 1x0 keeps spatial size)
layer {
  name: "inception_c1_3x1"
  type: "Convolution"
  bottom: "inception_c1_1x1_3"
  top: "inception_c1_3x1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_scale"
  type: "Scale"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1"
  top: "inception_c1_3x1"
}
# --- Branch 4 (pretrained-weights order): 1x1 (384) -> 3x1 (448) -> 1x3 (512)
#     -> parallel 1x3 (256) and 3x1 (256) ---
layer {
  name: "inception_c1_1x1_4"
  type: "Convolution"
  bottom: "reduction_b_concat"
  top: "inception_c1_1x1_4"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 384
    bias_term: false
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
  }
}
layer {
  name: "inception_c1_1x1_4_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x1_4_scale"
  type: "Scale"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x1_4_relu"
  type: "ReLU"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_1x1_4"
}
# 3x1 conv, 384 -> 448 — NOTE: swapped relative to the paper (which applies
# a 1x3 conv here); this order is what the pretrained .caffemodel expects.
layer {
  name: "inception_c1_3x1_2"
  type: "Convolution"
  bottom: "inception_c1_1x1_4"
  top: "inception_c1_3x1_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 448
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_2_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_2_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_3x1_2"
}
# 1x3 conv, 448 -> 512 (the paper applies a 3x1 conv here instead)
layer {
  name: "inception_c1_1x3_2"
  type: "Convolution"
  bottom: "inception_c1_3x1_2"
  top: "inception_c1_1x3_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 512
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_2_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_2_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_2_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_2"
}
# Branch 4a: 1x3 conv, 512 -> 256
layer {
  name: "inception_c1_1x3_3"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_1x3_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 0
    pad_w: 1
    kernel_h: 1
    kernel_w: 3
  }
}
layer {
  name: "inception_c1_1x3_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_1x3_3_scale"
  type: "Scale"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_1x3_3_relu"
  type: "ReLU"
  bottom: "inception_c1_1x3_3"
  top: "inception_c1_1x3_3"
}
# Branch 4b: 3x1 conv, 512 -> 256
layer {
  name: "inception_c1_3x1_3"
  type: "Convolution"
  bottom: "inception_c1_1x3_2"
  top: "inception_c1_3x1_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  convolution_param {
    num_output: 256
    bias_term: false
    stride: 1
    weight_filler {
      type: "xavier"
      std: 0.00999999977648
    }
    pad_h: 1
    pad_w: 0
    kernel_h: 3
    kernel_w: 1
  }
}
layer {
  name: "inception_c1_3x1_3_bn"
  type: "BatchNorm"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  batch_norm_param {
    use_global_stats: false
  }
}
layer {
  name: "inception_c1_3x1_3_scale"
  type: "Scale"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
  scale_param {
    bias_term: true
  }
}
layer {
  name: "inception_c1_3x1_3_relu"
  type: "ReLU"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_3x1_3"
}
# Concatenate all six 256-channel branch outputs -> 1536 channels total.
layer {
  name: "inception_c1_concat"
  type: "Concat"
  bottom: "inception_c1_1x1"
  bottom: "inception_c1_1x1_2"
  bottom: "inception_c1_1x3"
  bottom: "inception_c1_3x1"
  bottom: "inception_c1_1x3_3"
  bottom: "inception_c1_3x1_3"
  top: "inception_c1_concat"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant