1
"""
    drop_blocks(drop_prob = 0.0)

Return the per-stage regularisation layers for the four ResNet stages.

The first two stages never use DropBlock; the two deeper stages use
progressively smaller block sizes (5, then 3) with different gamma scales
(0.25, then 1.0), mirroring the reference timm implementation. When
`drop_prob` is zero, `identity` is returned for every stage so that no
DropBlock layer is constructed at all — callers distinguish active entries
from `identity` before invoking them.
"""
function drop_blocks(drop_prob = 0.0)
    return [
        identity,
        identity,
        drop_prob == 0.0 ? identity : DropBlock(drop_prob, 5, 0.25),
        drop_prob == 0.0 ? identity : DropBlock(drop_prob, 3, 1.00),
    ]
end
9
+
10
"""
    downsample_conv(kernel_size, in_channels, out_channels; stride = 1, dilation = 1,
                    first_dilation = nothing, norm_layer = BatchNorm)

Build a convolution + normalisation chain used to downsample the identity
branch of a residual block.

When neither striding nor dilation is required, the kernel collapses to a
1×1 convolution. Otherwise the requested `kernel_size` is used with padding
chosen so the spatial size is halved exactly when `stride == 2`.
"""
function downsample_conv(kernel_size, in_channels, out_channels; stride = 1, dilation = 1,
                         first_dilation = nothing, norm_layer = BatchNorm)
    # Collapse to a 1×1 kernel when no stride/dilation is needed. This must
    # remain a tuple: Flux's `Conv` expects a tuple kernel size, and the
    # `kernel_size[1]` accesses below rely on indexing it.
    kernel_size = stride == 1 && dilation == 1 ? (1, 1) : kernel_size
    first_dilation = kernel_size[1] > 1 ?
                     (!isnothing(first_dilation) ? first_dilation : dilation) : 1
    # Pad by the dilation actually applied to the conv (`first_dilation`),
    # not the raw `dilation`, so output size matches the residual branch.
    pad = ((stride - 1) + first_dilation * (kernel_size[1] - 1)) ÷ 2
    return Chain(Conv(kernel_size, in_channels => out_channels; stride, pad,
                      dilation = first_dilation, bias = false),
                 norm_layer(out_channels))
end
20
+
21
"""
    downsample_avg(kernel_size, in_channels, out_channels; stride = 1, dilation = 1,
                   first_dilation = nothing, norm_layer = BatchNorm)

Build an average-pool + 1×1 convolution + normalisation chain used to
downsample the identity branch of a residual block (the "avg_down" /
ResNet-D variant).

`kernel_size` and `first_dilation` are accepted for interface parity with
`downsample_conv` but are not used here.
"""
function downsample_avg(kernel_size, in_channels, out_channels; stride = 1, dilation = 1,
                        first_dilation = nothing, norm_layer = BatchNorm)
    # Only stride the pooling when there is no dilation; with dilation the
    # stride is handled elsewhere in the residual branch.
    avg_stride = dilation == 1 ? stride : 1
    if stride == 1 && dilation == 1
        pool = identity
    else
        pad = avg_stride == 1 && dilation > 1 ? SamePad() : 0
        # `avg_pool_fn` was undefined in this file; Flux's `MeanPool` is the
        # average-pooling layer intended here.
        pool = MeanPool((2, 2); stride = avg_stride, pad)
    end
    return Chain(pool,
                 Conv((1, 1), in_channels => out_channels; bias = false),
                 norm_layer(out_channels))
end
34
+
1
35
function basicblock (inplanes, planes; stride = 1 , downsample = identity, cardinality = 1 ,
2
- base_width = 64 ,
3
- reduce_first = 1 , dilation = 1 , first_dilation = nothing ,
4
- act_layer = relu, norm_layer = BatchNorm,
36
+ base_width = 64 , reduce_first = 1 , dilation = 1 ,
37
+ first_dilation = nothing , activation = relu, norm_layer = BatchNorm,
5
38
drop_block = identity, drop_path = identity)
6
- expansion = 1
39
+ expansion = expansion_factor (basicblock)
7
40
@assert cardinality== 1 " BasicBlock only supports cardinality of 1"
8
41
@assert base_width== 64 " BasicBlock does not support changing base width"
9
42
first_planes = planes ÷ reduce_first
@@ -17,16 +50,16 @@ function basicblock(inplanes, planes; stride = 1, downsample = identity, cardina
17
50
dilation = dilation, bias = false ),
18
51
norm_layer (outplanes))
19
52
return Chain (Parallel (+ , downsample,
20
- Chain (conv_bn1, drop_block, act_layer , conv_bn2, drop_path)),
21
- act_layer )
53
+ Chain (conv_bn1, drop_block, activation , conv_bn2, drop_path)),
54
+ activation )
22
55
end
56
"""
    expansion_factor(::typeof(basicblock))

Channel expansion factor of `basicblock`: its output has `planes * 1` channels.
"""
function expansion_factor(::typeof(basicblock))
    return 1
end
23
57
24
58
function bottleneck (inplanes, planes; stride = 1 , downsample = identity, cardinality = 1 ,
25
- base_width = 64 ,
26
- reduce_first = 1 , dilation = 1 , first_dilation = nothing ,
27
- act_layer = relu, norm_layer = BatchNorm,
59
+ base_width = 64 , reduce_first = 1 , dilation = 1 ,
60
+ first_dilation = nothing , activation = relu, norm_layer = BatchNorm,
28
61
drop_block = identity, drop_path = identity)
29
- expansion = 4
62
+ expansion = expansion_factor (bottleneck)
30
63
width = floor (Int, planes * (base_width / 64 )) * cardinality
31
64
first_planes = width ÷ reduce_first
32
65
outplanes = planes * expansion
@@ -39,62 +72,33 @@ function bottleneck(inplanes, planes; stride = 1, downsample = identity, cardina
39
72
drop_block = drop_block === identity ? identity : drop_block ()
40
73
conv_bn3 = Chain (Conv ((1 , 1 ), width => outplanes; bias = false ), norm_layer (outplanes))
41
74
return Chain (Parallel (+ , downsample,
42
- Chain (conv_bn1, drop_block, act_layer, conv_bn2, drop_block,
43
- act_layer, conv_bn3, drop_path)),
44
- act_layer)
45
- end
46
-
47
- function drop_blocks (drop_prob = 0.0 )
48
- return [identity, identity,
49
- drop_prob == 0.0 ? DropBlock (drop_prob, 5 , 0.25 ) : identity,
50
- drop_prob == 0.0 ? DropBlock (drop_prob, 3 , 1.00 ) : identity]
75
+ Chain (conv_bn1, drop_block, activation, conv_bn2, drop_block,
76
+ activation, conv_bn3, drop_path)),
77
+ activation)
51
78
end
79
"""
    expansion_factor(::typeof(bottleneck))

Channel expansion factor of `bottleneck`: its output has `planes * 4` channels.
"""
function expansion_factor(::typeof(bottleneck))
    return 4
end
52
80
53
- function downsample_conv (kernel_size, in_channels, out_channels; stride = 1 , dilation = 1 ,
54
- first_dilation = nothing , norm_layer = BatchNorm)
55
- kernel_size = stride == 1 && dilation == 1 ? 1 : kernel_size
56
- first_dilation = kernel_size[1 ] > 1 ?
57
- (! isnothing (first_dilation) ? first_dilation : dilation) : 1
58
- pad = ((stride - 1 ) + dilation * (kernel_size[1 ] - 1 )) ÷ 2
59
- return Chain (Conv (kernel_size, in_channels => out_channels; stride, pad,
60
- dilation = first_dilation, bias = false ),
61
- norm_layer (out_channels))
62
- end
63
-
64
- function downsample_avg (kernel_size, in_channels, out_channels; stride = 1 , dilation = 1 ,
65
- first_dilation = nothing , norm_layer = BatchNorm)
66
- avg_stride = dilation == 1 ? stride : 1
67
- if stride == 1 && dilation == 1
68
- pool = identity
69
- else
70
- pad = avg_stride == 1 && dilation > 1 ? SamePad () : 0
71
- pool = avg_pool_fn ((2 , 2 ); stride = avg_stride, pad)
72
- end
73
-
74
- return Chain (pool,
75
- Conv ((1 , 1 ), in_channels => out_channels; stride = 1 , pad = 0 ,
76
- bias = false ),
77
- norm_layer (out_channels))
78
- end
79
-
80
- function make_blocks (block_fn, channels, block_repeats, inplanes; expansion = 1 ,
81
- reduce_first = 1 , output_stride = 32 ,
82
- down_kernel_size = 1 , avg_down = false , drop_block_rate = 0.0 ,
83
- drop_path_rate = 0.0 , kwargs... )
81
+ function make_blocks (block_fn, channels, block_repeats, inplanes;
82
+ reduce_first = 1 , output_stride = 32 , down_kernel_size = 1 ,
83
+ avg_down = false , drop_block_rate = 0.0 , drop_path_rate = 0.0 ,
84
+ kwargs... )
85
+ expansion = expansion_factor (block_fn)
84
86
kwarg_dict = Dict (kwargs... )
85
87
stages = []
86
88
net_block_idx = 1
87
89
net_stride = 4
88
90
dilation = prev_dilation = 1
89
- for (stage_idx, (planes, num_blocks, db)) in enumerate (zip (channels, block_repeats,
90
- drop_blocks (drop_block_rate)))
91
+ for (stage_idx, (planes, num_blocks, drop_block)) in enumerate (zip (channels,
92
+ block_repeats,
93
+ drop_blocks (drop_block_rate)))
91
94
stride = stage_idx == 1 ? 1 : 2
92
95
if net_stride >= output_stride
93
96
dilation *= stride
94
97
stride = 1
95
98
else
96
99
net_stride *= stride
97
100
end
101
+ # first block needs to be handled differently for downsampling
98
102
downsample = identity
99
103
if stride != 1 || inplanes != planes * expansion
100
104
downsample = avg_down ?
@@ -106,7 +110,7 @@ function make_blocks(block_fn, channels, block_repeats, inplanes; expansion = 1,
106
110
norm_layer = kwarg_dict[:norm_layer ])
107
111
end
108
112
block_kwargs = Dict (:reduce_first => reduce_first, :dilation => dilation,
109
- :drop_block => db , kwargs... )
113
+ :drop_block => drop_block , kwargs... )
110
114
blocks = []
111
115
for block_idx in 1 : num_blocks
112
116
downsample = block_idx == 1 ? downsample : identity
@@ -127,15 +131,13 @@ function make_blocks(block_fn, channels, block_repeats, inplanes; expansion = 1,
127
131
end
128
132
129
133
function resnet (block, layers; num_classes = 1000 , inchannels = 3 , output_stride = 32 ,
130
- expansion = 1 ,
131
134
cardinality = 1 , base_width = 64 , stem_width = 64 , stem_type = :default ,
132
- replace_stem_pool = false , reduce_first = 1 ,
133
- down_kernel_size = (1 , 1 ), avg_down = false , act_layer = relu,
134
- norm_layer = BatchNorm,
135
+ replace_stem_pool = false , reduce_first = 1 , down_kernel_size = (1 , 1 ),
136
+ avg_down = false , activation = relu, norm_layer = BatchNorm,
135
137
drop_rate = 0.0 , drop_path_rate = 0.0 , drop_block_rate = 0.0 ,
136
138
block_kwargs... )
137
- @assert output_stride in (8 , 16 , 32 )
138
- @assert stem_type in [:default , :deep , :deep_tiered ]
139
+ @assert output_stride in (8 , 16 , 32 ) " Invalid `output_stride`. Must be one of (8, 16, 32) "
140
+ @assert stem_type in [:default , :deep , :deep_tiered ] " Stem type must be one of [:default, :deep, :deep_tiered] "
139
141
# Stem
140
142
inplanes = stem_type == :deep ? stem_width * 2 : 64
141
143
if stem_type == :deep
@@ -145,38 +147,32 @@ function resnet(block, layers; num_classes = 1000, inchannels = 3, output_stride
145
147
end
146
148
conv1 = Chain (Conv ((3 , 3 ), inchannels => stem_channels[0 ]; stride = 2 , pad = 1 ,
147
149
bias = false ),
148
- norm_layer (stem_channels[1 ]),
149
- act_layer (),
150
- Conv ((3 , 3 ), stem_channels[1 ] => stem_channels[1 ]; stride = 1 ,
151
- pad = 1 , bias = false ),
152
- norm_layer (stem_channels[2 ]),
153
- act_layer (),
154
- Conv ((3 , 3 ), stem_channels[2 ] => inplanes; stride = 1 , pad = 1 ,
155
- bias = false ))
150
+ norm_layer (stem_channels[1 ], activation),
151
+ Conv ((3 , 3 ), stem_channels[1 ] => stem_channels[1 ]; pad = 1 ,
152
+ bias = false ),
153
+ norm_layer (stem_channels[2 ], activation),
154
+ Conv ((3 , 3 ), stem_channels[2 ] => inplanes; pad = 1 , bias = false ))
156
155
else
157
156
conv1 = Conv ((7 , 7 ), inchannels => inplanes; stride = 2 , pad = 3 , bias = false )
158
157
end
159
- bn1 = norm_layer (inplanes)
160
- act1 = act_layer
158
+ bn1 = norm_layer (inplanes, activation)
161
159
# Stem pooling
162
160
if replace_stem_pool
163
161
stempool = Chain (Conv ((3 , 3 ), inplanes => inplanes; stride = 2 , pad = 1 ,
164
162
bias = false ),
165
- norm_layer (inplanes),
166
- act_layer)
163
+ norm_layer (inplanes, activation))
167
164
else
168
165
stempool = MaxPool ((3 , 3 ); stride = 2 , pad = 1 )
169
166
end
170
- stem = Chain (conv1, bn1, act1, stempool)
171
-
167
+ stem = Chain (conv1, bn1, stempool)
172
168
# Feature Blocks
173
169
channels = [64 , 128 , 256 , 512 ]
174
170
stage_blocks = make_blocks (block, channels, layers, inplanes; cardinality, base_width,
175
171
output_stride, reduce_first, avg_down,
176
- down_kernel_size, act_layer , norm_layer,
172
+ down_kernel_size, activation , norm_layer,
177
173
drop_block_rate, drop_path_rate, block_kwargs... )
178
-
179
174
# Head (Pooling and Classifier)
175
+ expansion = expansion_factor (block)
180
176
num_features = 512 * expansion
181
177
classifier = Chain (GlobalMeanPool (), Dropout (drop_rate), MLUtils. flatten,
182
178
Dense (num_features, num_classes))
0 commit comments