diff --git a/config/yoeo_light_decoder_deconv.cfg b/config/yoeo_light_decoder_deconv.cfg new file mode 100644 index 0000000..f0b551e --- /dev/null +++ b/config/yoeo_light_decoder_deconv.cfg @@ -0,0 +1,312 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + 
+[deconvolutional] +batch_normalize=1 +filters=16 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv.cfg b/config/yoeo_medium_decoder_deconv.cfg new file mode 100644 index 0000000..45df2c2 --- /dev/null +++ b/config/yoeo_medium_decoder_deconv.cfg @@ -0,0 +1,356 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv_context.cfg b/config/yoeo_medium_decoder_deconv_context.cfg new file mode 100644 index 0000000..cd1803d --- /dev/null +++ b/config/yoeo_medium_decoder_deconv_context.cfg @@ -0,0 +1,381 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + 
+[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 +groups=2 +group_id=1 + +[maxpool] +size=13 +stride=13 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=13 + +[route] +layers = 18 +groups=2 +group_id=0 + +[route] +layers = -1, -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_v7_rev1.cfg b/config/yoeo_v7_rev1.cfg new file mode 100644 index 0000000..5f9d59c --- /dev/null +++ b/config/yoeo_v7_rev1.cfg @@ -0,0 +1,771 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +batch=64 +subdivisions=1 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=100000,150000 +scales=.1,.1 + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +# 1 +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 8 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 16 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 24 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 32 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +################################## + +### SPPCSP ### +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -10,-1 + +# 44 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky +### End SPPCSP ### + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 24 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 56 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 16 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 68 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=128 +activation=leaky + +[route] +layers = -1,56 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 77 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1,44 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 86 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +############################# + +# ============ End of Neck ============ # + +# ============ Head ============ # + + +# P3 +[route] +layers = 68 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P4 +[route] +layers = 77 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P5 +[route] +layers = 86 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +[route] +layers = 44 + +[deconvolutional] +batch_normalize=1 +filters=256 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 24 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 16 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 8 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/yoeo/models.py b/yoeo/models.py index 1b69cd8..4dffee9 100644 --- a/yoeo/models.py +++ b/yoeo/models.py @@ -16,24 +16,31 @@ def 
create_modules(module_defs):
     Constructs module list of layer blocks from module configuration in module_defs
     """
     hyperparams = module_defs.pop(0)
-    hyperparams.update({
-        'batch': int(hyperparams['batch']),
-        'subdivisions': int(hyperparams['subdivisions']),
-        'width': int(hyperparams['width']),
-        'height': int(hyperparams['height']),
-        'channels': int(hyperparams['channels']),
-        'optimizer': hyperparams.get('optimizer'),
-        'momentum': float(hyperparams['momentum']),
-        'decay': float(hyperparams['decay']),
-        'learning_rate': float(hyperparams['learning_rate']),
-        'burn_in': int(hyperparams['burn_in']),
-        'max_batches': int(hyperparams['max_batches']),
-        'policy': hyperparams['policy'],
-        'lr_steps': list(zip(map(int, hyperparams["steps"].split(",")),
-                             map(float, hyperparams["scales"].split(","))))
-    })
-    assert hyperparams["height"] == hyperparams["width"], \
+    hyperparams.update(
+        {
+            "batch": int(hyperparams["batch"]),
+            "subdivisions": int(hyperparams["subdivisions"]),
+            "width": int(hyperparams["width"]),
+            "height": int(hyperparams["height"]),
+            "channels": int(hyperparams["channels"]),
+            "optimizer": hyperparams.get("optimizer"),
+            "momentum": float(hyperparams["momentum"]),
+            "decay": float(hyperparams["decay"]),
+            "learning_rate": float(hyperparams["learning_rate"]),
+            "burn_in": int(hyperparams["burn_in"]),
+            "max_batches": int(hyperparams["max_batches"]),
+            "policy": hyperparams["policy"],
+            "lr_steps": list(
+                zip(
+                    map(int, hyperparams["steps"].split(",")),
+                    map(float, hyperparams["scales"].split(",")),
+                )
+            ),
+        }
+    )
+    assert hyperparams["height"] == hyperparams["width"], (
         "Height and width should be equal! Non square images are padded with zeros."
+    )
     output_filters = [hyperparams["channels"]]
     module_list = nn.ModuleList()
     for module_i, module_def in enumerate(module_defs):
@@ -56,20 +63,69 @@ def create_modules(module_defs):
                 ),
             )
             if bn:
-                modules.add_module(f"batch_norm_{module_i}",
-                                   nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5))
+                modules.add_module(
+                    f"batch_norm_{module_i}",
+                    nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5),
+                )
             if module_def["activation"] == "leaky":
                 modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
-            if module_def["activation"] == "mish":
-                modules.add_module(f"mish_{module_i}", Mish())
+            elif module_def["activation"] == "mish":
+                modules.add_module(f"mish_{module_i}", nn.Mish())
+            elif module_def["activation"] == "logistic":
+                modules.add_module(f"sigmoid_{module_i}", nn.Sigmoid())
+            elif module_def["activation"] == "swish":
+                modules.add_module(f"swish_{module_i}", nn.SiLU())
+            elif module_def["activation"] == "linear":
+                pass
+            else:
+                raise ValueError(f"Unknown activation: {module_def['activation']}")
+
+        elif module_def["type"] == "deconvolutional":
+            bn = int(module_def["batch_normalize"])
+            filters = int(module_def["filters"])
+            kernel_size = int(module_def["size"])
+            pad = int(module_def["pad"])
+            modules.add_module(
+                f"deconv_{module_i}",
+                nn.ConvTranspose2d(
+                    in_channels=output_filters[-1],
+                    out_channels=filters,
+                    kernel_size=kernel_size,
+                    stride=int(module_def["stride"]),
+                    padding=pad,
+                    bias=not bn,
+                ),
+            )
+            if bn:
+                modules.add_module(
+                    f"batch_norm_{module_i}",
+                    nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5),
+                )
+            if module_def["activation"] == "leaky":
+                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
+            elif module_def["activation"] == "mish":
+                modules.add_module(f"mish_{module_i}", nn.Mish())
+            elif module_def["activation"] == "logistic":
+                modules.add_module(f"sigmoid_{module_i}", nn.Sigmoid())
+            elif module_def["activation"] == "swish":
+                modules.add_module(f"swish_{module_i}", nn.SiLU())
+            elif module_def["activation"] == "linear":
+                pass
+            else:
+                raise ValueError(f"Unknown activation: {module_def['activation']}")
 
         elif module_def["type"] == "maxpool":
             kernel_size = int(module_def["size"])
             stride = int(module_def["stride"])
             if kernel_size == 2 and stride == 1:
-                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
-            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
-                                   padding=int((kernel_size - 1) // 2))
+                modules.add_module(
+                    f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))
+                )
+            maxpool = nn.MaxPool2d(
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=int((kernel_size - 1) // 2),
+            )
             modules.add_module(f"maxpool_{module_i}", maxpool)
 
         elif module_def["type"] == "upsample":
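Note: every `[deconvolutional]` block in the new configs uses size=2, stride=2, pad=0, so the `nn.ConvTranspose2d` created above doubles the spatial resolution (out = (in − 1)·stride − 2·pad + size). A standalone sketch to confirm the shape math (illustrative channel counts, not part of the patch):

```python
import torch
import torch.nn as nn

# Same mapping as the new "deconvolutional" branch, for size=2, stride=2, pad=0:
# output = (in - 1) * 2 - 2 * 0 + 2 = 2 * in, i.e. exact 2x upsampling.
deconv = nn.ConvTranspose2d(in_channels=128, out_channels=64,
                            kernel_size=2, stride=2, padding=0, bias=False)
x = torch.randn(1, 128, 13, 13)  # e.g. the 13x13 grid of a 416x416 input
print(deconv(x).shape)           # torch.Size([1, 64, 26, 26])
```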
@@ -78,7 +134,9 @@ def create_modules(module_defs):
         elif module_def["type"] == "route":
             layers = [int(x) for x in module_def["layers"].split(",")]
-            filters = sum([output_filters[1:][i] for i in layers]) // int(module_def.get("groups", 1))
+            filters = sum([output_filters[1:][i] for i in layers]) // int(
+                module_def.get("groups", 1)
+            )
             modules.add_module(f"route_{module_i}", nn.Sequential())
 
         elif module_def["type"] == "shortcut":
@@ -92,8 +150,9 @@ def create_modules(module_defs):
             anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
             anchors = [anchors[i] for i in anchor_idxs]
             num_classes = int(module_def["classes"])
+            new_coords = bool(int(module_def.get("new_coords", 0)))
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, num_classes)
+            yolo_layer = YOLOLayer(anchors, num_classes, new_coords)
             modules.add_module(f"yolo_{module_i}", yolo_layer)
         elif module_def["type"] == "seg":
             num_classes = int(module_def["classes"])
@@ -106,7 +165,7 @@ def create_modules(module_defs):
 
 
 class Upsample(nn.Module):
-    """ nn.Upsample is deprecated """
+    """nn.Upsample is deprecated"""
 
     def __init__(self, scale_factor, mode="nearest"):
         super(Upsample, self).__init__()
@@ -117,54 +176,50 @@ def forward(self, x):
         x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
         return x
 
 
-class Mish(nn.Module):
-    """ The MISH activation function (https://github.com/digantamisra98/Mish) """
-
-    def __init__(self):
-        super(Mish, self).__init__()
-
-    def forward(self, x):
-        return x * torch.tanh(F.softplus(x))
 
 class YOLOLayer(nn.Module):
     """Detection layer"""
 
-    def __init__(self, anchors, num_classes):
+    def __init__(self, anchors, num_classes, new_coords):
         super(YOLOLayer, self).__init__()
         self.num_anchors = len(anchors)
         self.num_classes = num_classes
+        self.new_coords = new_coords
         self.mse_loss = nn.MSELoss()
         self.bce_loss = nn.BCELoss()
         self.no = num_classes + 5  # number of outputs per anchor
         self.grid = torch.zeros(1)  # TODO
 
         anchors = torch.tensor(list(chain(*anchors))).float().view(-1, 2)
-        self.register_buffer('anchors', anchors)
-        self.register_buffer(
-            'anchor_grid', anchors.clone().view(1, -1, 1, 1, 2))
+        self.register_buffer("anchors", anchors)
+        self.register_buffer("anchor_grid", anchors.clone().view(1, -1, 1, 1, 2))
         self.stride = None
 
     def forward(self, x, img_size):
         stride = img_size // x.size(2)
         self.stride = stride
         bs, _, ny, nx = x.shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
-        x = x.view(bs, self.num_anchors, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+        x = (
+            x.view(bs, self.num_anchors, self.no, ny, nx)
+            .permute(0, 1, 3, 4, 2)
+            .contiguous()
+        )
 
         if not self.training:  # inference
             if self.grid.shape[2:4] != x.shape[2:4]:
                 self.grid = self._make_grid(nx, ny).to(x.device)
-
-            x = torch.cat([
-                (x[..., 0:2].sigmoid() + self.grid) * stride,  # xy
-                torch.exp(x[..., 2:4]) * self.anchor_grid,  # wh
-                x[..., 4:].sigmoid(),
-            ], axis=4).view(bs, -1, self.no)
-
+            x[..., 0:2] = (x[..., 0:2].sigmoid() + self.grid) * stride  # xy
+            if self.new_coords:
+                x[..., 2:4] = x[..., 2:4].sigmoid() ** 2 * (4 * self.anchor_grid)  # wh
+            else:
+                x[..., 2:4] = torch.exp(x[..., 2:4]) * self.anchor_grid  # wh
+            x[..., 4:] = x[..., 4:].sigmoid()  # conf, cls
+            x = x.view(bs, -1, self.no)
         return x
 
     @staticmethod
     def _make_grid(nx=20, ny=20):
-        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing='ij')
+        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij")
         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
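Note: with `new_coords` the width/height decode becomes `sigmoid(t)^2 * 4 * anchor` instead of `exp(t) * anchor`, which keeps the prediction bounded by four times the anchor; the `(sigmoid(t) * 2)^2 * anchor` form used later in the loss is the same expression. A quick numeric check (illustrative values only):

```python
import torch

t = torch.linspace(-3.0, 3.0, 7)    # raw wh predictions
anchor = torch.tensor(100.0)        # anchor size from the cfg

old = torch.exp(t) * anchor                      # unbounded exponential decode
new_a = t.sigmoid() ** 2 * (4 * anchor)          # form used in YOLOLayer.forward
new_b = (t.sigmoid() * 2) ** 2 * anchor          # form used in compute_loss
assert torch.allclose(new_a, new_b)              # algebraically identical
print(new_a.max().item() < 4 * anchor.item())    # True: capped at 4x the anchor
```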
@@ -189,24 +244,44 @@ def __init__(self, config_path):
         super(Darknet, self).__init__()
         self.module_defs = parse_model_config(config_path)
         self.hyperparams, self.module_list = create_modules(self.module_defs)
-        self.yolo_layers = [layer[0] for layer in self.module_list if isinstance(layer[0], YOLOLayer)]
-        self.seg_layers = [layer[0] for layer in self.module_list if isinstance(layer[0], SegLayer)]
+        self.yolo_layers = [
+            layer[0] for layer in self.module_list if isinstance(layer[0], YOLOLayer)
+        ]
+        self.seg_layers = [
+            layer[0] for layer in self.module_list if isinstance(layer[0], SegLayer)
+        ]
         self.num_seg_classes = self.seg_layers[0].num_classes
         self.seen = 0
         self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
 
     def forward(self, x, bb_targets=None, mask_targets=None):
         img_size = x.size(2)
-        loss = 0
         layer_outputs, yolo_outputs, segmentation_outputs = [], [], []
-        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
-            if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs, self.module_list)
+        ):
+            if module_def["type"] in [
+                "convolutional",
+                "deconvolutional",
+                "upsample",
+                "maxpool",
+            ]:
                 x = module(x)
             elif module_def["type"] == "route":
-                combined_outputs = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
-                group_size = combined_outputs.shape[1] // int(module_def.get("groups", 1))
+                combined_outputs = torch.cat(
+                    [
+                        layer_outputs[int(layer_i)]
+                        for layer_i in module_def["layers"].split(",")
+                    ],
+                    1,
+                )
+                group_size = combined_outputs.shape[1] // int(
+                    module_def.get("groups", 1)
+                )
                 group_id = int(module_def.get("group_id", 0))
-                x = combined_outputs[:, group_size * group_id : group_size * (group_id + 1)] # Slice groupings used by yolo v4
+                x = combined_outputs[
+                    :, group_size * group_id : group_size * (group_id + 1)
+                ]  # Slice groupings used by yolo v4
             elif module_def["type"] == "shortcut":
                 layer_i = int(module_def["from"])
                 x = layer_outputs[-1] + layer_outputs[layer_i]
@@ -217,7 +292,11 @@ def forward(self, x, bb_targets=None, mask_targets=None):
                 x = module[0](x)
                 segmentation_outputs.append(x)
             layer_outputs.append(x)
-        return (yolo_outputs, segmentation_outputs) if self.training else (torch.cat(yolo_outputs, 1), torch.cat(segmentation_outputs, 1))
+        return (
+            (yolo_outputs, segmentation_outputs)
+            if self.training
+            else (torch.cat(yolo_outputs, 1), torch.cat(segmentation_outputs, 1))
+        )
 
     def load_darknet_weights(self, weights_path):
         """Parses and loads the weights stored in 'weights_path'"""
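Note: the grouped `[route]` handling above is what the `groups=2, group_id=1` entries in the new cfg files rely on. The routed tensor is split along the channel dimension and only one slice is forwarded, which is also why `create_modules` divides the filter count by `groups`. A small illustration with made-up shapes:

```python
import torch

combined = torch.randn(1, 256, 52, 52)  # concatenated route output (hypothetical)
groups, group_id = 2, 1                 # as in "[route] layers=-1 groups=2 group_id=1"
group_size = combined.shape[1] // groups
x = combined[:, group_size * group_id : group_size * (group_id + 1)]
print(x.shape)                          # torch.Size([1, 128, 52, 52])
```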
@@ -242,7 +321,9 @@ def load_darknet_weights(self, weights_path):
                 pass
 
         ptr = 0
-        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs, self.module_list)
+        ):
             if i == cutoff:
                 break
             if module_def["type"] == "convolutional":
@@ -252,50 +333,58 @@ def load_darknet_weights(self, weights_path):
                     bn_layer = module[1]
                     num_b = bn_layer.bias.numel()  # Number of biases
                     # Bias
-                    bn_b = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.bias)
+                    bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.bias
+                    )
                     bn_layer.bias.data.copy_(bn_b)
                     ptr += num_b
                     # Weight
-                    bn_w = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.weight)
+                    bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.weight
+                    )
                     bn_layer.weight.data.copy_(bn_w)
                     ptr += num_b
                     # Running Mean
-                    bn_rm = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean)
+                    bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.running_mean
+                    )
                     bn_layer.running_mean.data.copy_(bn_rm)
                     ptr += num_b
                     # Running Var
-                    bn_rv = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.running_var)
+                    bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.running_var
+                    )
                     bn_layer.running_var.data.copy_(bn_rv)
                     ptr += num_b
                 else:
                     # Load conv. bias
                     num_b = conv_layer.bias.numel()
-                    conv_b = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(conv_layer.bias)
+                    conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        conv_layer.bias
+                    )
                     conv_layer.bias.data.copy_(conv_b)
                     ptr += num_b
                 # Load conv. weights
                 num_w = conv_layer.weight.numel()
-                conv_w = torch.from_numpy(
-                    weights[ptr: ptr + num_w]).view_as(conv_layer.weight)
+                conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(
+                    conv_layer.weight
+                )
                 conv_layer.weight.data.copy_(conv_w)
                 ptr += num_w
 
     def save_darknet_weights(self, path, cutoff=-1):
         """
-            @:param path - path of the new weights file
-            @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
+        @:param path - path of the new weights file
+        @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
         """
         fp = open(path, "wb")
         self.header_info[3] = self.seen
         self.header_info.tofile(fp)
 
         # Iterate through layers
-        for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs[:cutoff], self.module_list[:cutoff])
+        ):
             if module_def["type"] == "convolutional":
                 conv_layer = module[0]
                 # If batch norm, load bn first
@@ -324,8 +413,9 @@ def load_model(model_path, weights_path=None):
     :return: Returns model
     :rtype: Darknet
     """
-    device = torch.device("cuda" if torch.cuda.is_available()
-                          else "cpu")  # Select device for inference
+    device = torch.device(
+        "cuda" if torch.cuda.is_available() else "cpu"
+    )  # Select device for inference
     model = Darknet(model_path).to(device)
 
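Note: `load_darknet_weights` still reads a flat float32 buffer and advances `ptr` by `numel()` per tensor, in the fixed darknet order (BN bias, BN weight, running mean, running variance, then convolution weights); the reflow above does not change that layout. A toy illustration of the pointer arithmetic (hypothetical shapes, not the YOEO loader itself):

```python
import numpy as np
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)

# Serialize one batch-normalized convolution in darknet order.
order = (bn.bias, bn.weight, bn.running_mean, bn.running_var, conv.weight)
buf = np.concatenate([t.detach().numpy().ravel() for t in order])

# Read it back the same way the loader does.
ptr = 0
for tensor in order:
    n = tensor.numel()
    tensor.data.copy_(torch.from_numpy(buf[ptr:ptr + n]).view_as(tensor))
    ptr += n
print(ptr == buf.size)  # True: every value in the buffer was consumed
```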
diff --git a/yoeo/train.py b/yoeo/train.py
index 6f582fe..1572043 100755
--- a/yoeo/train.py
+++ b/yoeo/train.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 import torch
+import torch.nn as nn
 from torch.utils.data import DataLoader
 import torch.optim as optim
 from torch.autograd import Variable
@@ -138,23 +139,36 @@ def run():
     # Create optimizer
     # ################
 
-    params = [p for p in model.parameters() if p.requires_grad]
+    unregularized_parameters, regularized_parameters = [], []
+    for _, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            unregularized_parameters.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            unregularized_parameters.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            regularized_parameters.append(v.weight)  # apply decay
+
 
     if (model.hyperparams['optimizer'] in [None, "adam"]):
         optimizer = optim.Adam(
-            params,
+            unregularized_parameters,
             lr=model.hyperparams['learning_rate'],
-            weight_decay=model.hyperparams['decay'],
         )
     elif (model.hyperparams['optimizer'] == "sgd"):
         optimizer = optim.SGD(
-            params,
+            unregularized_parameters,
             lr=model.hyperparams['learning_rate'],
-            weight_decay=model.hyperparams['decay'],
             momentum=model.hyperparams['momentum'])
     else:
         print("Unknown optimizer. Please choose between (adam, sgd).")
 
+    # add normal weights with weight_decay
+    optimizer.add_param_group({'params': regularized_parameters, 'weight_decay': model.hyperparams['decay']})
+
+    print(f'Optimizer groups: {len(unregularized_parameters)} unregularized, '
+          f'{len(regularized_parameters)} with weight decay')
+    del unregularized_parameters, regularized_parameters
+
     # skip epoch zero, because then the calculations for when to evaluate/checkpoint makes more intuitive sense
     # e.g. when you stop after 30 epochs and evaluate every 10 epochs then the evaluations happen after: 10,20,30
     # instead of: 0, 10, 20
diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py
index f819b52..239fb12 100644
--- a/yoeo/utils/loss.py
+++ b/yoeo/utils/loss.py
@@ -94,9 +94,17 @@ def compute_loss(combined_predictions, combined_targets, model):
 
             # Regression of the box
             # Apply sigmoid to xy offset predictions in each cell that has a target
-            pxy = ps[:, :2].sigmoid()
-            # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
-            pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
+
+            # Check if the model has the new_coords system
+            if model.yolo_layers[layer_index].new_coords:
+                pxy = ps[:, :2].sigmoid()
+                # Scale the sigmoid of the wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
+                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[layer_index]
+            else:
+                pxy = ps[:, :2].sigmoid()
+                # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
+                pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
+
             # Build box out of xy and wh
             pbox = torch.cat((pxy, pwh), 1)
             # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
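Note: the optimizer change excludes biases and BatchNorm weights from L2 regularization by building the optimizer from the no-decay group and then adding the decayed weights via `add_param_group`, in the style of YOLOv5-like trainers. A minimal standalone sketch of the resulting groups (hypothetical two-layer module, not the YOEO model):

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1, bias=True), nn.BatchNorm2d(8))

no_decay, decay = [], []
for _, v in model.named_modules():
    if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
        no_decay.append(v.bias)                   # conv/BN biases
    if isinstance(v, nn.BatchNorm2d):
        no_decay.append(v.weight)                 # BN scale: no decay
    elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
        decay.append(v.weight)                    # conv weights: decay

optimizer = optim.Adam(no_decay, lr=1e-4)         # weight_decay defaults to 0
optimizer.add_param_group({"params": decay, "weight_decay": 5e-4})
print([len(g["params"]) for g in optimizer.param_groups])  # [3, 1]
```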