diff --git a/config/yoeo_light_decoder_deconv.cfg b/config/yoeo_light_decoder_deconv.cfg new file mode 100644 index 0000000..f0b551e --- /dev/null +++ b/config/yoeo_light_decoder_deconv.cfg @@ -0,0 +1,312 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + 
+[deconvolutional] +batch_normalize=1 +filters=16 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv.cfg b/config/yoeo_medium_decoder_deconv.cfg new file mode 100644 index 0000000..45df2c2 --- /dev/null +++ b/config/yoeo_medium_decoder_deconv.cfg @@ -0,0 +1,356 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] 
+batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_medium_decoder_deconv_context.cfg b/config/yoeo_medium_decoder_deconv_context.cfg new file mode 100644 index 0000000..cd1803d --- /dev/null +++ b/config/yoeo_medium_decoder_deconv_context.cfg @@ -0,0 +1,381 @@ +[net] +# Testing +#batch=1 +#subdivisions=1 +# Training +batch=64 +subdivisions=4 +width=416 +height=416 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=50000,60000 +scales=.1,.1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-1 +groups=2 +group_id=1 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -6,-1 + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +########### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=512 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = -4 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + 
+[route] +layers = -1, 24 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=1 +jitter=.3 +ignore_thresh = .7 +truth_thresh = 1 +random=1 + +[route] +layers = 18 +groups=2 +group_id=1 + +[maxpool] +size=13 +stride=13 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=13 + +[route] +layers = 18 +groups=2 +group_id=0 + +[route] +layers = -1, -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 10 + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[shortcut] +from=-2 + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/config/yoeo_v7_rev1.cfg b/config/yoeo_v7_rev1.cfg new file mode 100644 index 0000000..5f9d59c --- /dev/null +++ b/config/yoeo_v7_rev1.cfg @@ -0,0 +1,771 @@ +[net] +# Testing +batch=1 +subdivisions=1 +# Training +batch=64 +subdivisions=1 +width=512 +height=512 +channels=3 +momentum=0.9 +decay=0.0005 +angle=0 +saturation = 1.5 +exposure = 1.5 +hue=.1 + +learning_rate=0.0001 +burn_in=100 +max_batches = 4000 +policy=steps +steps=100000,150000 +scales=.1,.1 + +# 0 +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=2 +pad=1 +activation=leaky + +# 1 +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=2 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 8 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 16 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] 
+batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 24 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[maxpool] +size=2 +stride=2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=256 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 32 +[convolutional] +batch_normalize=1 +filters=512 +size=1 +stride=1 +pad=1 +activation=leaky + + +################################## + +### SPPCSP ### +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -2 + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +### SPP ### +[maxpool] +stride=1 +size=5 + +[route] +layers=-2 + +[maxpool] +stride=1 +size=9 + +[route] +layers=-4 + +[maxpool] +stride=1 +size=13 + +[route] +layers=-1,-3,-5,-6 +### End SPP ### + +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -10,-1 + +# 44 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky +### End SPPCSP ### + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 24 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 56 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[upsample] +stride=2 + +[route] +layers = 16 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1,-3 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=32 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 68 +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +########################## + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=128 +activation=leaky + +[route] +layers = -1,56 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=64 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 
+filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 77 +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +size=3 +stride=2 +pad=1 +filters=256 +activation=leaky + +[route] +layers = -1,44 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[route] +layers=-2 + +[convolutional] +batch_normalize=1 +filters=128 +size=1 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -5,-3,-2,-1 + +# 86 +[convolutional] +batch_normalize=1 +filters=256 +size=1 +stride=1 +pad=1 +activation=leaky + +############################# + +# ============ End of Neck ============ # + +# ============ Head ============ # + + +# P3 +[route] +layers = 68 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=128 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P4 +[route] +layers = 77 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=256 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + + +# P5 +[route] +layers = 86 + +[convolutional] +batch_normalize=1 +size=3 +stride=1 +pad=1 +filters=512 +activation=leaky + +[convolutional] +size=1 +stride=1 +pad=1 +filters=8 +activation=linear +#activation=logistic + +[yolo] +mask = 0 +anchors = 100, 100 +classes=3 +num=9 +jitter=.1 +scale_x_y = 2.0 +objectness_smooth=1 +ignore_thresh = .7 +truth_thresh = 1 +#random=1 +resize=1.5 +iou_thresh=0.2 +iou_normalizer=0.05 +cls_normalizer=0.5 +obj_normalizer=1.0 +iou_loss=ciou +nms_kind=diounms +beta_nms=0.6 +new_coords=1 +max_delta=2 + +[route] +layers = 44 + +[deconvolutional] +batch_normalize=1 +filters=256 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 24 + +[deconvolutional] +batch_normalize=1 +filters=128 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 16 + +[deconvolutional] +batch_normalize=1 +filters=64 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 8 + +[deconvolutional] +batch_normalize=1 +filters=32 +size=2 +stride=2 +pad=0 +activation=leaky + +[route] +layers = -1, 0 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[deconvolutional] +batch_normalize=1 +filters=3 +size=2 +stride=2 +pad=0 +activation=linear + +[seg] +classes=3 diff --git a/yoeo/models.py b/yoeo/models.py index 1b69cd8..4dffee9 100644 --- a/yoeo/models.py +++ b/yoeo/models.py @@ -16,24 +16,31 @@ def 
create_modules(module_defs):
     Constructs module list of layer blocks from module configuration in module_defs
     """
     hyperparams = module_defs.pop(0)
-    hyperparams.update({
-        'batch': int(hyperparams['batch']),
-        'subdivisions': int(hyperparams['subdivisions']),
-        'width': int(hyperparams['width']),
-        'height': int(hyperparams['height']),
-        'channels': int(hyperparams['channels']),
-        'optimizer': hyperparams.get('optimizer'),
-        'momentum': float(hyperparams['momentum']),
-        'decay': float(hyperparams['decay']),
-        'learning_rate': float(hyperparams['learning_rate']),
-        'burn_in': int(hyperparams['burn_in']),
-        'max_batches': int(hyperparams['max_batches']),
-        'policy': hyperparams['policy'],
-        'lr_steps': list(zip(map(int, hyperparams["steps"].split(",")),
-                             map(float, hyperparams["scales"].split(","))))
-    })
-    assert hyperparams["height"] == hyperparams["width"], \
+    hyperparams.update(
+        {
+            "batch": int(hyperparams["batch"]),
+            "subdivisions": int(hyperparams["subdivisions"]),
+            "width": int(hyperparams["width"]),
+            "height": int(hyperparams["height"]),
+            "channels": int(hyperparams["channels"]),
+            "optimizer": hyperparams.get("optimizer"),
+            "momentum": float(hyperparams["momentum"]),
+            "decay": float(hyperparams["decay"]),
+            "learning_rate": float(hyperparams["learning_rate"]),
+            "burn_in": int(hyperparams["burn_in"]),
+            "max_batches": int(hyperparams["max_batches"]),
+            "policy": hyperparams["policy"],
+            "lr_steps": list(
+                zip(
+                    map(int, hyperparams["steps"].split(",")),
+                    map(float, hyperparams["scales"].split(",")),
+                )
+            ),
+        }
+    )
+    assert hyperparams["height"] == hyperparams["width"], (
         "Height and width should be equal! Non square images are padded with zeros."
+    )
     output_filters = [hyperparams["channels"]]
     module_list = nn.ModuleList()
     for module_i, module_def in enumerate(module_defs):
@@ -56,20 +63,69 @@ def create_modules(module_defs):
                 ),
             )
             if bn:
-                modules.add_module(f"batch_norm_{module_i}",
-                                   nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5))
+                modules.add_module(
+                    f"batch_norm_{module_i}",
+                    nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5),
+                )
             if module_def["activation"] == "leaky":
                 modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
-            if module_def["activation"] == "mish":
-                modules.add_module(f"mish_{module_i}", Mish())
+            elif module_def["activation"] == "mish":
+                modules.add_module(f"mish_{module_i}", nn.Mish())
+            elif module_def["activation"] == "logistic":
+                modules.add_module(f"sigmoid_{module_i}", nn.Sigmoid())
+            elif module_def["activation"] == "swish":
+                modules.add_module(f"swish_{module_i}", nn.SiLU())
+            elif module_def["activation"] == "linear":
+                pass
+            else:
+                raise ValueError(f"Unknown activation: {module_def['activation']}")
+
+        elif module_def["type"] == "deconvolutional":
+            bn = int(module_def["batch_normalize"])
+            filters = int(module_def["filters"])
+            kernel_size = int(module_def["size"])
+            pad = int(module_def["pad"])
+            modules.add_module(
+                f"deconv_{module_i}",
+                nn.ConvTranspose2d(
+                    in_channels=output_filters[-1],
+                    out_channels=filters,
+                    kernel_size=kernel_size,
+                    stride=int(module_def["stride"]),
+                    padding=pad,
+                    bias=not bn,
+                ),
+            )
+            if bn:
+                modules.add_module(
+                    f"batch_norm_{module_i}",
+                    nn.BatchNorm2d(filters, momentum=0.1, eps=1e-5),
+                )
+            if module_def["activation"] == "leaky":
+                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))
+            elif module_def["activation"] == "mish":
+                modules.add_module(f"mish_{module_i}", nn.Mish())
+            elif module_def["activation"] == "logistic":
+                modules.add_module(f"sigmoid_{module_i}", nn.Sigmoid())
+            elif module_def["activation"] == "swish":
+                modules.add_module(f"swish_{module_i}", nn.SiLU())
+            elif module_def["activation"] == "linear":
+                pass
+            else:
+                raise ValueError(f"Unknown activation: {module_def['activation']}")
 
         elif module_def["type"] == "maxpool":
             kernel_size = int(module_def["size"])
             stride = int(module_def["stride"])
             if kernel_size == 2 and stride == 1:
-                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
-            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
-                                   padding=int((kernel_size - 1) // 2))
+                modules.add_module(
+                    f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1))
+                )
+            maxpool = nn.MaxPool2d(
+                kernel_size=kernel_size,
+                stride=stride,
+                padding=int((kernel_size - 1) // 2),
+            )
             modules.add_module(f"maxpool_{module_i}", maxpool)
 
         elif module_def["type"] == "upsample":
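Note: every `[deconvolutional]` block in the new configs uses size=2, stride=2, pad=0, so the `nn.ConvTranspose2d` created above doubles the spatial resolution (out = (in − 1)·stride − 2·pad + size). A standalone sketch to confirm the shape math (illustrative channel counts, not part of the patch):

```python
import torch
import torch.nn as nn

# Same mapping as the new "deconvolutional" branch, for size=2, stride=2, pad=0:
# output = (in - 1) * 2 - 2 * 0 + 2 = 2 * in, i.e. exact 2x upsampling.
deconv = nn.ConvTranspose2d(in_channels=128, out_channels=64,
                            kernel_size=2, stride=2, padding=0, bias=False)
x = torch.randn(1, 128, 13, 13)  # e.g. the 13x13 grid of a 416x416 input
print(deconv(x).shape)           # torch.Size([1, 64, 26, 26])
```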
@@ -78,7 +134,9 @@ def create_modules(module_defs):
         elif module_def["type"] == "route":
             layers = [int(x) for x in module_def["layers"].split(",")]
-            filters = sum([output_filters[1:][i] for i in layers]) // int(module_def.get("groups", 1))
+            filters = sum([output_filters[1:][i] for i in layers]) // int(
+                module_def.get("groups", 1)
+            )
             modules.add_module(f"route_{module_i}", nn.Sequential())
 
         elif module_def["type"] == "shortcut":
@@ -92,8 +150,9 @@ def create_modules(module_defs):
             anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
             anchors = [anchors[i] for i in anchor_idxs]
             num_classes = int(module_def["classes"])
+            new_coords = bool(int(module_def.get("new_coords", 0)))
             # Define detection layer
-            yolo_layer = YOLOLayer(anchors, num_classes)
+            yolo_layer = YOLOLayer(anchors, num_classes, new_coords)
             modules.add_module(f"yolo_{module_i}", yolo_layer)
         elif module_def["type"] == "seg":
             num_classes = int(module_def["classes"])
@@ -106,7 +165,7 @@ def create_modules(module_defs):
 
 
 class Upsample(nn.Module):
-    """ nn.Upsample is deprecated """
+    """nn.Upsample is deprecated"""
 
     def __init__(self, scale_factor, mode="nearest"):
         super(Upsample, self).__init__()
@@ -117,54 +176,50 @@ def forward(self, x):
         x = F.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)
         return x
 
 
-class Mish(nn.Module):
-    """ The MISH activation function (https://github.com/digantamisra98/Mish) """
-
-    def __init__(self):
-        super(Mish, self).__init__()
-
-    def forward(self, x):
-        return x * torch.tanh(F.softplus(x))
 
 class YOLOLayer(nn.Module):
     """Detection layer"""
 
-    def __init__(self, anchors, num_classes):
+    def __init__(self, anchors, num_classes, new_coords):
         super(YOLOLayer, self).__init__()
         self.num_anchors = len(anchors)
         self.num_classes = num_classes
+        self.new_coords = new_coords
         self.mse_loss = nn.MSELoss()
         self.bce_loss = nn.BCELoss()
         self.no = num_classes + 5  # number of outputs per anchor
         self.grid = torch.zeros(1)  # TODO
 
         anchors = torch.tensor(list(chain(*anchors))).float().view(-1, 2)
-        self.register_buffer('anchors', anchors)
-        self.register_buffer(
-            'anchor_grid', anchors.clone().view(1, -1, 1, 1, 2))
+        self.register_buffer("anchors", anchors)
+        self.register_buffer("anchor_grid", anchors.clone().view(1, -1, 1, 1, 2))
         self.stride = None
 
     def forward(self, x, img_size):
         stride = img_size // x.size(2)
         self.stride = stride
         bs, _, ny, nx = x.shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
-        x = x.view(bs, self.num_anchors, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+        x = (
+            x.view(bs, self.num_anchors, self.no, ny, nx)
+            .permute(0, 1, 3, 4, 2)
+            .contiguous()
+        )
 
         if not self.training:  # inference
             if self.grid.shape[2:4] != x.shape[2:4]:
                 self.grid = self._make_grid(nx, ny).to(x.device)
-
-            x = torch.cat([
-                (x[..., 0:2].sigmoid() + self.grid) * stride,  # xy
-                torch.exp(x[..., 2:4]) * self.anchor_grid,  # wh
-                x[..., 4:].sigmoid(),
-            ], axis=4).view(bs, -1, self.no)
-
+            x[..., 0:2] = (x[..., 0:2].sigmoid() + self.grid) * stride  # xy
+            if self.new_coords:
+                x[..., 2:4] = x[..., 2:4].sigmoid() ** 2 * (4 * self.anchor_grid)  # wh
+            else:
+                x[..., 2:4] = torch.exp(x[..., 2:4]) * self.anchor_grid  # wh
+            x[..., 4:] = x[..., 4:].sigmoid()  # conf, cls
+            x = x.view(bs, -1, self.no)
         return x
 
     @staticmethod
     def _make_grid(nx=20, ny=20):
-        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing='ij')
+        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)], indexing="ij")
         return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
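Note: with `new_coords` the width/height decode becomes `sigmoid(t)^2 * 4 * anchor` instead of `exp(t) * anchor`, which keeps the prediction bounded by four times the anchor; the `(sigmoid(t) * 2)^2 * anchor` form used later in the loss is the same expression. A quick numeric check (illustrative values only):

```python
import torch

t = torch.linspace(-3.0, 3.0, 7)    # raw wh predictions
anchor = torch.tensor(100.0)        # anchor size from the cfg

old = torch.exp(t) * anchor                      # unbounded exponential decode
new_a = t.sigmoid() ** 2 * (4 * anchor)          # form used in YOLOLayer.forward
new_b = (t.sigmoid() * 2) ** 2 * anchor          # form used in compute_loss
assert torch.allclose(new_a, new_b)              # algebraically identical
print(new_a.max().item() < 4 * anchor.item())    # True: capped at 4x the anchor
```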
@@ -189,24 +244,44 @@ def __init__(self, config_path):
         super(Darknet, self).__init__()
         self.module_defs = parse_model_config(config_path)
         self.hyperparams, self.module_list = create_modules(self.module_defs)
-        self.yolo_layers = [layer[0] for layer in self.module_list if isinstance(layer[0], YOLOLayer)]
-        self.seg_layers = [layer[0] for layer in self.module_list if isinstance(layer[0], SegLayer)]
+        self.yolo_layers = [
+            layer[0] for layer in self.module_list if isinstance(layer[0], YOLOLayer)
+        ]
+        self.seg_layers = [
+            layer[0] for layer in self.module_list if isinstance(layer[0], SegLayer)
+        ]
         self.num_seg_classes = self.seg_layers[0].num_classes
         self.seen = 0
         self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
 
     def forward(self, x, bb_targets=None, mask_targets=None):
         img_size = x.size(2)
-        loss = 0
         layer_outputs, yolo_outputs, segmentation_outputs = [], [], []
-        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
-            if module_def["type"] in ["convolutional", "upsample", "maxpool"]:
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs, self.module_list)
+        ):
+            if module_def["type"] in [
+                "convolutional",
+                "deconvolutional",
+                "upsample",
+                "maxpool",
+            ]:
                 x = module(x)
             elif module_def["type"] == "route":
-                combined_outputs = torch.cat([layer_outputs[int(layer_i)] for layer_i in module_def["layers"].split(",")], 1)
-                group_size = combined_outputs.shape[1] // int(module_def.get("groups", 1))
+                combined_outputs = torch.cat(
+                    [
+                        layer_outputs[int(layer_i)]
+                        for layer_i in module_def["layers"].split(",")
+                    ],
+                    1,
+                )
+                group_size = combined_outputs.shape[1] // int(
+                    module_def.get("groups", 1)
+                )
                 group_id = int(module_def.get("group_id", 0))
-                x = combined_outputs[:, group_size * group_id : group_size * (group_id + 1)] # Slice groupings used by yolo v4
+                x = combined_outputs[
+                    :, group_size * group_id : group_size * (group_id + 1)
+                ]  # Slice groupings used by yolo v4
             elif module_def["type"] == "shortcut":
                 layer_i = int(module_def["from"])
                 x = layer_outputs[-1] + layer_outputs[layer_i]
@@ -217,7 +292,11 @@ def forward(self, x, bb_targets=None, mask_targets=None):
                 x = module[0](x)
                 segmentation_outputs.append(x)
             layer_outputs.append(x)
-        return (yolo_outputs, segmentation_outputs) if self.training else (torch.cat(yolo_outputs, 1), torch.cat(segmentation_outputs, 1))
+        return (
+            (yolo_outputs, segmentation_outputs)
+            if self.training
+            else (torch.cat(yolo_outputs, 1), torch.cat(segmentation_outputs, 1))
+        )
 
     def load_darknet_weights(self, weights_path):
         """Parses and loads the weights stored in 'weights_path'"""
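Note: the grouped `[route]` handling above is what the `groups=2, group_id=1` entries in the new cfg files rely on. The routed tensor is split along the channel dimension and only one slice is forwarded, which is also why `create_modules` divides the filter count by `groups`. A small illustration with made-up shapes:

```python
import torch

combined = torch.randn(1, 256, 52, 52)  # concatenated route output (hypothetical)
groups, group_id = 2, 1                 # as in "[route] layers=-1 groups=2 group_id=1"
group_size = combined.shape[1] // groups
x = combined[:, group_size * group_id : group_size * (group_id + 1)]
print(x.shape)                          # torch.Size([1, 128, 52, 52])
```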
@@ -242,7 +321,9 @@ def load_darknet_weights(self, weights_path):
                 pass
 
         ptr = 0
-        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs, self.module_list)
+        ):
             if i == cutoff:
                 break
             if module_def["type"] == "convolutional":
@@ -252,50 +333,58 @@ def load_darknet_weights(self, weights_path):
                     bn_layer = module[1]
                     num_b = bn_layer.bias.numel()  # Number of biases
                     # Bias
-                    bn_b = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.bias)
+                    bn_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.bias
+                    )
                     bn_layer.bias.data.copy_(bn_b)
                     ptr += num_b
                     # Weight
-                    bn_w = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.weight)
+                    bn_w = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.weight
+                    )
                     bn_layer.weight.data.copy_(bn_w)
                     ptr += num_b
                     # Running Mean
-                    bn_rm = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.running_mean)
+                    bn_rm = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.running_mean
+                    )
                     bn_layer.running_mean.data.copy_(bn_rm)
                     ptr += num_b
                     # Running Var
-                    bn_rv = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(bn_layer.running_var)
+                    bn_rv = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        bn_layer.running_var
+                    )
                     bn_layer.running_var.data.copy_(bn_rv)
                     ptr += num_b
                 else:
                     # Load conv. bias
                     num_b = conv_layer.bias.numel()
-                    conv_b = torch.from_numpy(
-                        weights[ptr: ptr + num_b]).view_as(conv_layer.bias)
+                    conv_b = torch.from_numpy(weights[ptr : ptr + num_b]).view_as(
+                        conv_layer.bias
+                    )
                     conv_layer.bias.data.copy_(conv_b)
                     ptr += num_b
                 # Load conv. weights
                 num_w = conv_layer.weight.numel()
-                conv_w = torch.from_numpy(
-                    weights[ptr: ptr + num_w]).view_as(conv_layer.weight)
+                conv_w = torch.from_numpy(weights[ptr : ptr + num_w]).view_as(
+                    conv_layer.weight
+                )
                 conv_layer.weight.data.copy_(conv_w)
                 ptr += num_w
 
     def save_darknet_weights(self, path, cutoff=-1):
         """
-            @:param path - path of the new weights file
-            @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
+        @:param path - path of the new weights file
+        @:param cutoff - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
         """
         fp = open(path, "wb")
         self.header_info[3] = self.seen
         self.header_info.tofile(fp)
 
         # Iterate through layers
-        for i, (module_def, module) in enumerate(zip(self.module_defs[:cutoff], self.module_list[:cutoff])):
+        for i, (module_def, module) in enumerate(
+            zip(self.module_defs[:cutoff], self.module_list[:cutoff])
+        ):
             if module_def["type"] == "convolutional":
                 conv_layer = module[0]
                 # If batch norm, load bn first
@@ -324,8 +413,9 @@ def load_model(model_path, weights_path=None):
     :return: Returns model
     :rtype: Darknet
     """
-    device = torch.device("cuda" if torch.cuda.is_available()
-                          else "cpu")  # Select device for inference
+    device = torch.device(
+        "cuda" if torch.cuda.is_available() else "cpu"
+    )  # Select device for inference
     model = Darknet(model_path).to(device)
 
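Note: `load_darknet_weights` still reads a flat float32 buffer and advances `ptr` by `numel()` per tensor, in the fixed darknet order (BN bias, BN weight, running mean, running variance, then convolution weights); the reflow above does not change that layout. A toy illustration of the pointer arithmetic (hypothetical shapes, not the YOEO loader itself):

```python
import numpy as np
import torch
import torch.nn as nn

conv = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
bn = nn.BatchNorm2d(16)

# Serialize one batch-normalized convolution in darknet order.
order = (bn.bias, bn.weight, bn.running_mean, bn.running_var, conv.weight)
buf = np.concatenate([t.detach().numpy().ravel() for t in order])

# Read it back the same way the loader does.
ptr = 0
for tensor in order:
    n = tensor.numel()
    tensor.data.copy_(torch.from_numpy(buf[ptr:ptr + n]).view_as(tensor))
    ptr += n
print(ptr == buf.size)  # True: every value in the buffer was consumed
```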
diff --git a/yoeo/train.py b/yoeo/train.py
index 6f582fe..1572043 100755
--- a/yoeo/train.py
+++ b/yoeo/train.py
@@ -9,6 +9,7 @@
 import numpy as np
 
 import torch
+import torch.nn as nn
 from torch.utils.data import DataLoader
 import torch.optim as optim
 from torch.autograd import Variable
@@ -138,23 +139,36 @@ def run():
     # Create optimizer
     # ################
 
-    params = [p for p in model.parameters() if p.requires_grad]
+    unregularized_parameters, regularized_parameters = [], []
+    for _, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            unregularized_parameters.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            unregularized_parameters.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            regularized_parameters.append(v.weight)  # apply decay
+
 
     if (model.hyperparams['optimizer'] in [None, "adam"]):
         optimizer = optim.Adam(
-            params,
+            unregularized_parameters,
             lr=model.hyperparams['learning_rate'],
-            weight_decay=model.hyperparams['decay'],
         )
     elif (model.hyperparams['optimizer'] == "sgd"):
         optimizer = optim.SGD(
-            params,
+            unregularized_parameters,
             lr=model.hyperparams['learning_rate'],
-            weight_decay=model.hyperparams['decay'],
             momentum=model.hyperparams['momentum'])
     else:
         print("Unknown optimizer. Please choose between (adam, sgd).")
 
+    # add normal weights with weight_decay
+    optimizer.add_param_group({'params': regularized_parameters, 'weight_decay': model.hyperparams['decay']})
+
+    print(f'Optimizer groups: {len(unregularized_parameters)} unregularized, '
+          f'{len(regularized_parameters)} with weight decay')
+    del unregularized_parameters, regularized_parameters
+
     # skip epoch zero, because then the calculations for when to evaluate/checkpoint makes more intuitive sense
     # e.g. when you stop after 30 epochs and evaluate every 10 epochs then the evaluations happen after: 10,20,30
     # instead of: 0, 10, 20
diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py
index f819b52..239fb12 100644
--- a/yoeo/utils/loss.py
+++ b/yoeo/utils/loss.py
@@ -94,9 +94,17 @@ def compute_loss(combined_predictions, combined_targets, model):
 
             # Regression of the box
             # Apply sigmoid to xy offset predictions in each cell that has a target
-            pxy = ps[:, :2].sigmoid()
-            # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
-            pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
+
+            # Check if the model has the new_coords system
+            if model.yolo_layers[layer_index].new_coords:
+                pxy = ps[:, :2].sigmoid()
+                # Scale the sigmoid of the wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
+                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[layer_index]
+            else:
+                pxy = ps[:, :2].sigmoid()
+                # Apply exponent to wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
+                pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
+
             # Build box out of xy and wh
             pbox = torch.cat((pxy, pwh), 1)
             # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
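Note: the optimizer change excludes biases and BatchNorm weights from L2 regularization by building the optimizer from the no-decay group and then adding the decayed weights via `add_param_group`, in the style of YOLOv5-like trainers. A minimal standalone sketch of the resulting groups (hypothetical two-layer module, not the YOEO model):

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1, bias=True), nn.BatchNorm2d(8))

no_decay, decay = [], []
for _, v in model.named_modules():
    if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
        no_decay.append(v.bias)                   # conv/BN biases
    if isinstance(v, nn.BatchNorm2d):
        no_decay.append(v.weight)                 # BN scale: no decay
    elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
        decay.append(v.weight)                    # conv weights: decay

optimizer = optim.Adam(no_decay, lr=1e-4)         # weight_decay defaults to 0
optimizer.add_param_group({"params": decay, "weight_decay": 5e-4})
print([len(g["params"]) for g in optimizer.param_groups])  # [3, 1]
```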