
Commit bed0249

added readMe
1 parent 13821fb commit bed0249

File tree: 5 files changed, +92 −7 lines changed

README.md

+85 −1

@@ -1,2 +1,86 @@

# Robust Multimodal Fusion GAN

-Codebase for ACM MM'22 paper titled "Robust Multimodal Depth Estimation using Transformer based Generative Adversarial Networks"

This repo is the PyTorch implementation of our ACM Multimedia'22 paper, [Robust Multimodal Depth Estimation using Transformer based Generative Adversarial Networks](https://dl.acm.org/doi/abs/10.1145/3503161.3548418).
<p align="center">
  <img src="misc/teapot.png" alt="photo not available">
</p>

## Requirements

The base environment (Python 3.6) consists of:
```
pytorch == 1.10.2
torchvision == 0.11.3
tensorboard == 1.15
py-opencv == 4.5.5
pillow == 8.4.0
numpy == 1.17.4
typing == 3.6.4
```
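
If you want a quick sanity check that your environment roughly matches the versions above, a small optional snippet (the import names are the standard ones for these packages, not anything specific to this repo):

```python
# Optional sanity check: print installed versions and compare against the list above.
import torch, torchvision, cv2, PIL, numpy

print("pytorch     :", torch.__version__)        # expect 1.10.2
print("torchvision :", torchvision.__version__)  # expect 0.11.3
print("opencv      :", cv2.__version__)          # expect 4.5.5
print("pillow      :", PIL.__version__)          # expect 8.4.0
print("numpy       :", numpy.__version__)        # expect 1.17.4
```
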
## Dataset

Two datasets are primarily used: [ShapeNet](https://shapenet.org/) and [NYU_v2](https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html).
## Training

```bash
python train.py --model nyu_modelA --gpus=0,1 --batch_size=40 --n_epochs=27 --decay_epoch=15 --lr_gap=3 -p chkpts/nyu_modelA.pth -n nyu_modelA_train
```

1. `-n` gives a name to the run.
2. Modify the val dataloader path to point to the appropriate data directory.
3. Typically the data directory has the following structure:

```
data.nyu_v2
├── train
│   ├── sparse_depth
│   ├── depth_gt
│   ├── image_rgb
│   └── meta_info.txt
├── val
│   ├── sparse_depth
│   ├── depth_gt
│   ├── image_rgb
│   └── meta_info.txt
└── sample
    ├── sparse_depth
    ├── depth_gt
    ├── image_rgb
    └── meta_info.txt
```

4. The "depth_gt" and "sparse_depth" folders contain the dense and sparse depth maps, respectively.
5. meta_info.txt lists the file names in these folders. Refer to the misc/ folder for a sample meta_info file, and see the sketch after this list for one way to generate it.
6. The "sample" folder contains a few sparse samples used to track the model's learning visually. It is optional.
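
The meta_info format is `<file id> (height, width, channels)`, as in misc/meta_info.txt. As a rough sketch (not part of this repo), one way to generate such a file from a depth_gt folder, assuming single-channel PNG depth maps:

```python
# Hypothetical helper (not part of this repo): write meta_info.txt as
# "<file id> (H, W, C)" for every depth map in <root>/depth_gt.
import os
import cv2  # py-opencv

root = "data.nyu_v2/train"                     # assumed dataset split directory
gt_folder = os.path.join(root, "depth_gt")

with open(os.path.join(root, "meta_info.txt"), "w") as f:
    for name in sorted(os.listdir(gt_folder)):
        if not name.lower().endswith(".png"):  # assumed file extension
            continue
        depth = cv2.imread(os.path.join(gt_folder, name), cv2.IMREAD_UNCHANGED)
        h, w = depth.shape[:2]
        c = 1 if depth.ndim == 2 else depth.shape[2]
        f.write("{} ({}, {}, {})\n".format(os.path.splitext(name)[0], h, w, c))
```
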
## Validation

You can run standalone validation if you have a trained model. The checkpoint path must contain two files named generator_best.pth and discriminator_best.pth. Invoke the validation script with:

```bash
python validate.py --model nyu_modelA --gpus=0 --batch_size=16 --checkpoint_model=./logdir/nyu_train/saved_models/ -n nyu_test
```
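
A tiny optional pre-flight check (illustrative only, not part of this repo) to confirm the checkpoint folder contains the two expected files before launching validation:

```python
# Illustrative pre-flight check: validate.py expects these two files in --checkpoint_model.
import os

ckpt_dir = "./logdir/nyu_train/saved_models/"   # example path from the command above
for name in ("generator_best.pth", "discriminator_best.pth"):
    path = os.path.join(ckpt_dir, name)
    print("{:24s} {}".format(name, "found" if os.path.isfile(path) else "MISSING"))
```
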
## Misc

For convenience, some helper files are provided in the misc/ folder:

```
├── meta_info.txt  # example meta_info file
```

## Citation

If you find this repository helpful, please cite:

```
@inproceedings{10.1145/3503161.3548418,
  author = {Khan, Md Fahim Faysal and Devulapally, Anusha and Advani, Siddharth and Narayanan, Vijaykrishnan},
  title = {Robust Multimodal Depth Estimation Using Transformer Based Generative Adversarial Networks},
  year = {2022},
  isbn = {9781450392037},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/3503161.3548418},
  doi = {10.1145/3503161.3548418},
  booktitle = {Proceedings of the 30th ACM International Conference on Multimedia},
  pages = {3559–3568},
  numpages = {10},
  keywords = {sensor fusion, depth completion, generative adversarial networks (gan), multimodal sensing, robustness, sensor failure},
  location = {Lisboa, Portugal},
  series = {MM '22}
}
```
## Acknowledgement

This work was supported in part by the National Science Foundation (NSF) SOPHIA (CCF-1822923) and by the Center for Brain-inspired Computing (C-BRIC) and the Center for Research in Intelligent Storage and Processing in Memory (CRISP), two of the six centers in JUMP, a Semiconductor Research Corporation (SRC) program sponsored by DARPA.

datasets.py

+1 −1

```diff
@@ -190,7 +190,7 @@ def __init__(self, root, opt, hr_shape):
         # assumption is that the sparse depth is in "lidar" folder
         # ground truth depth is in "depth_gt" folder
         # and rgb image is in "image_rgb" folder
-        self.gt_folder, self.lq_folder, self.rgb_folder = os.path.join(root,'depth_gt'), os.path.join(root,'lidar_5p'), os.path.join(root,'image_rgb')
+        self.gt_folder, self.lq_folder, self.rgb_folder = os.path.join(root,'depth_gt'), os.path.join(root,'sparse_depth'), os.path.join(root,'image_rgb')

         self.filename_tmpl = '{}'
```

misc/meta_info.txt

+3

```diff
@@ -0,0 +1,3 @@
+047550 (228, 304, 1)
+047551 (228, 304, 1)
+047552 (228, 304, 1)
```

misc/teapot.png

304 KB

train.py

+3 −5

```diff
@@ -70,7 +70,7 @@ def getOpt():
     parser.add_argument("--n_epochs", type=int, default=10, help="number of epochs of training")
     parser.add_argument("--dataset", type=str, default="nyu_v2", help="name of the dataset (shapeNet or nyu_v2)")
     parser.add_argument("--model", type=str, default="nyu_modelA", required = True, help="name of the model (nyu_modelA | nyu_modelB)")
-    parser.add_argument("--dataset_path", type=str, default="/home/mdl/mzk591/dataset/data.nyuv2/disk3/", help="path to the dataset")
+    parser.add_argument("--dataset_path", type=str, default="/home/dataset/nyu_v2/", help="path to the dataset")
     parser.add_argument("--batch_size", type=int, default=16, help="size of the batches")
     parser.add_argument('--robust', '-r', action='store_true', help="flag to enable robust training")
     parser.add_argument("--save_size", type=int, default=8, help="batch size for saved outputs")
@@ -83,7 +83,7 @@ def getOpt():
     parser.add_argument("--hr_width", type=int, default=304, help="dense depth width")
     parser.add_argument("--channels", type=int, default=1, help="depth image has only 1 channel")
     parser.add_argument("--sample_interval", type=int, default=20, help="interval between saving image samples")
-    parser.add_argument("--warmup_batches", type=int, default=15, help="number of batches with pixel-wise loss only")
+    parser.add_argument("--warmup_batches", type=int, default=250, help="number of batches with pixel-wise loss only")
     parser.add_argument("--lambda_adv", type=float, default=5e-3, help="adversarial loss weight")
     parser.add_argument("--lambda_pixel", type=float, default=1e-2, help="pixel-wise loss weight")
     parser.add_argument("--gpus", metavar='DEV_ID', default=None,
@@ -215,9 +215,7 @@ def main():
     milestones = [opt.decay_epoch, opt.decay_epoch + opt.lr_gap, opt.decay_epoch + opt.lr_gap*2, opt.decay_epoch + opt.lr_gap*3]

     total_train_batches = len(train_dataloader)
-    # snapshot_interval = round(total_train_batches/2)
-    snapshot_interval = 30
-
+    snapshot_interval = round(total_train_batches/2)

     if opt.robust:
         # Finding noisy batches
```
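
For context on the `--warmup_batches` change above: the flag controls how many initial batches are trained with the pixel-wise loss alone before the adversarial term is added. A self-contained sketch of that gating logic (illustrative only; the combination formula is an assumption, while the lambda defaults mirror the argparse defaults above):

```python
# Illustrative sketch, not the repo's training loop: gate the adversarial term
# behind a warm-up period measured in batches.
def generator_loss(loss_pixel, loss_adv, batches_done, warmup_batches=250,
                   lambda_pixel=1e-2, lambda_adv=5e-3):
    """Return the generator objective for the current batch."""
    if batches_done < warmup_batches:
        return loss_pixel                                   # warm-up: pixel-wise loss only
    return lambda_pixel * loss_pixel + lambda_adv * loss_adv

# Toy usage with floats standing in for tensor losses.
print(generator_loss(0.8, 0.5, batches_done=100))   # 0.8    (still warming up)
print(generator_loss(0.8, 0.5, batches_done=300))   # 0.0105 (adversarial term active)
```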
