Checkpoint commit, minor QoL updates

booth-algo · booth-algo · commit b079f8d141af · 2024-09-11T21:05:59.000+01:00
diff --git a/.gitignore b/.gitignore
@@ -3,4 +3,4 @@
 **/__pycache__/**
 gsplat
 vis
-B075X65R3X/
+training-data/
diff --git a/B075X65R3X.zip b/B075X65R3X.zip
diff --git a/gaussian_splatting/gauss_render.py b/gaussian_splatting/gauss_render.py
@@ -175,7 +175,8 @@ def __init__(self, active_sh_degree=3, white_bkgd=True, **kwargs):
         self.active_sh_degree = active_sh_degree
         self.debug = False
         self.white_bkgd = white_bkgd
-        self.pix_coord = torch.stack(torch.meshgrid(torch.arange(256), torch.arange(256), indexing='xy'), dim=-1).to('cuda')
+        # self.pix_coord = torch.stack(torch.meshgrid(torch.arange(256), torch.arange(256), indexing='xy'), dim=-1).to('cuda')
+        self.pix_coord = None
         
     
     def build_color(self, means3D, shs, camera):
@@ -239,6 +240,9 @@ def render(self, camera, means2D, cov2d, color, opacity, depths):
 
 
     def forward(self, pc_output, camera, **kwargs):
+        if self.pix_coord is None or self.pix_coord.shape[:2] != (camera.image_height, camera.image_width):
+            self.pix_coord = torch.stack(torch.meshgrid(torch.arange(camera.image_width), torch.arange(camera.image_height), indexing='xy'), dim=-1).to('cuda')  # 'xy' output is (len(2nd), len(1st)): width first gives (H, W, 2)
+        
         means3D = pc_output['xyz']
         opacity = pc_output['opacity']
         scales = pc_output['scaling']
diff --git a/gaussian_splatting/utils/data_utils.py b/gaussian_splatting/utils/data_utils.py
@@ -47,6 +47,9 @@ def read_all(folder, resize_factor=1.):
         src_rgb , src_depth, src_alpha, src_camera = \
         read_image(src_rgb_file, src_pose, 
             intrinsic, max_depth=max_depth, resize_factor=resize_factor)
+        
+        # Extract focal lengths
+        focal_x, focal_y = intrinsic[0, 0], intrinsic[1, 1]
 
         src_rgbs.append(src_rgb)
         src_depths.append(src_depth)
diff --git a/train.py b/train.py
@@ -41,12 +41,15 @@ def __init__(self, **kwargs):
     
     def on_train_step(self):
         ind = np.random.choice(len(self.data['camera']))
-        camera = self.data['camera'][ind]
+        camera_params = self.data['camera'][ind]
+        camera = to_viewpoint_camera(camera_params)
+        # camera = self.data['camera'][ind]
         rgb = self.data['rgb'][ind]
         depth = self.data['depth'][ind]
         mask = (self.data['alpha'][ind] > 0.5)
-        if USE_GPU_PYTORCH:
-            camera = to_viewpoint_camera(camera)
+        
+        # if USE_GPU_PYTORCH:
+        #     camera = to_viewpoint_camera(camera)
 
         if USE_PROFILE:
             prof = profile(activities=[ProfilerActivity.CUDA], with_stack=True)
@@ -101,9 +104,11 @@ def log_psnr_stats(self):
     def on_evaluate_step(self, **kwargs):
         import matplotlib.pyplot as plt
         ind = np.random.choice(len(self.data['camera']))
-        camera = self.data['camera'][ind]
-        if USE_GPU_PYTORCH:
-            camera = to_viewpoint_camera(camera)
+        # camera = self.data['camera'][ind]
+        # if USE_GPU_PYTORCH:
+        #     camera = to_viewpoint_camera(camera)
+
+        camera = to_viewpoint_camera(self.data['camera'][ind])
 
         rgb = self.data['rgb'][ind].detach().cpu().numpy()
 
@@ -114,9 +119,17 @@ def on_evaluate_step(self, **kwargs):
         rgb_pd = out['render'].detach().cpu().numpy()
         depth_pd = out['depth'].detach().cpu().numpy()[..., 0]
         depth = self.data['depth'][ind].detach().cpu().numpy()
+
+        if depth.shape != depth_pd.shape:
+            depth = np.resize(depth, depth_pd.shape)
+
         depth = np.concatenate([depth, depth_pd], axis=1)
         depth = (1 - depth / depth.max())
         depth = plt.get_cmap('jet')(depth)[..., :3]
+
+        if rgb.shape != rgb_pd.shape:
+            rgb = np.resize(rgb, rgb_pd.shape)
+
         image = np.concatenate([rgb, rgb_pd], axis=1)
         image = np.concatenate([image, depth], axis=0)
         utils.imwrite(str(self.results_folder / f'image-{self.step}.png'), image)
@@ -144,7 +157,7 @@ def get_test_folder(base_folder='result', prefix='test'):
 
 if __name__ == "__main__":
     device = 'cuda'
-    folder = './B075X65R3X'
+    folder = './training-data/B075X65R3X'
     data = read_all(folder, resize_factor=0.5)
     data = {k: v.to(device) for k, v in data.items()}
     data['depth_range'] = torch.Tensor([[1,3]]*len(data['rgb'])).to(device)
@@ -197,7 +210,7 @@ def get_test_folder(base_folder='result', prefix='test'):
         # model=GaussModel,
         data=data,
         train_batch_size=1, 
-        train_num_steps=30,
+        train_num_steps=1000,
         i_image =100,
         train_lr=1e-3, 
         amp=False,