Modify depth dimensions to match our input #2

Open · wants to merge 4 commits into main
scripts/splatam.py (16 changes: 13 additions & 3 deletions)
@@ -173,6 +173,8 @@ def initialize_first_timestep(dataset, num_frames, scene_radius_depth_ratio,

     # Process RGB-D Data
     color = color.permute(2, 0, 1) / 255 # (H, W, C) -> (C, H, W)
+    # Flatten to match expected dimensions
+    depth = torch.flatten(depth, start_dim=2)
     depth = depth.permute(2, 0, 1) # (H, W, C) -> (C, H, W)

     # Process Camera Parameters
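
For context, a minimal standalone sketch (not part of the diff; the input shape is an assumption) of what the added flatten does when the dataset yields depth with an extra trailing dimension:

```python
import torch

# Hypothetical depth tensor with an extra trailing dim (illustrative shape)
depth = torch.rand(480, 640, 1, 1)

depth = torch.flatten(depth, start_dim=2)  # (480, 640, 1, 1) -> (480, 640, 1)
depth = depth.permute(2, 0, 1)             # (480, 640, 1)    -> (1, 480, 640), i.e. (C, H, W)

print(depth.shape)  # torch.Size([1, 480, 640])
```

For depth that is already (H, W, 1), `flatten(start_dim=2)` is a no-op, so the existing datasets should be unaffected.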
@@ -186,14 +188,17 @@ def initialize_first_timestep(dataset, num_frames, scene_radius_depth_ratio,
         # Get Densification RGB-D Data & Camera Parameters
         color, depth, densify_intrinsics, _ = densify_dataset[0]
         color = color.permute(2, 0, 1) / 255 # (H, W, C) -> (C, H, W)
+        # Flatten to match expected dimensions
+        depth = torch.flatten(depth, start_dim=2)
         depth = depth.permute(2, 0, 1) # (H, W, C) -> (C, H, W)
         densify_intrinsics = densify_intrinsics[:3, :3]
         densify_cam = setup_camera(color.shape[2], color.shape[1], densify_intrinsics.cpu().numpy(), w2c.detach().cpu().numpy())
     else:
         densify_intrinsics = intrinsics

     # Get Initial Point Cloud (PyTorch CUDA Tensor)
-    mask = (depth > 0) # Mask out invalid depth values
+    depth_z = depth[0] # Take only the 1st channel
+    mask = (depth_z > 0) # Mask out invalid depth values
     mask = mask.reshape(-1)
     init_pt_cld, mean3_sq_dist = get_pointcloud(color, depth, densify_intrinsics, w2c,
                                                 mask=mask, compute_mean_sq_dist=True,
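
A similar sketch for the mask change (illustrative, not from the diff): if the depth tensor carries more than one channel, masking the whole tensor yields more entries than there are pixels, whereas `depth[0]` keeps the mask at one value per pixel, which is presumably what `get_pointcloud` expects after `reshape(-1)`.

```python
import torch

depth = torch.rand(3, 480, 640)       # hypothetical multi-channel depth, (C, H, W)

mask_all = (depth > 0).reshape(-1)    # 3 * 480 * 640 = 921,600 entries
mask_z = (depth[0] > 0).reshape(-1)   # 480 * 640 = 307,200 entries, one per pixel

print(mask_all.shape, mask_z.shape)
```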
@@ -281,7 +286,7 @@ def get_loss(params, curr_data, variables, iter_time_idx, loss_weights, use_sil_

     # RGB Loss
     if tracking and (use_sil_for_loss or ignore_outlier_depth_loss):
-        color_mask = torch.tile(mask, (3, 1, 1))
+        color_mask = torch.tile(mask, (1, 1, 1))
         color_mask = color_mask.detach()
         losses['im'] = torch.abs(curr_data['im'] - im)[color_mask].sum()
     elif tracking:
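
For reference, `torch.tile` with the two reps arguments behaves as below (standalone sketch, shapes illustrative): with `(3, 1, 1)` a single-channel mask is expanded to three channels for an RGB image, while `(1, 1, 1)` leaves it unchanged, which would match a single-channel `im` tensor.

```python
import torch

mask = torch.rand(1, 4, 5) > 0.5          # (1, H, W) boolean mask

print(torch.tile(mask, (3, 1, 1)).shape)  # torch.Size([3, 4, 5])
print(torch.tile(mask, (1, 1, 1)).shape)  # torch.Size([1, 4, 5])
```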
@@ -632,6 +637,8 @@ def rgbd_slam(config: dict):
     curr_w2c[:3, 3] = curr_cam_tran
     # Initialize Keyframe Info
     color = color.permute(2, 0, 1) / 255
+    # Flatten to match expected dimensions
+    depth = torch.flatten(depth, start_dim=2)
     depth = depth.permute(2, 0, 1)
     curr_keyframe = {'id': time_idx, 'est_w2c': curr_w2c, 'color': color, 'depth': depth}
     # Add to keyframe list
@@ -647,6 +654,8 @@ def rgbd_slam(config: dict):
     gt_w2c = torch.linalg.inv(gt_pose)
     # Process RGB-D Data
     color = color.permute(2, 0, 1) / 255
+    # Flatten to match expected dimensions
+    depth = torch.flatten(depth, start_dim=2)
     depth = depth.permute(2, 0, 1)
     gt_w2c_all_frames.append(gt_w2c)
     curr_gt_w2c = gt_w2c_all_frames
@@ -782,6 +791,7 @@ def rgbd_slam(config: dict):
     # Load RGBD frames incrementally instead of all frames
     densify_color, densify_depth, _, _ = densify_dataset[time_idx]
     densify_color = densify_color.permute(2, 0, 1) / 255
+    densify_depth = torch.flatten(densify_depth, start_dim=2)
     densify_depth = densify_depth.permute(2, 0, 1)
     densify_curr_data = {'cam': densify_cam, 'im': densify_color, 'depth': densify_depth, 'id': time_idx,
                          'intrinsics': densify_intrinsics, 'w2c': first_frame_w2c, 'iter_gt_w2c_list': curr_gt_w2c}
@@ -1011,4 +1021,4 @@ def rgbd_slam(config: dict):
     os.makedirs(results_dir, exist_ok=True)
     shutil.copy(args.experiment, os.path.join(results_dir, "config.py"))

-    rgbd_slam(experiment.config)
+    rgbd_slam(experiment.config)
utils/eval_helpers.py (5 changes: 3 additions & 2 deletions)
@@ -132,7 +132,7 @@ def plot_rgbd_silhouette(color, depth, rastered_color, rastered_depth, presence_
     axs[0, 2].imshow(presence_sil_mask, cmap='gray')
     axs[0, 2].set_title("Rasterized Silhouette")
     diff_depth_l1 = diff_depth_l1.cpu().squeeze(0)
-    axs[1, 2].imshow(diff_depth_l1, cmap='jet', vmin=0, vmax=6)
+    axs[1, 2].imshow(diff_depth_l1.permute(1, 2, 0), cmap='jet', vmin=0, vmax=6)
     axs[1, 2].set_title("Diff Depth L1")
     for ax in axs.flatten():
         ax.axis('off')
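
For context, `imshow` expects images in (H, W) or (H, W, C) layout, so a channel-first tensor needs a permute before plotting; a minimal sketch with illustrative shapes (recent matplotlib accepts an (H, W, 1) array and applies the colormap):

```python
import torch
import matplotlib.pyplot as plt

diff_depth_l1 = torch.rand(1, 480, 640)       # channel-first (C, H, W)
img = diff_depth_l1.permute(1, 2, 0).numpy()  # -> (H, W, 1) for imshow

plt.imshow(img, cmap='jet', vmin=0, vmax=6)
plt.savefig("diff_depth_l1.png", bbox_inches='tight')
```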
@@ -435,6 +435,7 @@ def eval(dataset, final_params, num_frames, eval_dir, sil_thres,

     # Process RGB-D Data
     color = color.permute(2, 0, 1) / 255 # (H, W, C) -> (C, H, W)
+    depth = depth.flatten(start_dim=2)
     depth = depth.permute(2, 0, 1) # (H, W, C) -> (C, H, W)

     if time_idx == 0:
@@ -838,4 +839,4 @@ def eval_nvs(dataset, final_params, num_frames, eval_dir, sil_thres,
     plt.savefig(os.path.join(eval_dir, "metrics.png"), bbox_inches='tight')
     if wandb_run is not None:
         wandb_run.log({"Eval/Metrics": fig})
-    plt.close()
+    plt.close()
venv_requirements.txt (4 changes: 3 additions & 1 deletion)
@@ -11,4 +11,6 @@ torchmetrics
 cyclonedds
 pytorch-msssim
 plyfile==0.8.1
-git+https://github.com/JonathonLuiten/diff-gaussian-rasterization-w-depth.git@cb65e4b86bc3bd8ed42174b72a62e8d3a3a71110
+opencv-python
Collaborator:

@Santoi a missing dependency? If opencv is not used for visualization, consider pulling opencv-python-headless instead. opencv-python and matplotlib don't play well together in certain cases (due to PyQt compatibility issues).

Collaborator (Author):

Got it, thanks!

+open3d
+git+https://github.com/JonathonLuiten/diff-gaussian-rasterization-w-depth.git@cb65e4b86bc3bd8ed42174b72a62e8d3a3a71110
viz_scripts/final_recon.py (5 changes: 5 additions & 0 deletions)
@@ -186,6 +186,9 @@ def visualize(scene_path, cfg):
     pcd = o3d.geometry.PointCloud()
     pcd.points = init_pts
     pcd.colors = init_cols
+    path = cfg['output']
+    o3d.io.write_point_cloud(path, pcd);
+    print("PCD written at: ", path);
     vis.add_geometry(pcd)

     w = cfg['viz_w']
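
As a quick sanity check on the new output (a usage sketch with an illustrative path, not part of the diff), the written file can be loaded back with Open3D; note that `write_point_cloud` infers the format from the file extension, so the output path should end in a supported extension such as `.ply` or `.pcd`.

```python
import open3d as o3d

# Illustrative path; use whatever was passed as the new `pointcloud` argument
pcd = o3d.io.read_point_cloud("final_recon.ply")
print(pcd)  # e.g. "PointCloud with N points."
o3d.visualization.draw_geometries([pcd])
```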
@@ -279,6 +282,7 @@ def visualize(scene_path, cfg):
     parser = argparse.ArgumentParser()

     parser.add_argument("experiment", type=str, help="Path to experiment file")
+    parser.add_argument("pointcloud", type=str, help="Path to write output pointcloud file")

     args = parser.parse_args()

@@ -296,6 +300,7 @@ def visualize(scene_path, cfg):
     else:
         scene_path = experiment.config["scene_path"]
     viz_cfg = experiment.config["viz"]
+    viz_cfg["output"] = args.pointcloud

     # Visualize Final Reconstruction
     visualize(scene_path, viz_cfg)
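
With these changes the visualizer takes a second positional argument for the output path, so an invocation would look something like `python viz_scripts/final_recon.py <experiment config> <output>.ply` (paths illustrative).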