Fixed bugs for PSNR and env, and removed redundant code

DekuLiuTesla · DekuLiuTesla · commit e84c7c8774dd · 2025-10-18T15:44:20.000+08:00
diff --git a/internal/metrics/vanilla_metrics.py b/internal/metrics/vanilla_metrics.py
@@ -40,7 +40,7 @@ def adapter(pred, gt):
 
 
     def setup(self, stage: str, pl_module):
-        self.psnr = PeakSignalNoiseRatio()
+        self.psnr = PeakSignalNoiseRatio(data_range=1.)
         self.no_state_dict_models["lpips"] = LearnedPerceptualImagePatchSimilarity(normalize=True, net_type=self.config.lpips_net_type)
 
         self.lambda_dssim = self.config.lambda_dssim
diff --git a/internal/renderers/gsplat_camera_opt.py b/internal/renderers/gsplat_camera_opt.py
@@ -42,9 +42,8 @@ def rotation_6d_to_matrix(d6: torch.Tensor) -> torch.Tensor:
 class ModelConfig:
     
     n_cameras: int = -1
-    pose_opt_type: Literal["sfm", "mlp", "7dmlp"] = "sfm"
+    pose_opt_type: Literal["sfm", "mlp"] = "sfm"
     cam_scale: float = 1.0
-    scale: float = 1e-3  # Used for 7dmlp
     mlp_width: int = 64
     mlp_depth: int = 2
 
@@ -58,7 +57,6 @@ class OptimizationConfig:
     shceduler_type: Literal["step", "cosine", "none"] = "none"
     eps: float = 1e-15
     max_steps: int = 30_000
-    opt_test: bool = False  # TODO: remove it
 
 class CameraOptModule(nn.Module):
     """Camera pose optimization module."""
@@ -166,80 +164,7 @@ def forward(self, camtoworlds: torch.Tensor, embed_ids: torch.Tensor) -> torch.T
         transform[..., :3, 3] = dx * self.cam_scale
             
         return torch.matmul(camtoworlds, transform)
-
-class CameraOptModule7dMLP(torch.nn.Module):
-    """Camera pose optimization module using MLP."""
-
-    def __init__(self, n: int, mlp_width: int = 256, mlp_depth: int = 2, scale: float = 1e-6):
-        super().__init__()
-        # Identity rotation in 6D representation
-        self.register_buffer("identity", torch.tensor([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]))
-        
-        # Initial embeddings for each camera
-        self.num_cams = n
-        
-        # MLP layers
-        activation = torch.nn.ELU(inplace=True)
-        layers = []
-        layers.append(torch.nn.Linear(7, mlp_width))
-        layers.append(activation)
-        for _ in range(mlp_depth - 1):
-            layers.append(torch.nn.Linear(mlp_width, mlp_width))
-            layers.append(activation)
-        # Output layer produces 9D adjustments (3D position + 6D rotation)
-        layers.append(torch.nn.Linear(mlp_width, 6))
-        self.mlp = torch.nn.Sequential(*layers)
-
-        self.scale = scale
-        
-    def zero_init(self):
-        # torch.nn.init.zeros_(self.embeds.weight)
-        #torch.nn.init.normal_(self.embeds.weight)
-        # Also initialize the last layer of MLP with small weights
-        # torch.nn.init.zeros_(self.mlp[-1].weight)
-        # torch.nn.init.zeros_(self.mlp[-1].bias)
-        pass
-
-    def random_init(self, std: float):
-        # torch.nn.init.normal_(self.embeds.weight, std=std)
-        # Initialize the last layer of MLP with small weights
-        torch.nn.init.normal_(self.mlp[-1].weight, std=std)
-        torch.nn.init.normal_(self.mlp[-1].bias, std=std)
-
-    def forward(self, camtoworlds: torch.Tensor, embed_ids: torch.Tensor) -> torch.Tensor:
-        """Adjust camera pose based on MLP outputs with SGLD noise.
-
-        Args:
-            camtoworlds: (..., 4, 4)
-            embed_ids: (...,)
-
-        Returns:
-            updated camtoworlds: (..., 4, 4)
-        """
-        assert camtoworlds.shape[:-2] == embed_ids.shape
-        if camtoworlds.ndim == 2:
-            camtoworlds = camtoworlds.unsqueeze(0)
-        if embed_ids.ndim == 0:
-            embed_ids = embed_ids.unsqueeze(0)
-        batch_shape = camtoworlds.shape[:-2]
-        
-        # Get embeddings and process through MLP with noise
-        r_init = rotation_matrix_to_axis_angle(camtoworlds[..., :3, :3])
-        t_init = camtoworlds[..., :3, 3]
-
-        mlp_input = torch.cat((embed_ids[..., None], r_init, t_init), dim=-1)  # (..., 7)
-
-        out = self.mlp(mlp_input) * self.scale
-        
-        r = out[..., :3] + r_init
-        t = out[..., 3:] + t_init
-        R = axis_angle_to_rotation_matrix(r)
-        
-        camtoworlds_corrected = torch.eye(4, device=camtoworlds.device).repeat((*batch_shape, 1, 1))
-        camtoworlds_corrected[..., :3, :3] = R
-        camtoworlds_corrected[..., :3, 3] = t
-            
-        return camtoworlds_corrected.squeeze()
+    
 
 @dataclass
 class GSplatCameraOptRenderer(GSplatV1Renderer):
@@ -281,13 +206,6 @@ def _setup_model(self, device=None):
                 mlp_depth=self.config.model.mlp_depth,
                 cam_scale=self.config.model.cam_scale
             )
-        elif self.config.model.pose_opt_type == "7dmlp":
-            self.model = CameraOptModule7dMLP(
-                n=self.config.model.n_cameras,
-                mlp_width=self.config.model.mlp_width,
-                mlp_depth=self.config.model.mlp_depth,
-                scale=self.config.model.scale
-            )
         else:
             self.model = CameraOptModule(self.config.model.n_cameras)
         
diff --git a/notebooks/preprocess.ipynb b/notebooks/preprocess.ipynb
@@ -864,7 +864,7 @@
    ],
    "source": [
     "from torchmetrics import PeakSignalNoiseRatio\n",
-    "psnr = PeakSignalNoiseRatio().to(rgb.device)\n",
+    "psnr = PeakSignalNoiseRatio(data_range=1.).to(rgb.device)\n",
     "psnr(rgb, results[\"render\"].permute(1, 2, 0))"
    ],
    "metadata": {
diff --git a/notebooks/render.ipynb b/notebooks/render.ipynb
@@ -406,7 +406,7 @@
    ],
    "source": [
     "from torchmetrics import PeakSignalNoiseRatio\n",
-    "psnr = PeakSignalNoiseRatio().to(gt.device)\n",
+    "psnr = PeakSignalNoiseRatio(data_range=1.).to(gt.device)\n",
     "psnr(results[\"render\"], gt)"
    ]
   }
diff --git a/notebooks/rotate_shs.ipynb b/notebooks/rotate_shs.ipynb
@@ -978,7 +978,7 @@
     "from internal.renderers.gsplat_renderer import GSPlatRenderer\n",
     "from internal.cameras.cameras import Cameras, CameraType\n",
     "from torchmetrics.image.psnr import PeakSignalNoiseRatio\n",
-    "psnr = PeakSignalNoiseRatio().to(model.get_xyz.device)"
+    "psnr = PeakSignalNoiseRatio(data_range=1.).to(model.get_xyz.device)"
    ],
    "metadata": {
     "collapsed": false,
diff --git a/requirements/gsplat.txt b/requirements/gsplat.txt
@@ -1 +1,2 @@
+kornia
 git+https://github.com/yzslab/gsplat.git@58f3772541b6fb55e3219b36cd2b64be0584645c
diff --git a/requirements/lightning23.txt b/requirements/lightning23.txt
@@ -1,3 +1,4 @@
 lightning[pytorch-extra]==2.3.*
 pytorch-lightning==2.3.*
+bitsandbytes==0.45.*
 -r common.txt

Original file line number	Diff line number	Diff line change
`@@ -406,7 +406,7 @@`
`406`	`406`	`],`
`407`	`407`	`"source": [`
`408`	`408`	`"from torchmetrics import PeakSignalNoiseRatio\n",`
`409`		`- "psnr = PeakSignalNoiseRatio().to(gt.device)\n",`
	`409`	`+ "psnr = PeakSignalNoiseRatio(data_range=1.).to(gt.device)\n",`
`410`	`410`	`"psnr(results[\"render\"], gt)"`
`411`	`411`	`]`
`412`	`412`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1 +1,2 @@`
	`1`	`+kornia`
`1`	`2`	`git+https://github.com/yzslab/gsplat.git@58f3772541b6fb55e3219b36cd2b64be0584645c`