Background image depth compositing (#83)
* very early WIP of depth compositing shader, sort of works for a hardcoded turtle example

* fix texture sampling logic with vertex shader

* improvements

* GUI container refactor

* Clean up panel display, still some kinks to work out

* working depth compositing, need to clean up/document things

* Improve expanded panel behavior; drag regression for mobile view

* Fix toggle for sidebar display

* Fix type

* refactor for popup img instead of background_image

* add enabled flag to texture

* bad merge

* 4 byte packing

* more work

* working float passing?

* works!

* Remove icons : )

* Revert SMPL-X example

* remove depth scale

* cleanup

* make popup example

* comment

* garbage collect old textures, repackage PopupImage into BackgroundImage

* Update compositing example

* More precision

* Nits, type errors

* Nits

* Use 3 bytes, fix mypy/ruff

* Naming

---------

Co-authored-by: Brent Yi <[email protected]>
brentyi authored Aug 25, 2023
2 parents 2df05ab + 3c7d4aa commit 30aa5a9
Showing 6 changed files with 209 additions and 10 deletions.
36 changes: 36 additions & 0 deletions examples/17_background_composite.py
@@ -0,0 +1,36 @@
# mypy: disable-error-code="var-annotated"
"""Background image example with depth compositing.
In this example, we show how to use a background image with depth compositing. This can
be useful when we want a 2D image to occlude 3D geometry, such as for NeRF rendering.
"""

import time

import numpy as onp
import trimesh
import trimesh.creation

import viser

server = viser.ViserServer()


img = onp.random.randint(0, 255, size=(1000, 1000, 3), dtype=onp.uint8)
depth = onp.ones((1000, 1000, 1), dtype=onp.float32)

# Make a square portal in the middle: the border stays at depth 1.0 and occludes
# geometry behind it, while the center is pushed back to depth 10.0 so the cube
# can show through.
depth[250:750, 250:750, :] = 10.0
img[250:750, 250:750, :] = 255

mesh = trimesh.creation.box((0.5, 0.5, 0.5))
server.add_mesh_trimesh(
name="/cube",
mesh=mesh,
position=(0, 0, 0.0),
)
server.set_background_image(img, depth=depth)


while True:
    time.sleep(1.0)
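
For contrast, a minimal sketch (reusing the server and img from the example above) of the no-depth path: when no depth map is passed, the client shader writes depth 1.0 (infinity), so the image acts as a plain backdrop and the cube always renders in front of it.

# Sketch: same server and img as above, but without depth compositing.
server.set_background_image(img)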
29 changes: 27 additions & 2 deletions viser/_message_api.py
Original file line number Diff line number Diff line change
@@ -396,14 +396,39 @@ def set_background_image(
        image: onp.ndarray,
        format: Literal["png", "jpeg"] = "jpeg",
        jpeg_quality: Optional[int] = None,
        depth: Optional[onp.ndarray] = None,
    ) -> None:
-       """Set a background image for the scene. Useful for NeRF visualization."""
+       """Set a background image for the scene, optionally with depth compositing."""
        media_type, base64_data = _encode_image_base64(
            image, format, jpeg_quality=jpeg_quality
        )

        # Encode depth if provided. We use a 3-channel PNG to represent a
        # fixed-point depth at each pixel.
        depth_base64data = None
        if depth is not None:
            # Convert to fixed-point.
            # We'll support from 0 -> (2^24 - 1) / 100_000.
            #
            # This translates to a range of [0, 167.77215], with a precision of 1e-5.
            assert len(depth.shape) == 2 or (
                len(depth.shape) == 3 and depth.shape[2] == 1
            ), "Depth should have shape (H,W) or (H,W,1)."
            depth = onp.clip(depth * 100_000, 0, 2**24 - 1).astype(onp.uint32)
            assert depth is not None  # Appease mypy.
            intdepth: onp.ndarray = depth.reshape((*depth.shape[:2], 1)).view(onp.uint8)
            assert intdepth.shape == (*depth.shape[:2], 4)
            with io.BytesIO() as data_buffer:
                iio.imwrite(data_buffer, intdepth[:, :, :3], extension=".png")
                depth_base64data = base64.b64encode(data_buffer.getvalue()).decode(
                    "ascii"
                )

        self._queue(
            _messages.BackgroundImageMessage(
-               media_type=media_type, base64_data=base64_data
+               media_type=media_type,
+               base64_rgb=base64_data,
+               base64_depth=depth_base64data,
            )
        )

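For reference, a minimal standalone sketch (not part of the diff) of the 24-bit fixed-point round trip that set_background_image performs above. It assumes a little-endian host, which the server code's .view(onp.uint8) byte order also relies on; the array values here are illustrative.

import numpy as onp

depth = onp.array([[0.0, 1.5, 100.0]], dtype=onp.float32)  # Depth in meters, (H, W).

# Encode: scale by 1e5, clamp to 24 bits, split into 3 little-endian bytes.
fixed = onp.clip(depth * 100_000, 0, 2**24 - 1).astype(onp.uint32)
packed = fixed.reshape((*fixed.shape[:2], 1)).view(onp.uint8)[:, :, :3]

# Decode: recombine the bytes and undo the scaling.
decoded = (
    packed[:, :, 0].astype(onp.uint32)
    + (packed[:, :, 1].astype(onp.uint32) << 8)
    + (packed[:, :, 2].astype(onp.uint32) << 16)
) / 100_000
assert onp.allclose(decoded, depth, atol=1e-5)  # Quantization error is at most 1e-5.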
3 changes: 2 additions & 1 deletion viser/_messages.py
@@ -229,7 +229,8 @@ class BackgroundImageMessage(Message):
"""Message for rendering a background image."""

media_type: Literal["image/jpeg", "image/png"]
base64_data: str
base64_rgb: str
base64_depth: Optional[str]


@dataclasses.dataclass
115 changes: 114 additions & 1 deletion viser/client/src/App.tsx
@@ -6,7 +6,7 @@ import {
  Environment,
} from "@react-three/drei";
import * as THREE from "three";
-import { Canvas, useThree } from "@react-three/fiber";
+import { Canvas, useThree, useFrame } from "@react-three/fiber";
import {
  EffectComposer,
  Outline,
@@ -43,6 +43,7 @@ export type ViewerContextContents = {
  canvasRef: React.MutableRefObject<HTMLCanvasElement | null>;
  sceneRef: React.MutableRefObject<THREE.Scene | null>;
  cameraRef: React.MutableRefObject<THREE.PerspectiveCamera | null>;
  backgroundMaterialRef: React.MutableRefObject<THREE.ShaderMaterial | null>;
  cameraControlRef: React.MutableRefObject<CameraControls | null>;
  // Scene node attributes.
  // This is intentionally placed outside of the Zustand state to reduce overhead.
@@ -88,6 +89,7 @@ function ViewerRoot() {
    canvasRef: React.useRef(null),
    sceneRef: React.useRef(null),
    cameraRef: React.useRef(null),
    backgroundMaterialRef: React.useRef(null),
    cameraControlRef: React.useRef(null),
    // Scene node attributes that aren't placed in the zustand state for performance reasons.
    nodeAttributesFromName: React.useRef({}),
@@ -159,6 +161,7 @@ function ViewerCanvas({ children }: { children: React.ReactNode }) {
      ref={viewer.canvasRef}
    >
      {children}
      <BackgroundImage />
      <AdaptiveDpr pixelated />
      <AdaptiveEvents />
      <SceneContextSetter />
@@ -182,6 +185,116 @@
  );
}

/* Background image with support for depth compositing. */
function BackgroundImage() {
  // Vertex and fragment shaders that composite the background color and depth
  // textures into the scene.
  const vertShader = `
  varying vec2 vUv;

  void main() {
    vUv = uv;
    gl_Position = projectionMatrix * modelViewMatrix * vec4(position, 1.0);
  }
  `.trim();
  const fragShader = `
  #include <packing>
  precision highp float;
  precision highp int;

  varying vec2 vUv;
  uniform sampler2D colorMap;
  uniform sampler2D depthMap;
  uniform float cameraNear;
  uniform float cameraFar;
  uniform bool enabled;
  uniform bool hasDepth;

  float readDepth(sampler2D depthMap, vec2 coord) {
    vec4 rgbPacked = texture(depthMap, coord);

    // Texture channels are scaled from [0, 255] to [0, 1], and depth was
    // multiplied by 1e5 on the server side, so the coefficient for channel k
    // is 255 * 1e-5 * 2^(8 * k).
    float depth = rgbPacked.r * 0.00255 + rgbPacked.g * 0.6528 + rgbPacked.b * 167.1168;
    return depth;
  }

  void main() {
    if (!enabled) {
      // Discard the pixel if the background is not enabled.
      discard;
    }
    vec4 color = texture(colorMap, vUv);
    gl_FragColor = vec4(color.rgb, 1.0);

    float bufDepth;
    if (hasDepth) {
      float depth = readDepth(depthMap, vUv);
      bufDepth = viewZToPerspectiveDepth(-depth, cameraNear, cameraFar);
    } else {
      // If no depth is provided, write depth 1.0 (infinity) to treat this as
      // a plain background image.
      bufDepth = 1.0;
    }
    gl_FragDepth = bufDepth;
  }`.trim();
  // Create the shader material. Fragments are discarded (enabled=false) until
  // a background image arrives from the server.
  const backgroundMaterial = new THREE.ShaderMaterial({
    fragmentShader: fragShader,
    vertexShader: vertShader,
    uniforms: {
      enabled: { value: false },
      depthMap: { value: null },
      colorMap: { value: null },
      cameraNear: { value: null },
      cameraFar: { value: null },
      hasDepth: { value: false },
    },
  });
  const { backgroundMaterialRef } = React.useContext(ViewerContext)!;
  backgroundMaterialRef.current = backgroundMaterial;
  const backgroundMesh = React.useRef<THREE.Mesh>(null);
  useFrame(({ camera }) => {
    // The logic ahead relies on a perspective camera.
    if (!(camera instanceof THREE.PerspectiveCamera)) {
      console.error(
        "Camera is not a perspective camera, cannot render background image",
      );
      return;
    }

    // Place the mesh one unit in front of the camera, facing it.
    const lookdir = camera.getWorldDirection(new THREE.Vector3());
    backgroundMesh.current!.position.set(
      camera.position.x,
      camera.position.y,
      camera.position.z,
    );
    backgroundMesh.current!.position.addScaledVector(lookdir, 1.0);
    backgroundMesh.current!.quaternion.copy(camera.quaternion);

    // Resize the mesh based on the focal length: at distance 1, the visible
    // frustum spans filmWidth / f by filmHeight / f, so this scaling makes the
    // plane exactly fill the view.
    const f = camera.getFocalLength();
    backgroundMesh.current!.scale.set(
      camera.getFilmWidth() / f,
      camera.getFilmHeight() / f,
      1.0,
    );

    // Set near/far uniforms.
    backgroundMaterial.uniforms.cameraNear.value = camera.near;
    backgroundMaterial.uniforms.cameraFar.value = camera.far;
  });

  return (
    <mesh
      ref={backgroundMesh}
      material={backgroundMaterial}
      matrixWorldAutoUpdate={false}
    >
      <planeGeometry attach="geometry" args={[1, 1]} />
    </mesh>
  );
}

/** Component for helping us set the scene reference. */
function SceneContextSetter() {
  const { sceneRef, cameraRef } = React.useContext(ViewerContext)!;
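As a sanity check on the readDepth coefficients above, a hypothetical snippet (not part of the commit): GLSL's texture() scales a byte value b to b / 255, so multiplying channel k by 255 * 1e-5 * 2^(8 * k) recovers byte_k * 2^(8 * k) * 1e-5, and summing the three channels inverts the server-side packing.

import math

coefficients = [255 * 1e-5 * 2 ** (8 * k) for k in range(3)]
for coeff, expected in zip(coefficients, [0.00255, 0.6528, 167.1168]):
    assert math.isclose(coeff, expected)

depth_meters = 1.5
fixed = round(depth_meters * 100_000)  # 24-bit fixed point, as packed server-side.
channels = [(fixed >> (8 * k)) & 0xFF for k in range(3)]  # Bytes stored in the PNG.
sampled = [c / 255 for c in channels]  # texture() maps [0, 255] to [0, 1].
decoded = sum(s * c for s, c in zip(sampled, coefficients))
assert abs(decoded - depth_meters) < 1e-5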
33 changes: 28 additions & 5 deletions viser/client/src/WebsocketInterface.tsx
@@ -413,18 +413,39 @@ function useMessageHandler() {
    // Add a background image.
    case "BackgroundImageMessage": {
      new TextureLoader().load(
-       `data:${message.media_type};base64,${message.base64_data}`,
+       `data:${message.media_type};base64,${message.base64_rgb}`,
        (texture) => {
          // TODO: this onLoad callback prevents flickering, but could cause
          // messages to be handled slightly out-of-order.
          texture.encoding = THREE.sRGBEncoding;

-         const oldBackground = viewer.sceneRef.current?.background;
-         viewer.sceneRef.current!.background = texture;
-         if (isTexture(oldBackground)) oldBackground.dispose();
+         const oldBackgroundTexture =
+           viewer.backgroundMaterialRef.current!.uniforms.colorMap.value;
+         viewer.backgroundMaterialRef.current!.uniforms.colorMap.value =
+           texture;
+         if (isTexture(oldBackgroundTexture)) oldBackgroundTexture.dispose();

          viewer.useGui.setState({ backgroundAvailable: true });
        },
      );
      viewer.backgroundMaterialRef.current!.uniforms.enabled.value = true;
      viewer.backgroundMaterialRef.current!.uniforms.hasDepth.value =
        message.base64_depth !== null;

      if (message.base64_depth !== null) {
        // If depth is available, load it and set the depth texture.
        new TextureLoader().load(
          `data:image/png;base64,${message.base64_depth}`,
          (texture) => {
            const oldDepthTexture =
              viewer.backgroundMaterialRef.current?.uniforms.depthMap.value;
            viewer.backgroundMaterialRef.current!.uniforms.depthMap.value =
              texture;
            if (isTexture(oldDepthTexture)) oldDepthTexture.dispose();
          },
        );
      }
      return;
    }
    // Add a 2D label.
@@ -568,6 +589,8 @@ function useMessageHandler() {
      if (isTexture(oldBackground)) oldBackground.dispose();

      viewer.useGui.setState({ backgroundAvailable: false });
      // Disable background rendering; fragments will be discarded again.
      viewer.backgroundMaterialRef.current!.uniforms.enabled.value = false;
      return;
    }
    // Set the value of a GUI input.
3 changes: 2 additions & 1 deletion viser/client/src/WebsocketMessages.tsx
@@ -109,7 +109,8 @@ export interface TransformControlsUpdateMessage {
export interface BackgroundImageMessage {
  type: "BackgroundImageMessage";
  media_type: "image/jpeg" | "image/png";
- base64_data: string;
+ base64_rgb: string;
+ base64_depth: string | null;
}
export interface ImageMessage {
  type: "ImageMessage";
