Source code for real_robot.utils.camera

from __future__ import annotations

from typing import Optional

import cv2
import numpy as np
from sapien import Pose

# Convert between camera frame conventions
#   OpenCV frame convention: right(x), down(y), forward(z)
#   OpenGL frame convention: right(x), up(y), backwards(z)
#   ROS/Sapien frame convention: forward(x), left(y) and up(z)
# For ROS frame conventions, see https://www.ros.org/reps/rep-0103.html#axis-orientation
T_CV_GL = np.array(
    [[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_CV_GL = Pose(T_CV_GL)
pose_GL_CV = pose_CV_GL.inv()
T_GL_CV = pose_GL_CV.to_transformation_matrix()
T_CV_ROS = np.array(
    [[0, -1, 0, 0], [0, 0, -1, 0], [1, 0, 0, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_CV_ROS = Pose(T_CV_ROS)
pose_ROS_CV = pose_CV_ROS.inv()
T_ROS_CV = pose_ROS_CV.to_transformation_matrix()
T_GL_ROS = np.array(
    [[0, -1, 0, 0], [0, 0, 1, 0], [-1, 0, 0, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_GL_ROS = Pose(T_GL_ROS)
pose_ROS_GL = pose_GL_ROS.inv()
T_ROS_GL = pose_ROS_GL.to_transformation_matrix()


[docs] def depth2xyz( depth_image: np.ndarray, intrinsics: np.ndarray, depth_scale: Optional[int | float] = None, ) -> np.ndarray: """Use camera intrinsics to convert depth_image to xyz_image :param depth_image: [H, W] or [H, W, 1] np.uint16/np.floating np.ndarray For np.uint16, depth_scale is assumed to be 1000 (in millimeters). For np.floating, depth_scale is assumed to be 1 (in meters). :param intrinsics: [3, 3] camera intrinsics matrix or [fx, fy, cx, cy] :param depth_scale: depth scale units, in meters. If None, choose based on dtype. :return xyz_image: [H, W, 3] np.floating np.ndarray """ if intrinsics.size == 4: fx, fy, cx, cy = intrinsics else: fx, fy = intrinsics[0, 0], intrinsics[1, 1] cx, cy = intrinsics[0, 2], intrinsics[1, 2] if depth_image.ndim == 3: assert ( depth_image.shape[-1] == 1 ), f"Wrong number of channels: {depth_image.shape}" depth_image = depth_image[..., 0] height, width = depth_image.shape[:2] uu, vv = np.meshgrid(np.arange(width), np.arange(height)) if depth_scale is None: if np.issubdtype(depth_image.dtype, np.uint16): depth_scale = 1000.0 elif np.issubdtype(depth_image.dtype, np.floating): depth_scale = 1.0 else: raise TypeError(f"Unsupported depth image dtype: {depth_image.dtype}") z = depth_image / depth_scale x = (uu - cx) * z / fx y = (vv - cy) * z / fy xyz_image = np.stack([x, y, z], axis=-1) return xyz_image
[docs] def transform_points(pts: np.ndarray, H: np.ndarray) -> np.ndarray: """Transform points by 4x4 transformation matrix H :return out: same shape as pts """ assert H.shape == (4, 4), H.shape assert pts.shape[-1] == 3, pts.shape return pts @ H[:3, :3].T + H[:3, 3]
[docs] def transform_points_batch(pts: np.ndarray, H: np.ndarray) -> np.ndarray: """Transform points by Bx4x4 transformation matrix H [3,], [4, 4] => [3,] [P, 3], [4, 4] => [P, 3] [H, W, 3], [4, 4] => [H, W, 3] [N, H, W, 3], [4, 4] => [N, H, W, 3] [P, 3], [B, 4, 4] => [B, P, 3] [B, P, 3], [B, 4, 4] => [B, P, 3] [H, W, 3], [B, 4, 4] => [B, H, W, 3] # (H != B) [B, H, W, 3], [B, 4, 4] => [B, H, W, 3] [N, H, W, 3], [B, 4, 4] => [B, N, H, W, 3] # (N != B) [B, N, H, W, 3], [B, 4, 4] => [B, N, H, W, 3] [B, N, 3], [B, N, 4, 4] => [B, N, 3] [B, N, P, 3], [B, N, 4, 4] => [B, N, P, 3] [B, N, H, W, 3], [B, N, 4, 4] => [B, N, H, W, 3] """ assert H.shape[-2:] == (4, 4), H.shape assert pts.shape[-1] == 3, pts.shape batch_shape = H.shape[:-2] pts_shape = batch_shape + (-1, 3) out_pts_shape = pts.shape if batch_shape != pts.shape[: len(batch_shape)] or pts.ndim < H.ndim < 4: pts_shape = (-1, 3) out_pts_shape = batch_shape + out_pts_shape H = H.swapaxes(-1, -2) return (pts.reshape(pts_shape) @ H[..., :3, :3] + H[..., [3], :3]).reshape( out_pts_shape )
[docs] def resize_obs_image( rgb_image, depth_image, intr_params: tuple, new_size, interpolation=cv2.INTER_NEAREST_EXACT, ): """Resize rgb/depth images into shape=(width, height) :param rgb_image: [H, W, 3] np.uint8 np.ndarray :param depth_image: [H, W] np.uint16 np.ndarray :param intr_params: (fx, fy, cx, cy) tuple :param new_size: (width, height) tuple """ new_width, new_height = new_size fx, fy, cx, cy = intr_params # Update intrinsics old_height, old_width = rgb_image.shape[:2] u_ratio = new_width / old_width v_ratio = new_height / old_height intr_params = fx * u_ratio, fy * v_ratio, cx * u_ratio, cy * v_ratio # Resize images rgb_image = cv2.resize(rgb_image, new_size, interpolation=interpolation) depth_image = cv2.resize(depth_image, new_size, interpolation=interpolation) return rgb_image, depth_image, intr_params
[docs] def register_depth( depth_unaligned: np.ndarray, k_depth: np.ndarray, k_color: np.ndarray, T_color_depth: np.ndarray, color_im_size: tuple[int, int], dist_color: np.ndarray | None = None, depth_dilation: bool = False, ) -> np.ndarray: """Register depth to color frame (a.k.a., align_depth_to_color) This will align depth image to color frame (intrinsics, extrinsics, and resolution) :param depth_unaligned: unaligned depth image. [H, W] or [H, W, 1] np.uint16/np.floating np.ndarray :param k_depth: depth camera intrinsic matrix, [3, 3] np.floating np.ndarray :param k_color: color camera intrinsic matrix, [3, 3] np.floating np.ndarray :param T_color_depth: extrinsics from depth camera to color camera, following OpenCV frame convention. [4, 4] np.floating np.ndarray :param color_im_size: color image size, (width, height) :param dist_color: color camera distortion coefficients, [5,] np.floating np.ndarray :param depth_dilation: whether to dilate depth to avoid holes and occlusion errors. :return depth: aligned depth image, [H, W] np.uint16/np.floating np.ndarray """ depth = cv2.rgbd.registerDepth( k_depth, k_color, dist_color, T_color_depth, depth_unaligned, color_im_size, depthDilation=depth_dilation, ) depth[np.isnan(depth) | np.isinf(depth) | (depth < 0)] = 0.0 return depth