from __future__ import annotations
from typing import Optional
import cv2
import numpy as np
from sapien import Pose
# Convert between camera frame conventions
# OpenCV frame convention: right(x), down(y), forward(z)
# OpenGL frame convention: right(x), up(y), backwards(z)
# ROS/Sapien frame convention: forward(x), left(y) and up(z)
# For ROS frame conventions, see https://www.ros.org/reps/rep-0103.html#axis-orientation
T_CV_GL = np.array(
[[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_CV_GL = Pose(T_CV_GL)
pose_GL_CV = pose_CV_GL.inv()
T_GL_CV = pose_GL_CV.to_transformation_matrix()
T_CV_ROS = np.array(
[[0, -1, 0, 0], [0, 0, -1, 0], [1, 0, 0, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_CV_ROS = Pose(T_CV_ROS)
pose_ROS_CV = pose_CV_ROS.inv()
T_ROS_CV = pose_ROS_CV.to_transformation_matrix()
T_GL_ROS = np.array(
[[0, -1, 0, 0], [0, 0, 1, 0], [-1, 0, 0, 0], [0, 0, 0, 1]], dtype=np.float32
)
pose_GL_ROS = Pose(T_GL_ROS)
pose_ROS_GL = pose_GL_ROS.inv()
T_ROS_GL = pose_ROS_GL.to_transformation_matrix()
[docs]
def depth2xyz(
depth_image: np.ndarray,
intrinsics: np.ndarray,
depth_scale: Optional[int | float] = None,
) -> np.ndarray:
"""Use camera intrinsics to convert depth_image to xyz_image
:param depth_image: [H, W] or [H, W, 1] np.uint16/np.floating np.ndarray
For np.uint16, depth_scale is assumed to be 1000 (in millimeters).
For np.floating, depth_scale is assumed to be 1 (in meters).
:param intrinsics: [3, 3] camera intrinsics matrix or [fx, fy, cx, cy]
:param depth_scale: depth scale units, in meters. If None, choose based on dtype.
:return xyz_image: [H, W, 3] np.floating np.ndarray
"""
if intrinsics.size == 4:
fx, fy, cx, cy = intrinsics
else:
fx, fy = intrinsics[0, 0], intrinsics[1, 1]
cx, cy = intrinsics[0, 2], intrinsics[1, 2]
if depth_image.ndim == 3:
assert (
depth_image.shape[-1] == 1
), f"Wrong number of channels: {depth_image.shape}"
depth_image = depth_image[..., 0]
height, width = depth_image.shape[:2]
uu, vv = np.meshgrid(np.arange(width), np.arange(height))
if depth_scale is None:
if np.issubdtype(depth_image.dtype, np.uint16):
depth_scale = 1000.0
elif np.issubdtype(depth_image.dtype, np.floating):
depth_scale = 1.0
else:
raise TypeError(f"Unsupported depth image dtype: {depth_image.dtype}")
z = depth_image / depth_scale
x = (uu - cx) * z / fx
y = (vv - cy) * z / fy
xyz_image = np.stack([x, y, z], axis=-1)
return xyz_image
[docs]
def resize_obs_image(
rgb_image,
depth_image,
intr_params: tuple,
new_size,
interpolation=cv2.INTER_NEAREST_EXACT,
):
"""Resize rgb/depth images into shape=(width, height)
:param rgb_image: [H, W, 3] np.uint8 np.ndarray
:param depth_image: [H, W] np.uint16 np.ndarray
:param intr_params: (fx, fy, cx, cy) tuple
:param new_size: (width, height) tuple
"""
new_width, new_height = new_size
fx, fy, cx, cy = intr_params
# Update intrinsics
old_height, old_width = rgb_image.shape[:2]
u_ratio = new_width / old_width
v_ratio = new_height / old_height
intr_params = fx * u_ratio, fy * v_ratio, cx * u_ratio, cy * v_ratio
# Resize images
rgb_image = cv2.resize(rgb_image, new_size, interpolation=interpolation)
depth_image = cv2.resize(depth_image, new_size, interpolation=interpolation)
return rgb_image, depth_image, intr_params
[docs]
def register_depth(
depth_unaligned: np.ndarray,
k_depth: np.ndarray,
k_color: np.ndarray,
T_color_depth: np.ndarray,
color_im_size: tuple[int, int],
dist_color: np.ndarray | None = None,
depth_dilation: bool = False,
) -> np.ndarray:
"""Register depth to color frame (a.k.a., align_depth_to_color)
This will align depth image to color frame (intrinsics, extrinsics, and resolution)
:param depth_unaligned: unaligned depth image.
[H, W] or [H, W, 1] np.uint16/np.floating np.ndarray
:param k_depth: depth camera intrinsic matrix, [3, 3] np.floating np.ndarray
:param k_color: color camera intrinsic matrix, [3, 3] np.floating np.ndarray
:param T_color_depth: extrinsics from depth camera to color camera, following
OpenCV frame convention. [4, 4] np.floating np.ndarray
:param color_im_size: color image size, (width, height)
:param dist_color: color camera distortion coefficients, [5,] np.floating np.ndarray
:param depth_dilation: whether to dilate depth to avoid holes and occlusion errors.
:return depth: aligned depth image, [H, W] np.uint16/np.floating np.ndarray
"""
depth = cv2.rgbd.registerDepth(
k_depth,
k_color,
dist_color,
T_color_depth,
depth_unaligned,
color_im_size,
depthDilation=depth_dilation,
)
depth[np.isnan(depth) | np.isinf(depth) | (depth < 0)] = 0.0
return depth