Source code for real_robot.sensors.simsense_depth

"""Get depth image from stereo IR images using simsense"""

from __future__ import annotations

import cv2
import numpy as np
from sapien.pysapien.simsense import DepthSensorEngine


class SimsenseDepth:
    """Compute depth maps from a stereo pair of infrared images with simsense.

    The class validates the stereo-matching configuration, derives the
    rectification maps with OpenCV and forwards everything to a
    ``DepthSensorEngine`` stored in ``self.engine``.
    """

    def __init__(
        self,
        ir_size: tuple[int, int],
        k_l: np.ndarray,
        k_r: np.ndarray,
        l2r: np.ndarray,
        k_rgb: np.ndarray | None = None,
        rgb_size: tuple[int, int] | None = None,
        l2rgb: np.ndarray | None = None,
        min_depth: float = 0.0,
        max_depth: float = 10.0,
        ir_noise_seed: int = 0,
        speckle_shape: float = 0.0,
        speckle_scale: float = 0.0,
        gaussian_mu: float = 0.0,
        gaussian_sigma: float = 0.0,
        rectified: bool = False,
        census_width: int = 7,
        census_height: int = 7,
        max_disp: int = 128,
        block_width: int = 7,
        block_height: int = 7,
        p1_penalty: int = 7,
        p2_penalty: int = 86,
        uniqueness_ratio: int = 15,
        lr_max_diff: int = 1,
        median_filter_size: int = 3,
        depth_dilation: bool = False,
    ):
        """
        Initiate the SimsenseDepth class.

        The camera frame follows OpenCV frame convention
        (x right, y down, z forward).
        Left, right and RGB camera are assumed to be undistorted.

        By default, the final depth map will be presented in left camera's
        frame with ir_size. Specifying rgb_size, k_rgb and l2rgb will turn on
        depth registration, which will transform the final depth map from left
        camera's frame to specified RGB camera's frame with rgb_size.

        Specifying speckle_shape > 0 will turn on infrared noise simulation.

        :param ir_size: (width, height) of the left and right image.
        :param k_l: Intrinsic matrix of the left camera
                    (in OpenCV frame convention).
        :param k_r: Intrinsic matrix of the right camera
                    (in OpenCV frame convention).
        :param l2r: Change-of-coordinate matrix from left camera's frame to
                    right camera's frame (in OpenCV frame convention).
        :param rgb_size: (width, height) of the RGB image.
        :param k_rgb: Intrinsic matrix of the RGB camera
                      (in OpenCV frame convention).
        :param l2rgb: Change-of-coordinate matrix from left camera's frame to
                      RGB camera's frame (in OpenCV frame convention).
        :param min_depth: Minimum valid depth in meters.
        :param max_depth: Maximum valid depth (non-inclusive) in meters.
        :param ir_noise_seed: Random seed for simulating infrared noise.
        :param speckle_shape: Shape parameter for simulating infrared speckle
                              noise (Gamma distribution).
                              Set to 0 to disable noise simulation
        :param speckle_scale: Scale parameter for simulating infrared speckle
                              noise (Gamma distribution).
        :param gaussian_mu: Mean for simulating infrared thermal noise
                            (Gaussian distribution).
        :param gaussian_sigma: Standard deviation for simulating infrared
                               thermal noise (Gaussian distribution).
        :param rectified: Whether the input has already been rectified.
                          Set to true if no rectification is needed.
        :param census_width: Width of the center-symmetric census transform
                             window. This must be an odd number.
        :param census_height: Height of the center-symmetric census transform
                              window. This must be an odd number.
        :param max_disp: Maximum disparity search space (non-inclusive)
                         for stereo matching.
        :param block_width: Width of the matched block.
                            This must be an odd number.
        :param block_height: Height of the matched block.
                             This must be an odd number.
        :param p1_penalty: P1 penalty for semi-global matching algorithm.
        :param p2_penalty: P2 penalty for semi-global matching algorithm.
        :param uniqueness_ratio: Margin in percentage by which the minimum
                                 computed cost should win the second best
                                 (not considering best match's adjacent pixels)
                                 cost to consider the found match valid.
        :param lr_max_diff: Maximum allowed difference in the left-right
                            consistency check.
                            Set it to 255 to disable the check.
        :param median_filter_size: Size of the median filter.
                                   Choices are 1, 3, 5, 7.
                                   Set to 1 to disable median filter.
        :param depth_dilation: Dilate the final depth map to avoid holes when
                               depth registration is on. Recommended when
                               rgb_size is greater than ir_size.
        :raises TypeError: If any argument fails validation. (TypeError is
                           also used for out-of-range values; kept for
                           backward compatibility with existing callers.)
        """
        img_w, img_h = ir_size

        # Supplying any one RGB parameter requests depth registration; the
        # complete set is then required (checked below).
        registration = (
            k_rgb is not None or rgb_size is not None or l2rgb is not None
        )

        # ---- Argument validation (order determines which error is raised
        # first when several arguments are invalid) ----
        if (
            not isinstance(img_h, int)
            or not isinstance(img_w, int)
            or img_h < 32
            or img_w < 32
        ):
            raise TypeError("Image height and width must be integer no less than 32")

        if registration and (k_rgb is None or rgb_size is None or l2rgb is None):
            raise TypeError(
                "Depth registration is on but missing some RGB camera's parameters"
            )

        if speckle_shape > 0 and (speckle_scale <= 0 or gaussian_sigma <= 0):
            raise TypeError(
                "Infrared noise simulation is on. Speckle_scale and "
                "gaussian_sigma must both be positive"
            )

        self._check_census_window(census_width, census_height)

        if not isinstance(max_disp, int) or max_disp < 32 or max_disp > 1024:
            raise TypeError("max_disp must be integer within range [32, 1024]")

        self._check_matching_block(block_width, block_height)
        self._check_penalties(p1_penalty, p2_penalty)
        self._check_uniqueness_ratio(uniqueness_ratio)
        self._check_lr_max_diff(lr_max_diff)

        if median_filter_size not in (1, 3, 5, 7):
            raise TypeError("Median filter size choices are 1, 3, 5, 7")

        # ---- Rectification ----
        rot_l, rot_r, proj_l, proj_r, q, _, _ = cv2.stereoRectify(
            cameraMatrix1=k_l,
            distCoeffs1=None,
            cameraMatrix2=k_r,
            distCoeffs2=None,
            imageSize=ir_size,
            R=l2r[:3, :3],
            T=l2r[:3, 3:],
            alpha=1.0,
            newImageSize=ir_size,
        )
        # Per OpenCV's documented layout of Q: Q[2][3] is the rectified focal
        # length (in pixels) and Q[3][2] is -1/Tx, so its reciprocal is the
        # baseline in the length unit of l2r's translation (meters here,
        # assuming l2r is expressed in meters).
        f_len = q[2][3]
        b_len = 1.0 / q[3][2]

        map_lx, map_ly = cv2.initUndistortRectifyMap(
            k_l, None, rot_l, proj_l, ir_size, cv2.CV_32F
        )
        map_rx, map_ry = cv2.initUndistortRectifyMap(
            k_r, None, rot_r, proj_r, ir_size, cv2.CV_32F
        )

        # ---- Output (registration target) camera ----
        # NOTE: the engine constructor without registration is not exposed, so
        # the no-registration case is expressed as an identity registration of
        # the left camera onto itself.
        if registration:
            out_h, out_w = rgb_size[1], rgb_size[0]
            k_out, l2out = k_rgb, l2rgb
        else:
            out_h, out_w = img_h, img_w
            k_out, l2out = k_l, np.eye(4)

        a1, a2, a3, b = self._get_registration_mat(ir_size, k_l, k_out, l2out)
        # b has one element per row; hand plain Python floats to the engine
        # instead of 1-element arrays, whose implicit scalar conversion is
        # deprecated in recent NumPy. (Assumes the engine takes scalars here,
        # as the original b[0], b[1], b[2] arguments suggest.)
        b1, b2, b3 = (float(value) for value in np.ravel(b))

        self.engine = DepthSensorEngine(
            img_h,
            img_w,
            out_h,
            out_w,
            f_len,
            b_len,
            min_depth,
            max_depth,
            ir_noise_seed,
            speckle_shape,
            speckle_scale,
            gaussian_mu,
            gaussian_sigma,
            rectified,
            census_width,
            census_height,
            max_disp,
            block_width,
            block_height,
            p1_penalty,
            p2_penalty,
            uniqueness_ratio,
            lr_max_diff,
            median_filter_size,
            map_lx,
            map_ly,
            map_rx,
            map_ry,
            a1,
            a2,
            a3,
            b1,
            b2,
            b3,
            depth_dilation,
            k_out[0][0],
            k_out[1][1],
            k_out[0][1],
            k_out[0][2],
            k_out[1][2],
        )

    def compute(self, img_l: np.ndarray, img_r: np.ndarray) -> np.ndarray:
        """
        Take two images captured by a pair of nearby parallel cameras,
        and output the computed depth map in meters.

        :param img_l: Grayscale/infrared image (uint8) captured by left camera.
        :param img_r: Grayscale/infrared image (uint8) captured by right camera.
        :return: Computed depth map (in meters) from left camera's view
                 or rgb camera's view.
        """
        self.engine.compute(img_l, img_r)
        return self.engine.get_ndarray()

    def set_ir_noise_parameters(
        self,
        speckle_shape: float,
        speckle_scale: float,
        gaussian_mu: float,
        gaussian_sigma: float,
    ) -> None:
        """
        Update the infrared noise simulation parameters of the engine.

        :param speckle_shape: Shape parameter for simulating infrared speckle
                              noise (Gamma distribution).
                              Set to 0 to disable noise simulation
        :param speckle_scale: Scale parameter for simulating infrared speckle
                              noise (Gamma distribution).
        :param gaussian_mu: Mean for simulating infrared thermal noise
                            (Gaussian distribution).
        :param gaussian_sigma: Standard deviation for simulating infrared
                               thermal noise (Gaussian distribution).
        :raises TypeError: If noise simulation is on (speckle_shape > 0) but
                           speckle_scale or gaussian_sigma is not positive.
        """
        if speckle_shape > 0 and (speckle_scale <= 0 or gaussian_sigma <= 0):
            raise TypeError(
                "Infrared noise simulation is on. speckle_scale "
                "and gaussian_sigma must both be positive"
            )
        self.engine.set_ir_noise_parameters(
            speckle_shape, speckle_scale, gaussian_mu, gaussian_sigma
        )

    def set_census_window_size(self, census_width: int, census_height: int) -> None:
        """
        Update the census transform window size of the engine.

        :param census_width: Width of the center-symmetric census transform
                             window. This must be an odd number.
        :param census_height: Height of the center-symmetric census transform
                              window. This must be an odd number.
        :raises TypeError: If the window is not made of positive odd integers
                           or its area exceeds 65.
        """
        self._check_census_window(census_width, census_height)
        self.engine.set_census_window_size(census_width, census_height)

    def set_matching_block_size(self, block_width: int, block_height: int) -> None:
        """
        Update the matched block size of the engine.

        :param block_width: Width of the matched block.
                            This must be an odd number.
        :param block_height: Height of the matched block.
                             This must be an odd number.
        :raises TypeError: If the block is not made of positive odd integers
                           or its area exceeds 256.
        """
        self._check_matching_block(block_width, block_height)
        self.engine.set_matching_block_size(block_width, block_height)

    def set_penalties(self, p1_penalty: int, p2_penalty: int) -> None:
        """
        Update the semi-global matching penalties of the engine.

        :param p1_penalty: P1 penalty for semi-global matching algorithm.
        :param p2_penalty: P2 penalty for semi-global matching algorithm.
        :raises TypeError: Unless 0 < p1_penalty < p2_penalty < 224 and both
                           are integers.
        """
        self._check_penalties(p1_penalty, p2_penalty)
        self.engine.set_penalties(p1_penalty, p2_penalty)

    def set_uniqueness_ratio(self, uniqueness_ratio: int) -> None:
        """
        Update the uniqueness ratio of the engine.

        :param uniqueness_ratio: Margin in percentage by which the minimum
                                 computed cost should win the second best
                                 (not considering best match's adjacent pixels)
                                 cost to consider the found match valid.
        :raises TypeError: If uniqueness_ratio is not an integer in [0, 255].
        """
        self._check_uniqueness_ratio(uniqueness_ratio)
        self.engine.set_uniqueness_ratio(uniqueness_ratio)

    def set_lr_max_diff(self, lr_max_diff: int) -> None:
        """
        Update the left-right consistency threshold of the engine.

        :param lr_max_diff: Maximum allowed difference in the left-right
                            consistency check.
                            Set it to 255 to disable the check.
        :raises TypeError: If lr_max_diff is not an integer in the accepted
                           range (see ``_check_lr_max_diff``).
        """
        self._check_lr_max_diff(lr_max_diff)
        self.engine.set_lr_max_diff(lr_max_diff)

    # ------------------------------------------------------------------
    # Shared validators (used by both __init__ and the setters so the two
    # code paths cannot drift apart).
    # ------------------------------------------------------------------
    @staticmethod
    def _is_odd_window(width: int, height: int, max_area: int) -> bool:
        """Return True if both sides are positive odd ints and area <= max_area."""
        return (
            isinstance(width, int)
            and isinstance(height, int)
            and width > 0
            and height > 0
            and width % 2 == 1
            and height % 2 == 1
            and width * height <= max_area
        )

    @classmethod
    def _check_census_window(cls, census_width: int, census_height: int) -> None:
        """Raise TypeError unless the census window is odd-sized with area <= 65."""
        if not cls._is_odd_window(census_width, census_height, 65):
            raise TypeError(
                "census_width and census_height must be positive odd "
                "integers and their product should be no larger than 65"
            )

    @classmethod
    def _check_matching_block(cls, block_width: int, block_height: int) -> None:
        """Raise TypeError unless the matched block is odd-sized with area <= 256."""
        if not cls._is_odd_window(block_width, block_height, 256):
            raise TypeError(
                "block_width and block_height must be positive odd "
                "integers and their product should be no larger than 256"
            )

    @staticmethod
    def _check_penalties(p1_penalty: int, p2_penalty: int) -> None:
        """Raise TypeError unless 0 < p1_penalty < p2_penalty < 224 (integers)."""
        if (
            not isinstance(p1_penalty, int)
            or not isinstance(p2_penalty, int)
            or p1_penalty <= 0
            or p2_penalty <= 0
            or p1_penalty >= p2_penalty
            or p2_penalty >= 224
        ):
            raise TypeError(
                "p1 must be positive integer less than p2 and p2 be "
                "positive integer less than 224"
            )

    @staticmethod
    def _check_uniqueness_ratio(uniqueness_ratio: int) -> None:
        """Raise TypeError unless uniqueness_ratio is an integer in [0, 255]."""
        # NOTE(review): the message says "positive" but 0 is accepted.
        if (
            not isinstance(uniqueness_ratio, int)
            or uniqueness_ratio < 0
            or uniqueness_ratio > 255
        ):
            raise TypeError(
                "uniqueness_ratio must be positive integer no larger than 255"
            )

    @staticmethod
    def _check_lr_max_diff(lr_max_diff: int) -> None:
        """Raise TypeError unless lr_max_diff is an integer in [-1, 255]."""
        # NOTE(review): the check admits -1 although the message advertises
        # [0, 255]. Behaviour is kept as-is because callers may rely on -1;
        # confirm the intended lower bound against the engine.
        if not isinstance(lr_max_diff, int) or lr_max_diff < -1 or lr_max_diff > 255:
            raise TypeError("lr_max_diff must be integer within the range [0, 255]")

    @staticmethod
    def _get_registration_mat(
        ir_size: tuple[int, int],
        k_ir: np.ndarray,
        k_rgb: np.ndarray,
        ir2rgb: np.ndarray,
    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Precompute the terms used to re-project IR pixels into the RGB image.

        For every IR pixel (x, y) this evaluates
        ``k_rgb @ R @ inv(k_ir) @ (x, y, 1)`` where ``R`` is the rotation part
        of ``ir2rgb``, and separately ``k_rgb @ t`` for its translation part.

        :param ir_size: (width, height) of the IR image.
        :param k_ir: Intrinsic matrix of the IR camera.
        :param k_rgb: Intrinsic matrix of the target (RGB) camera.
        :param ir2rgb: Change-of-coordinate matrix from the IR camera's frame
                       to the target camera's frame.
        :return: The three components of the per-pixel term, each indexed as
                 ``[y, x]`` with shape (height, width), followed by
                 ``k_rgb @ t`` (one row per component).
        """
        rotation = ir2rgb[:3, :3]
        translation = ir2rgb[:3, 3:]

        width, height = ir_size
        u, v = np.meshgrid(np.arange(width), np.arange(height))
        # Homogeneous pixel coordinates: pixel_coords[y, x] is (x, y, 1)
        pixel_coords = np.stack([u, v, np.ones_like(u)], axis=-1)

        a = np.einsum(
            "ij,hwj->hwi", k_rgb @ rotation @ np.linalg.inv(k_ir), pixel_coords
        )
        b = k_rgb @ translation

        return a[..., 0], a[..., 1], a[..., 2], b