Source code for gradslam.datasets.datautils

import copy
import warnings
from collections import OrderedDict
from typing import List, Union

import numpy as np
import torch

__all__ = [
    "normalize_image",
    "channels_first",
    "scale_intrinsics",
    "pointquaternion_to_homogeneous",
    "poses_to_transforms",
    "create_label_image",
]


def normalize_image(rgb: Union[torch.Tensor, np.ndarray]):
    r"""Normalizes RGB image values from :math:`[0, 255]` range to :math:`[0, 1]` range.

    Args:
        rgb (torch.Tensor or numpy.ndarray): RGB image in range :math:`[0, 255]`

    Returns:
        torch.Tensor or numpy.ndarray: Normalized RGB image in range :math:`[0, 1]`

    Shape:
        - rgb: :math:`(*)` (any shape)
        - Output: Same shape as input :math:`(*)`
    """
    if torch.is_tensor(rgb):
        return rgb.float() / 255
    elif isinstance(rgb, np.ndarray):
        return rgb.astype(float) / 255
    else:
        raise TypeError("Unsupported input rgb type: %r" % type(rgb))
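
# Usage sketch (illustrative, not part of the library source; the frame size
# below is hypothetical): a uint8 image maps to floats in [0, 1].
example_rgb = np.full((480, 640, 3), 255, dtype=np.uint8)
example_rgb01 = normalize_image(example_rgb)  # all values become 1.0
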
def channels_first(rgb: Union[torch.Tensor, np.ndarray]):
    r"""Converts from channels last representation :math:`(*, H, W, C)` to channels first
    representation :math:`(*, C, H, W)`.

    Args:
        rgb (torch.Tensor or numpy.ndarray): :math:`(*, H, W, C)` ordering `(*, height, width, channels)`

    Returns:
        torch.Tensor or numpy.ndarray: :math:`(*, C, H, W)` ordering

    Shape:
        - rgb: :math:`(*, H, W, C)`
        - Output: :math:`(*, C, H, W)`
    """
    if not (isinstance(rgb, np.ndarray) or torch.is_tensor(rgb)):
        raise TypeError("Unsupported input rgb type {}".format(type(rgb)))

    if rgb.ndim < 3:
        raise ValueError(
            "Input rgb must contain at least 3 dims, but had {} dims.".format(rgb.ndim)
        )
    if rgb.shape[-3] < rgb.shape[-1]:
        msg = "Are you sure that the input is correct? Number of channels exceeds height of image: %r > %r"
        warnings.warn(msg % (rgb.shape[-1], rgb.shape[-3]))
    # Rotate the last three axes (H, W, C) -> (C, H, W), leaving leading dims intact.
    ordering = list(range(rgb.ndim))
    ordering[-2], ordering[-1], ordering[-3] = ordering[-3], ordering[-2], ordering[-1]

    if isinstance(rgb, np.ndarray):
        return np.ascontiguousarray(rgb.transpose(*ordering))
    elif torch.is_tensor(rgb):
        return rgb.permute(*ordering).contiguous()
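
# Usage sketch (illustrative; shapes are hypothetical): a batch of channels-last
# frames (B, H, W, C) becomes channels-first (B, C, H, W).
example_batch = np.zeros((8, 480, 640, 3), dtype=np.uint8)
example_chw = channels_first(example_batch)  # shape (8, 3, 480, 640)
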
def scale_intrinsics(
    intrinsics: Union[np.ndarray, torch.Tensor],
    h_ratio: Union[float, int],
    w_ratio: Union[float, int],
):
    r"""Scales the intrinsics appropriately for resized frames where
    :math:`h_\text{ratio} = h_\text{new} / h_\text{old}` and :math:`w_\text{ratio} = w_\text{new} / w_\text{old}`

    Args:
        intrinsics (numpy.ndarray or torch.Tensor): Intrinsics matrix of original frame
        h_ratio (float or int): Ratio of new frame's height to old frame's height
            :math:`h_\text{ratio} = h_\text{new} / h_\text{old}`
        w_ratio (float or int): Ratio of new frame's width to old frame's width
            :math:`w_\text{ratio} = w_\text{new} / w_\text{old}`

    Returns:
        numpy.ndarray or torch.Tensor: Intrinsics matrix scaled appropriately for new frame size

    Shape:
        - intrinsics: :math:`(*, 3, 3)` or :math:`(*, 4, 4)`
        - Output: Matches `intrinsics` shape, :math:`(*, 3, 3)` or :math:`(*, 4, 4)`
    """
    if isinstance(intrinsics, np.ndarray):
        scaled_intrinsics = intrinsics.astype(np.float32).copy()
    elif torch.is_tensor(intrinsics):
        scaled_intrinsics = intrinsics.to(torch.float).clone()
    else:
        raise TypeError("Unsupported input intrinsics type {}".format(type(intrinsics)))
    if not (intrinsics.shape[-2:] == (3, 3) or intrinsics.shape[-2:] == (4, 4)):
        raise ValueError(
            "intrinsics must have shape (*, 3, 3) or (*, 4, 4), but had shape {} instead".format(
                intrinsics.shape
            )
        )
    if (intrinsics[..., -1, -1] != 1).any() or (intrinsics[..., 2, 2] != 1).any():
        warnings.warn(
            "Incorrect intrinsics: intrinsics[..., -1, -1] and intrinsics[..., 2, 2] should be 1."
        )

    scaled_intrinsics[..., 0, 0] *= w_ratio  # fx
    scaled_intrinsics[..., 1, 1] *= h_ratio  # fy
    scaled_intrinsics[..., 0, 2] *= w_ratio  # cx
    scaled_intrinsics[..., 1, 2] *= h_ratio  # cy
    return scaled_intrinsics
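
# Usage sketch (intrinsics values below are hypothetical): halving both image
# dimensions halves fx, fy, cx, cy.
example_K = np.array([[525.0, 0.0, 320.0], [0.0, 525.0, 240.0], [0.0, 0.0, 1.0]])
example_K_half = scale_intrinsics(example_K, h_ratio=0.5, w_ratio=0.5)
# example_K_half[0, 0] == 262.5 and example_K_half[0, 2] == 160.0
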
def pointquaternion_to_homogeneous(
    pointquaternions: Union[np.ndarray, torch.Tensor], eps: float = 1e-12
):
    r"""Converts 3D point and unit quaternions :math:`(t_x, t_y, t_z, q_x, q_y, q_z, q_w)` to
    homogeneous transformations :math:`[R | T]` where :math:`R` denotes the :math:`(3, 3)` rotation
    matrix and :math:`T` denotes the :math:`(3, 1)` translation matrix:

    .. math::

        \left[\begin{array}{@{}c:c@{}}
            R & T \\ \hdashline
            \begin{array}{@{}ccc@{}}
                0 & 0 & 0
            \end{array} & 1
        \end{array}\right]

    Args:
        pointquaternions (numpy.ndarray or torch.Tensor): 3D point positions and unit quaternions
            :math:`(t_x, t_y, t_z, q_x, q_y, q_z, q_w)` where :math:`(t_x, t_y, t_z)` is the 3D position
            and :math:`(q_x, q_y, q_z, q_w)` is the unit quaternion.
        eps (float): Small value, to avoid division by zero. Default: 1e-12

    Returns:
        numpy.ndarray or torch.Tensor: Homogeneous transformation matrices.

    Shape:
        - pointquaternions: :math:`(*, 7)`
        - Output: :math:`(*, 4, 4)`
    """
    if not (
        isinstance(pointquaternions, np.ndarray) or torch.is_tensor(pointquaternions)
    ):
        raise TypeError(
            '"pointquaternions" must be of type "np.ndarray" or "torch.Tensor". Got {0}'.format(
                type(pointquaternions)
            )
        )
    if not isinstance(eps, float):
        raise TypeError('"eps" must be of type "float". Got {0}.'.format(type(eps)))
    if pointquaternions.shape[-1] != 7:
        raise ValueError(
            '"pointquaternions" must be of shape (*, 7). Got {0}.'.format(
                pointquaternions.shape
            )
        )

    output_shape = (*pointquaternions.shape[:-1], 4, 4)
    if isinstance(pointquaternions, np.ndarray):
        t = pointquaternions[..., :3].astype(np.float32)
        q = pointquaternions[..., 3:7].astype(np.float32)
        transform = np.zeros(output_shape, dtype=np.float32)
    else:
        t = pointquaternions[..., :3].float()
        q = pointquaternions[..., 3:7].float()
        transform = torch.zeros(
            output_shape, dtype=torch.float, device=pointquaternions.device
        )

    # q_norm = ||q|| / sqrt(2): after the division, the quaternion has squared
    # norm 2, which folds the factor of 2 from the standard quaternion-to-rotation
    # formula into the outer product below.
    q_norm = (0.5 * (q ** 2).sum(-1)[..., None]) ** 0.5
    # Divide out-of-place so a float32 torch input is not mutated through the
    # view returned by `.float()`.
    q = q / (
        torch.max(q_norm, torch.tensor(eps))
        if torch.is_tensor(q_norm)
        else np.maximum(q_norm, eps)
    )

    # Outer product q q^T: its entries are the pairwise products used below.
    if isinstance(q, np.ndarray):
        q = np.matmul(q[..., None], q[..., None, :])
    else:
        q = torch.matmul(q.unsqueeze(-1), q.unsqueeze(-2))

    txx = q[..., 0, 0]
    tyy = q[..., 1, 1]
    tzz = q[..., 2, 2]
    txy = q[..., 0, 1]
    txz = q[..., 0, 2]
    tyz = q[..., 1, 2]
    twx = q[..., 0, 3]
    twy = q[..., 1, 3]
    twz = q[..., 2, 3]
    transform[..., 0, 0] = 1.0
    transform[..., 1, 1] = 1.0
    transform[..., 2, 2] = 1.0
    transform[..., 3, 3] = 1.0
    transform[..., 0, 0] -= tyy + tzz
    transform[..., 0, 1] = txy - twz
    transform[..., 0, 2] = txz + twy
    transform[..., 1, 0] = txy + twz
    transform[..., 1, 1] -= txx + tzz
    transform[..., 1, 2] = tyz - twx
    transform[..., 2, 0] = txz - twy
    transform[..., 2, 1] = tyz + twx
    transform[..., 2, 2] -= txx + tyy
    transform[..., :3, 3] = t

    return transform
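
# Usage sketch (illustrative values): the identity quaternion (0, 0, 0, 1) with
# translation (1, 2, 3) yields a transform with identity rotation.
example_pq = np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0, 1.0])
example_T = pointquaternion_to_homogeneous(example_pq)
# example_T[:3, :3] == np.eye(3) and example_T[:3, 3] == (1, 2, 3)
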
def poses_to_transforms(poses: Union[np.ndarray, List[np.ndarray]]):
    r"""Converts poses to frame-to-frame transformations, with the first frame in the
    sequence assigned the identity transformation.

    Args:
        poses (numpy.ndarray or list of numpy.ndarray): Sequence of poses in `numpy.ndarray` format.

    Returns:
        numpy.ndarray or list of numpy.ndarray: Sequence of frame-to-frame transformations where
            the initial frame is transformed to have identity pose.

    Shape:
        - poses: Could be `numpy.ndarray` of shape :math:`(N, 4, 4)`, or list of `numpy.ndarray`s
          of shape :math:`(4, 4)`
        - Output: Of same shape as input `poses`
    """
    transformations = copy.deepcopy(poses)
    for i in range(len(poses)):
        if i == 0:
            transformations[i] = np.eye(4)
        else:
            # Relative transformation from frame (i - 1) to frame i:
            # T_i = P_{i-1}^{-1} @ P_i
            transformations[i] = np.linalg.inv(poses[i - 1]).dot(poses[i])
    return transformations
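
# Usage sketch (illustrative poses): two frames translated along x produce a
# relative transform equal to the per-frame motion.
example_pose0 = np.eye(4)
example_pose1 = np.eye(4)
example_pose1[0, 3] = 1.0  # frame 1 sits 1 unit along x from the origin
example_tfms = poses_to_transforms([example_pose0, example_pose1])
# example_tfms[0] == np.eye(4) and example_tfms[1][0, 3] == 1.0
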
def create_label_image(prediction: np.ndarray, color_palette: OrderedDict):
    r"""Creates a label image, given a network prediction (each pixel contains class index) and
    a color palette.

    Args:
        prediction (numpy.ndarray): Predicted image where each pixel contains an integer,
            corresponding to its class label.
        color_palette (OrderedDict): Contains RGB colors (`uint8`) for each class.

    Returns:
        numpy.ndarray: Label image with the given color palette

    Shape:
        - prediction: :math:`(H, W)`
        - Output: :math:`(H, W, 3)`
    """
    label_image = np.zeros(
        (prediction.shape[0], prediction.shape[1], 3), dtype=np.uint8
    )
    for idx, color in enumerate(color_palette):
        # Entries are iterated in insertion order: the idx-th entry colors class idx.
        label_image[prediction == idx] = color
    return label_image
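
# Usage sketch (hypothetical two-class palette): the function iterates the
# palette directly, so iteration must yield one RGB triple per class index;
# here the OrderedDict keys are the colors. Class 0 paints black, class 1 red.
example_palette = OrderedDict([((0, 0, 0), "background"), ((255, 0, 0), "object")])
example_pred = np.array([[0, 1], [1, 0]])
example_label = create_label_image(example_pred, example_palette)
# example_label[0, 1] == (255, 0, 0)
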