import copy
import warnings
from collections import OrderedDict
from typing import List, Union
import numpy as np
import torch
__all__ = [
"normalize_image",
"channels_first",
"scale_intrinsics",
"pointquaternion_to_homogeneous",
"poses_to_transforms",
"create_label_image",
]
def normalize_image(rgb: Union[torch.Tensor, np.ndarray]):
r"""Normalizes RGB image values from :math:`[0, 255]` range to :math:`[0, 1]` range.
Args:
rgb (torch.Tensor or numpy.ndarray): RGB image in range :math:`[0, 255]`
Returns:
torch.Tensor or numpy.ndarray: Normalized RGB image in range :math:`[0, 1]`
Shape:
- rgb: :math:`(*)` (any shape)
- Output: Same shape as input :math:`(*)`
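
    Example:
        An illustrative sketch; the random ``uint8`` image below is an arbitrary stand-in::

            >>> rgb = np.random.randint(0, 256, (240, 320, 3), dtype=np.uint8)
            >>> normalized = normalize_image(rgb)
            >>> float(normalized.max()) <= 1.0
            True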
"""
if torch.is_tensor(rgb):
return rgb.float() / 255
elif isinstance(rgb, np.ndarray):
return rgb.astype(float) / 255
else:
raise TypeError("Unsupported input rgb type: %r" % type(rgb))
def channels_first(rgb: Union[torch.Tensor, np.ndarray]):
r"""Converts from channels last representation :math:`(*, H, W, C)` to channels first representation
:math:`(*, C, H, W)`
Args:
rgb (torch.Tensor or numpy.ndarray): :math:`(*, H, W, C)` ordering `(*, height, width, channels)`
Returns:
torch.Tensor or numpy.ndarray: :math:`(*, C, H, W)` ordering
Shape:
- rgb: :math:`(*, H, W, C)`
- Output: :math:`(*, C, H, W)`
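
    Example:
        A minimal sketch with a dummy channels-last array (the shape is arbitrary)::

            >>> rgb = np.zeros((480, 640, 3))
            >>> channels_first(rgb).shape
            (3, 480, 640)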
"""
if not (isinstance(rgb, np.ndarray) or torch.is_tensor(rgb)):
raise TypeError("Unsupported input rgb type {}".format(type(rgb)))
if rgb.ndim < 3:
raise ValueError(
"Input rgb must contain atleast 3 dims, but had {} dims.".format(rgb.ndim)
)
if rgb.shape[-3] < rgb.shape[-1]:
msg = "Are you sure that the input is correct? Number of channels exceeds height of image: %r > %r"
warnings.warn(msg % (rgb.shape[-1], rgb.shape[-3]))
ordering = list(range(rgb.ndim))
ordering[-2], ordering[-1], ordering[-3] = ordering[-3], ordering[-2], ordering[-1]
if isinstance(rgb, np.ndarray):
return np.ascontiguousarray(rgb.transpose(*ordering))
elif torch.is_tensor(rgb):
return rgb.permute(*ordering).contiguous()
def scale_intrinsics(
intrinsics: Union[np.ndarray, torch.Tensor],
h_ratio: Union[float, int],
w_ratio: Union[float, int],
):
r"""Scales the intrinsics appropriately for resized frames where
:math:`h_\text{ratio} = h_\text{new} / h_\text{old}` and :math:`w_\text{ratio} = w_\text{new} / w_\text{old}`
Args:
intrinsics (numpy.ndarray or torch.Tensor): Intrinsics matrix of original frame
h_ratio (float or int): Ratio of new frame's height to old frame's height
:math:`h_\text{ratio} = h_\text{new} / h_\text{old}`
w_ratio (float or int): Ratio of new frame's width to old frame's width
:math:`w_\text{ratio} = w_\text{new} / w_\text{old}`
Returns:
        numpy.ndarray or torch.Tensor: Intrinsics matrix scaled appropriately for the new frame size
Shape:
- intrinsics: :math:`(*, 3, 3)` or :math:`(*, 4, 4)`
- Output: Matches `intrinsics` shape, :math:`(*, 3, 3)` or :math:`(*, 4, 4)`
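
    Example:
        An illustrative sketch that halves the frame resolution (the intrinsics values below are made up)::

            >>> K = np.array([[525.0, 0.0, 320.0], [0.0, 525.0, 240.0], [0.0, 0.0, 1.0]])
            >>> K_scaled = scale_intrinsics(K, h_ratio=0.5, w_ratio=0.5)
            >>> bool(K_scaled[0, 0] == 262.5)  # fx scaled by w_ratio
            True
            >>> bool(K_scaled[1, 2] == 120.0)  # cy scaled by h_ratio
            True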
"""
if isinstance(intrinsics, np.ndarray):
scaled_intrinsics = intrinsics.astype(np.float32).copy()
elif torch.is_tensor(intrinsics):
scaled_intrinsics = intrinsics.to(torch.float).clone()
else:
raise TypeError("Unsupported input intrinsics type {}".format(type(intrinsics)))
if not (intrinsics.shape[-2:] == (3, 3) or intrinsics.shape[-2:] == (4, 4)):
raise ValueError(
"intrinsics must have shape (*, 3, 3) or (*, 4, 4), but had shape {} instead".format(
intrinsics.shape
)
)
if (intrinsics[..., -1, -1] != 1).any() or (intrinsics[..., 2, 2] != 1).any():
warnings.warn(
"Incorrect intrinsics: intrinsics[..., -1, -1] and intrinsics[..., 2, 2] should be 1."
)
scaled_intrinsics[..., 0, 0] *= w_ratio # fx
scaled_intrinsics[..., 1, 1] *= h_ratio # fy
scaled_intrinsics[..., 0, 2] *= w_ratio # cx
scaled_intrinsics[..., 1, 2] *= h_ratio # cy
return scaled_intrinsics
def pointquaternion_to_homogeneous(
pointquaternions: Union[np.ndarray, torch.Tensor], eps: float = 1e-12
):
r"""Converts 3D point and unit quaternions :math:`(t_x, t_y, t_z, q_x, q_y, q_z, q_w)` to
homogeneous transformations [R | t] where :math:`R` denotes the :math:`(3, 3)` rotation matrix and :math:`T`
denotes the :math:`(3, 1)` translation matrix:
.. math::
\left[\begin{array}{@{}c:c@{}}
R & T \\ \hdashline
\begin{array}{@{}ccc@{}}
0 & 0 & 0
\end{array} & 1
\end{array}\right]
Args:
pointquaternions (numpy.ndarray or torch.Tensor): 3D point positions and unit quaternions
            :math:`(t_x, t_y, t_z, q_x, q_y, q_z, q_w)`, where :math:`(t_x, t_y, t_z)` is the 3D position and
            :math:`(q_x, q_y, q_z, q_w)` is the unit quaternion.
eps (float): Small value, to avoid division by zero. Default: 1e-12
Returns:
numpy.ndarray or torch.Tensor: Homogeneous transformation matrices.
Shape:
- pointquaternions: :math:`(*, 7)`
- Output: :math:`(*, 4, 4)`
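
    Example:
        An illustrative sketch using the identity pose (zero translation and the unit quaternion
        :math:`(0, 0, 0, 1)`)::

            >>> pq = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0])
            >>> T = pointquaternion_to_homogeneous(pq)
            >>> T.shape
            (4, 4)
            >>> bool(np.allclose(T, np.eye(4)))
            True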
"""
if not (
isinstance(pointquaternions, np.ndarray) or torch.is_tensor(pointquaternions)
):
raise TypeError(
'"pointquaternions" must be of type "np.ndarray" or "torch.Tensor". Got {0}'.format(
type(pointquaternions)
)
)
if not isinstance(eps, float):
raise TypeError('"eps" must be of type "float". Got {0}.'.format(type(eps)))
if pointquaternions.shape[-1] != 7:
raise ValueError(
'"pointquaternions" must be of shape (*, 7). Got {0}.'.format(
pointquaternions.shape
)
)
output_shape = (*pointquaternions.shape[:-1], 4, 4)
if isinstance(pointquaternions, np.ndarray):
t = pointquaternions[..., :3].astype(np.float32)
q = pointquaternions[..., 3:7].astype(np.float32)
transform = np.zeros(output_shape, dtype=np.float32)
else:
t = pointquaternions[..., :3].float()
q = pointquaternions[..., 3:7].float()
transform = torch.zeros(
output_shape, dtype=torch.float, device=pointquaternions.device
)
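    # NOTE: the factor 0.5 inside the square root folds the usual factor of 2 from the
    # quaternion-to-rotation-matrix formula into the normalization, so the outer product
    # q q^T computed below directly yields the required 2 * q_i * q_j terms.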
q_norm = (0.5 * (q ** 2).sum(-1)[..., None]) ** 0.5
q /= (
torch.max(q_norm, torch.tensor(eps))
if torch.is_tensor(q_norm)
else np.maximum(q_norm, eps)
)
if isinstance(q, np.ndarray):
q = np.matmul(q[..., None], q[..., None, :])
else:
q = torch.matmul(q.unsqueeze(-1), q.unsqueeze(-2))
txx = q[..., 0, 0]
tyy = q[..., 1, 1]
tzz = q[..., 2, 2]
txy = q[..., 0, 1]
txz = q[..., 0, 2]
tyz = q[..., 1, 2]
twx = q[..., 0, 3]
twy = q[..., 1, 3]
twz = q[..., 2, 3]
transform[..., 0, 0] = 1.0
transform[..., 1, 1] = 1.0
transform[..., 2, 2] = 1.0
transform[..., 3, 3] = 1.0
transform[..., 0, 0] -= tyy + tzz
transform[..., 0, 1] = txy - twz
transform[..., 0, 2] = txz + twy
transform[..., 1, 0] = txy + twz
transform[..., 1, 1] -= txx + tzz
transform[..., 1, 2] = tyz - twx
transform[..., 2, 0] = txz - twy
transform[..., 2, 1] = tyz + twx
transform[..., 2, 2] -= txx + tyy
transform[..., :3, 3] = t
return transform
def create_label_image(prediction: np.ndarray, color_palette: OrderedDict):
r"""Creates a label image, given a network prediction (each pixel contains class index) and a color palette.
Args:
prediction (numpy.ndarray): Predicted image where each pixel contains an integer,
corresponding to its class label.
        color_palette (OrderedDict): Maps each class (in class-index order) to its RGB color (`uint8`).
Returns:
numpy.ndarray: Label image with the given color palette
Shape:
- prediction: :math:`(H, W)`
        - Output: :math:`(H, W, 3)`
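
    Example:
        A tiny two-class sketch (the class names and colors below are made-up placeholders)::

            >>> palette = OrderedDict([("background", (0, 0, 0)), ("object", (255, 0, 0))])
            >>> prediction = np.array([[0, 1], [1, 0]])
            >>> create_label_image(prediction, palette).shape
            (2, 2, 3)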
"""
label_image = np.zeros(
(prediction.shape[0], prediction.shape[1], 3), dtype=np.uint8
)
    # Iterate colors in class-index order: the OrderedDict's values are the RGB colors
    for idx, color in enumerate(color_palette.values()):
        label_image[prediction == idx] = color
return label_image