Source code for gsplat.cuda._wrapper

# SPDX-FileCopyrightText: Copyright 2024-2025 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import math
import types
import warnings
from dataclasses import dataclass
from enum import IntEnum
from abc import ABC
from typing import Any, Callable, Mapping, Optional, Tuple

import torch
from torch import Tensor
from typing_extensions import Literal
from gsplat._helper import assert_shape
from gsplat.trace import trace_function
from gsplat.cuda._lidar import (
    SpinningDirection,
    LidarModelParameters,
    RowOffsetStructuredSpinningLidarModelParameters,
    RowOffsetStructuredSpinningLidarModelParametersExt as RowOffsetStructuredSpinningLidarModelParametersExtBase,
    FOV as FOVBase,
)

# String-literal type whose single allowed value is "bivariate-windshield"
# (presumably selects the external distortion model — confirm against callers).
ExternalDistortionModelMeta = Literal["bivariate-windshield"]
# String-literal type used to annotate the `camera_model` arguments in this
# module (e.g. `proj` and `fully_fused_projection`).
CameraModel = Literal["pinhole", "ortho", "fisheye", "ftheta", "lidar"]


def _make_lazy_cuda_func(name: str) -> Callable:
    def call_cuda(*args, **kwargs):
        # The following import statement is required to ensure that C++ module
        # gsplat/csrc.so is loaded (and JIT-compiled if necessary). Upon module
        # load, the gsplat PyTorch operators are imported into the
        # torch.ops.gsplat submodule.

        # pylint: disable=import-outside-toplevel
        from ._backend import _C

        return getattr(torch.ops.gsplat, name)(*args, **kwargs)

    return call_cuda


def _make_lazy_cuda_cls(name: str) -> Any:
    """Look up the custom class ``torch.classes.gsplat.<name>``.

    Args:
        name: Name of the custom class registered by the gsplat extension.

    Returns:
        The registered class, or the placeholder produced by
        ``_unavailable_cuda_cls`` when the backend module is ``None`` or the
        class is not registered.

    Raises:
        RuntimeError: Re-raised from the lookup when its message does not
            indicate a missing class registration.
    """
    # Importing ``_C`` is what makes sure the C++ module gsplat/csrc.so is
    # loaded (and JIT-compiled if necessary). Loading it registers the gsplat
    # PyTorch custom classes under the torch.classes.gsplat submodule.

    # pylint: disable=import-outside-toplevel
    from ._backend import _C

    if _C is None:
        return _unavailable_cuda_cls(name)

    try:
        cuda_cls = getattr(torch.classes.gsplat, name)
    except RuntimeError as e:
        message = str(e)
        not_registered = "does not exist" in message or "torch::class_" in message
        if not not_registered:
            raise
        # Class not registered (e.g. extension built without it or partial load).
        return _unavailable_cuda_cls(name)
    return cuda_cls


def _unavailable_cuda_cls(name: str) -> Any:
    """Placeholder class when the CUDA extension is not available."""

    class _UnavailableCudaCls:
        __name__ = name

        def __init__(self, *args: Any, **kwargs: Any) -> None:
            raise RuntimeError(
                "gsplat CUDA extension is not available (not built or failed to load). "
                f"Cannot instantiate '{name}'."
            )

    return _UnavailableCudaCls


def _make_lazy_cuda_obj(name: str) -> Any:
    """Resolve a dotted attribute path on the backend module ``_C``.

    Args:
        name: Attribute path relative to ``_C``, with components separated by
            ``"."`` (e.g. ``"RendererConfig.MIXED_BATCH"``).

    Returns:
        The object found at the end of the path.

    Raises:
        RuntimeError: If the backend module is ``None``.
    """
    # pylint: disable=import-outside-toplevel
    from ._backend import _C

    if _C is None:
        raise RuntimeError(
            "gsplat CUDA extension is not available (not built or failed to load). "
            f"Cannot access '{name}'."
        )
    # Walk the path one attribute at a time, starting from the module itself.
    return functools.reduce(getattr, name.split("."), _C)


def renderer_config_mixed_batch() -> Any:
    """Return the CUDA enum value for the MixedBatch renderer config.

    The value is looked up on the backend module as
    ``RendererConfig.MIXED_BATCH`` on every call.

    Raises:
        RuntimeError: If the CUDA extension is not available.
    """
    return _make_lazy_cuda_obj("RendererConfig.MIXED_BATCH")


def renderer_config_parallel_batch() -> Any:
    """Return the CUDA enum value for the ParallelBatch renderer config.

    The value is looked up on the backend module as
    ``RendererConfig.PARALLEL_BATCH`` on every call.

    Raises:
        RuntimeError: If the CUDA extension is not available.
    """
    return _make_lazy_cuda_obj("RendererConfig.PARALLEL_BATCH")


def _renderer_config_to_cuda(renderer_config: Any) -> Any:
    """Normalize a renderer config argument to the value handed to CUDA.

    Args:
        renderer_config: ``None``, a ``gsplat.rendering.RendererConfig``
            instance, or any other object.

    Returns:
        The MixedBatch enum value for ``None``; the result of
        ``gsplat.rendering._renderer_config_type`` for ``RendererConfig``
        instances; otherwise ``renderer_config`` unchanged.
    """
    if renderer_config is None:
        return renderer_config_mixed_batch()

    # RendererConfig is defined in gsplat.rendering, which itself imports this
    # module. Importing it here (instead of at module level) lets this
    # low-level wrapper accept the same config objects as gsplat.rasterization
    # without creating an import cycle.
    from gsplat.rendering import (  # pylint: disable=import-outside-toplevel
        RendererConfig,
        _renderer_config_type,
    )

    if not isinstance(renderer_config, RendererConfig):
        return renderer_config

    # The config -> CUDA enum mapping has a single source of truth in
    # gsplat.rendering (which raises for unknown subtypes).
    return _renderer_config_type(renderer_config)


class RollingShutterType(IntEnum):
    """Integer codes identifying a shutter type.

    Used as the type of the ``rs_type`` argument of ``create_camera_model``,
    whose default is ``GLOBAL``.
    """

    # NOTE(review): the member names suggest the readout direction of a
    # rolling shutter; the exact semantics are defined by the consumer of
    # these codes — confirm against the CUDA side.
    ROLLING_TOP_TO_BOTTOM = 0
    ROLLING_LEFT_TO_RIGHT = 1
    ROLLING_BOTTOM_TO_TOP = 2
    ROLLING_RIGHT_TO_LEFT = 3
    GLOBAL = 4


class FThetaPolynomialType(IntEnum):
    """Integer codes identifying the kind of f-theta polynomial.

    NOTE(review): the member names suggest the direction of the mapping
    (pixel distance to angle, or angle to pixel distance) — confirm against
    the consumer of these codes.
    """

    PIXELDIST_TO_ANGLE = 0
    ANGLE_TO_PIXELDIST = 1


# CUDA custom classes, looked up once when this module is imported. If the
# extension is unavailable (or the class is not registered), the names are
# bound to placeholder classes whose constructor raises RuntimeError
# (see _make_lazy_cuda_cls).
UnscentedTransformParameters = _make_lazy_cuda_cls("UnscentedTransformParameters")
FThetaCameraDistortionParameters = _make_lazy_cuda_cls(
    "FThetaCameraDistortionParameters"
)


class ExternalDistortionModelParameters(ABC):
    """Base class for external distortion model parameters.

    All concrete external distortion models (e.g. BivariateWindshieldModelParameters)
    should inherit from this class so that the rendering API can accept any
    distortion model through a single type-erased parameter.

    The class declares no methods or attributes of its own; it only serves as
    a common base type.
    """


class ExternalDistortionReferencePolynomial(IntEnum):
    """Integer codes identifying a reference polynomial of an external distortion model.

    Note that the codes start at 1, not 0.

    NOTE(review): presumably selects between the forward and backward
    distortion polynomial — confirm against the consumer of these codes.
    """

    FORWARD = 1
    BACKWARD = 2


class BivariateWindshieldModelParameters(ExternalDistortionModelParameters):
    """Thin wrapper around the CUDA BivariateWindshieldModelParameters class.

    torch::Library bindings do not allow standalone constants. This
    wrapper fetches MAX_ORDER and MAX_COEFFS from the C++ static getters
    and exposes them as class-level attributes, preserving the existing
    attribute-access calling convention.

    Instantiating this class returns an instance of the underlying CUDA
    class (not of this wrapper). If the CUDA extension is not available,
    instantiation raises ``RuntimeError``.
    """

    _cuda_cls = None
    MAX_ORDER: int = 5  # default, overridden by C++ value
    MAX_COEFFS: int = 21  # default, overridden by C++ value

    @classmethod
    def _ensure_cuda_cls(cls):
        """Resolve the CUDA class once and refresh the constants from C++."""
        if cls._cuda_cls is not None:
            return

        cuda_cls = _make_lazy_cuda_cls("BivariateWindshieldModelParameters")

        # When the extension is unavailable, ``cuda_cls`` is a placeholder
        # without the static getters. Calling them unconditionally used to
        # raise an opaque AttributeError on the first instantiation (and a
        # RuntimeError on later ones, because the placeholder had already
        # been cached). Keep the Python defaults in that case so that
        # instantiating the placeholder consistently raises its descriptive
        # RuntimeError.
        get_max_order = getattr(cuda_cls, "get_max_order", None)
        get_max_coeffs = getattr(cuda_cls, "get_max_coeffs", None)
        if get_max_order is not None and get_max_coeffs is not None:
            cls.MAX_ORDER = get_max_order()
            cls.MAX_COEFFS = get_max_coeffs()

        # Cache only after the constants were handled, so a failure above
        # does not leave a half-initialized class behind.
        cls._cuda_cls = cuda_cls

    def __new__(cls):
        cls._ensure_cuda_cls()
        return cls._cuda_cls()


@functools.lru_cache(maxsize=1)
def _build_config() -> Mapping[str, bool]:
    try:
        from ._backend import _C

        return (
            types.MappingProxyType(_C.build_config())
            if _C is not None
            else types.MappingProxyType({})
        )
    except (ImportError, AttributeError):
        return types.MappingProxyType({})


def _has_build_feature(name: str) -> bool:
    """Return the build-config entry ``name``, or ``False`` when it is absent."""
    return _build_config().get(name, False)


def has_camera_wrappers() -> bool:
    """Return the "camera_wrappers" build-config flag (``False`` if absent)."""
    return _has_build_feature("camera_wrappers")


def has_2dgs() -> bool:
    """Return the "2dgs" build-config flag (``False`` if absent)."""
    return _has_build_feature("2dgs")


def has_3dgs() -> bool:
    """Return the "3dgs" build-config flag (``False`` if absent)."""
    return _has_build_feature("3dgs")


def has_3dgut() -> bool:
    """Return the "3dgut" build-config flag (``False`` if absent)."""
    return _has_build_feature("3dgut")


def has_adam() -> bool:
    """Return the "adam" build-config flag (``False`` if absent)."""
    return _has_build_feature("adam")


def has_reloc() -> bool:
    """Return the "reloc" build-config flag (``False`` if absent)."""
    return _has_build_feature("reloc")


def has_losses() -> bool:
    """Return the "losses" build-config flag (``False`` if absent)."""
    return _has_build_feature("losses")


def create_camera_model(
    camera_model: str,
    width: Optional[int] = None,
    height: Optional[int] = None,
    principal_points: Optional[Tensor] = None,
    focal_lengths: Optional[Tensor] = None,
    radial_coeffs: Optional[Tensor] = None,
    tangential_coeffs: Optional[Tensor] = None,
    thin_prism_coeffs: Optional[Tensor] = None,
    ftheta_coeffs: Optional[FThetaCameraDistortionParameters] = None,
    external_distortion_coeffs: Optional[BivariateWindshieldModelParameters] = None,
    rs_type: RollingShutterType = RollingShutterType.GLOBAL,
    lidar_coeffs: Optional["RowOffsetStructuredSpinningLidarModelParametersExt"] = None,
):
    """Instantiate the CUDA camera model object for ``camera_model``.

    For ``camera_model == "lidar"`` only ``lidar_coeffs`` is used: it is
    converted with ``to_cpp()`` and wrapped in the CUDA
    ``RowOffsetStructuredSpinningLidarModel`` class. For every other value the
    remaining arguments are forwarded to ``BaseCameraModel.create``.

    Args:
        camera_model: Camera model name.
        width: Image width. Required for non-lidar camera models.
        height: Image height. Required for non-lidar camera models.
        principal_points: Required for non-lidar camera models.
        focal_lengths: Forwarded to ``BaseCameraModel.create``.
        radial_coeffs: Forwarded to ``BaseCameraModel.create``.
        tangential_coeffs: Forwarded to ``BaseCameraModel.create``.
        thin_prism_coeffs: Forwarded to ``BaseCameraModel.create``.
        ftheta_coeffs: Forwarded to ``BaseCameraModel.create``.
        external_distortion_coeffs: Forwarded to ``BaseCameraModel.create``.
        rs_type: Forwarded to ``BaseCameraModel.create``.
        lidar_coeffs: Lidar parameters. Required for the lidar camera model.

    Returns:
        The CUDA camera model object.

    Raises:
        AssertionError: If an argument required for the selected camera model
            is ``None``.
    """
    if camera_model != "lidar":
        assert width is not None, "width is required for non-lidar camera models"
        assert height is not None, "height is required for non-lidar camera models"
        assert (
            principal_points is not None
        ), "principal_points is required for non-lidar camera models"
        base_camera_cls = _make_lazy_cuda_cls("BaseCameraModel")
        return base_camera_cls.create(
            width,
            height,
            camera_model,
            principal_points,
            focal_lengths,
            radial_coeffs,
            tangential_coeffs,
            thin_prism_coeffs,
            ftheta_coeffs,
            external_distortion_coeffs,
            rs_type,
        )

    assert lidar_coeffs is not None, "lidar_coeffs is required for lidar camera model"
    lidar_model_cls = _make_lazy_cuda_cls("RowOffsetStructuredSpinningLidarModel")
    return lidar_model_cls(lidar_coeffs.to_cpp())


class FOV(FOVBase):
    """``FOVBase`` extended with a conversion to the CUDA ``FOV`` custom class."""

    @classmethod
    def from_base(cls, base: FOVBase) -> "FOV":
        """Create an instance from the ``start``, ``span`` and ``direction`` of ``base``."""
        fields = {
            "start": base.start,
            "span": base.span,
            "direction": base.direction,
        }
        return cls(**fields)

    def to_cpp(self):
        """Build the CUDA ``FOV`` object from ``start`` and ``span``."""
        # NOTE(review): ``direction`` is not forwarded to the CUDA class here;
        # confirm this is intended.
        cuda_fov_cls = _make_lazy_cuda_cls("FOV")
        return cuda_fov_cls(start=self.start, span=self.span)


class RowOffsetStructuredSpinningLidarModelParametersExt(
    RowOffsetStructuredSpinningLidarModelParametersExtBase
):
    """Lidar camera parameters extended with acceleration structures"""

    def to_cpp(self) -> Any:
        """Convert to C++ custom class instance.

        Tensor fields are made contiguous before being handed to the CUDA
        class, the two fields of view are converted through ``FOV.to_cpp()``,
        and the spinning direction is passed as its enum ``value``.
        """
        cuda_params_cls = _make_lazy_cuda_cls(
            "RowOffsetStructuredSpinningLidarModelParametersExt"
        )

        # Fields read directly from this object.
        # NOTE(review): ``angles_to_columns_map`` is forwarded as-is, without
        # ``.contiguous()`` — confirm whether that is intended.
        ctor_kwargs = {
            "row_elevations_rad": self.row_elevations_rad.contiguous(),
            "column_azimuths_rad": self.column_azimuths_rad.contiguous(),
            "row_azimuth_offsets_rad": self.row_azimuth_offsets_rad.contiguous(),
            "spinning_direction": self.spinning_direction.value,
            "spinning_frequency_hz": self.spinning_frequency_hz,
            "fov_vert_rad": FOV.from_base(self.fov_vert_rad).to_cpp(),
            "fov_horiz_rad": FOV.from_base(self.fov_horiz_rad).to_cpp(),
            "fov_eps_rad": self.fov_eps_rad,
            "angles_to_columns_map": self.angles_to_columns_map,
        }

        # Fields read from the nested ``tiling`` object.
        tiling = self.tiling
        ctor_kwargs.update(
            n_bins_azimuth=tiling.n_bins_azimuth,
            n_bins_elevation=tiling.n_bins_elevation,
            cdf_elevation=tiling.cdf_elevation.contiguous(),
            cdf_dense_ray_mask=tiling.cdf_dense_ray_mask.contiguous(),
            tiles_to_elements_map=tiling.tiles_to_elements_map.contiguous(),
            tiles_pack_info=tiling.tiles_pack_info.contiguous(),
        )
        return cuda_params_cls(**ctor_kwargs)


def world_to_cam(
    means: Tensor,  # [..., N, 3]
    covars: Tensor,  # [..., N, 3, 3]
    viewmats: Tensor,  # [..., C, 4, 4]
) -> Tuple[Tensor, Tensor]:
    """Transforms Gaussians from world to camera coordinate system.

    Deprecated: this function emits a ``DeprecationWarning`` and delegates to
    the PyTorch implementation ``_torch_impl._world_to_cam``.

    Args:
        means: Gaussian means. [..., N, 3]
        covars: Gaussian covariances. [..., N, 3, 3]
        viewmats: World-to-camera transformation matrices. [..., C, 4, 4]

    Returns:
        A tuple:

        - **Gaussian means in camera coordinate system**. [..., C, N, 3]
        - **Gaussian covariances in camera coordinate system**. [..., C, N, 3, 3]

    Raises:
        AssertionError: If the input shapes are inconsistent.
    """
    from ._torch_impl import _world_to_cam

    warnings.warn(
        "world_to_cam() is removed from the CUDA backend as it's relatively easy to "
        "implement in PyTorch. Currently use the PyTorch implementation instead. "
        "This function will be completely removed in a future release.",
        DeprecationWarning,
        # Attribute the warning to the caller's line, not to this module, so
        # that users can locate (and filters can match) the deprecated call.
        stacklevel=2,
    )
    batch_dims = means.shape[:-2]
    N = means.shape[-2]
    C = viewmats.shape[-3]
    assert means.shape == batch_dims + (N, 3), means.shape
    assert covars.shape == batch_dims + (N, 3, 3), covars.shape
    assert viewmats.shape == batch_dims + (C, 4, 4), viewmats.shape
    means = means.contiguous()
    covars = covars.contiguous()
    viewmats = viewmats.contiguous()
    return _world_to_cam(means, covars, viewmats)
def adam(
    param: Tensor,
    param_grad: Tensor,
    exp_avg: Tensor,
    exp_avg_sq: Tensor,
    valid: Tensor,
    lr: float,
    b1: float,
    b2: float,
    eps: float,
) -> None:
    """Invoke the ``adam`` operator of the gsplat CUDA backend.

    All arguments are forwarded positionally and unchanged to
    ``torch.ops.gsplat.adam``; nothing is returned.

    NOTE(review): presumably the operator updates ``param``, ``exp_avg`` and
    ``exp_avg_sq`` in place — confirm against the CUDA implementation.
    """
    adam_op = _make_lazy_cuda_func("adam")
    adam_op(param, param_grad, exp_avg, exp_avg_sq, valid, lr, b1, b2, eps)
@trace_function("sh-fwd")
def spherical_harmonics(
    degrees_to_use: int,
    dirs: Tensor,  # [..., N, 3]
    coeffs: Tensor,  # [N, K, D]
    masks: Optional[Tensor] = None,  # [..., N]
) -> Tensor:
    """Computes spherical harmonics.

    The number of output channels ``D`` is the last dimension of ``coeffs``
    and may be any positive integer (e.g. 3 for RGB, 1 for scalar features).
    In packed mode, callers pre-gather coeffs by ``gaussian_ids`` so ``N`` is
    ``nnz`` and ``dirs`` has no leading dims.

    Args:
        degrees_to_use: SH degree to evaluate. ``(degrees_to_use + 1) ** 2``
            must not exceed ``K``.
        dirs: View directions. ``[..., N, 3]``; any leading shape, rank >= 2.
        coeffs: SH coefficients. ``[N, K, D]``, with ``N`` matching
            ``dirs.shape[-2]``.
        masks: Optional boolean masks. ``[..., N]`` matching ``dirs.shape[:-1]``.

    Returns:
        Spherical harmonics. ``[..., N, D]``.
    """
    assert dirs.dim() >= 2 and dirs.shape[-1] == 3, dirs.shape
    assert coeffs.dim() == 3 and coeffs.shape[-1] >= 1, coeffs.shape
    assert coeffs.shape[0] == dirs.shape[-2], (coeffs.shape, dirs.shape)
    # A degree-L evaluation reads (L + 1)^2 coefficients per Gaussian.
    num_bases = (degrees_to_use + 1) ** 2
    assert num_bases <= coeffs.shape[-2], coeffs.shape

    if masks is not None:
        assert masks.shape == dirs.shape[:-1], (masks.shape, dirs.shape[:-1])
        masks = masks.contiguous()

    dirs_c = dirs.contiguous()
    coeffs_c = coeffs.contiguous()
    return _SphericalHarmonics.apply(degrees_to_use, dirs_c, coeffs_c, masks)
def quat_scale_to_covar_preci(
    quats: Tensor,  # [..., 4]
    scales: Tensor,  # [..., 3]
    compute_covar: bool = True,
    compute_preci: bool = True,
    triu: bool = False,
) -> Tuple[Optional[Tensor], Optional[Tensor]]:
    """Converts quaternions and scales to covariance and precision matrices.

    Args:
        quats: Quaternions (No need to be normalized). [..., 4]
        scales: Scales. [..., 3]
        compute_covar: Whether to compute covariance matrices. Default: True.
            If False, ``None`` is returned in place of the covariances.
        compute_preci: Whether to compute precision matrices. Default: True.
            If False, ``None`` is returned in place of the precisions.
        triu: If True, the returned matrices are upper triangular.
            Default: False.

    Returns:
        A tuple:

        - **Covariance matrices**. [..., 6] if `triu` is True, otherwise
          [..., 3, 3].
        - **Precision matrices**. [..., 6] if `triu` is True, otherwise
          [..., 3, 3].
    """
    lead_shape = quats.shape[:-1]
    assert quats.shape == lead_shape + (4,), quats.shape
    assert scales.shape == lead_shape + (3,), scales.shape

    covars, precis = _QuatScaleToCovarPreci.apply(
        quats.contiguous(),
        scales.contiguous(),
        compute_covar,
        compute_preci,
        triu,
    )
    # Outputs that were not requested are reported as None.
    if not compute_covar:
        covars = None
    if not compute_preci:
        precis = None
    return covars, precis
def persp_proj(
    means: Tensor,  # [..., C, N, 3]
    covars: Tensor,  # [..., C, N, 3, 3]
    Ks: Tensor,  # [..., C, 3, 3]
    width: int,
    height: int,
) -> Tuple[Tensor, Tensor]:
    """Perspective projection on Gaussians.

    DEPRECATED: please use `proj` with `camera_model="pinhole"` instead.

    Args:
        means: Gaussian means. [..., C, N, 3]
        covars: Gaussian covariances. [..., C, N, 3, 3]
        Ks: Camera intrinsics. [..., C, 3, 3]
        width: Image width.
        height: Image height.

    Returns:
        A tuple:

        - **Projected means**. [..., C, N, 2]
        - **Projected covariances**. [..., C, N, 2, 2]
    """
    warnings.warn(
        "persp_proj is deprecated and will be removed in a future release. "
        'Use proj with camera_model="pinhole" instead.',
        DeprecationWarning,
        # Attribute the warning to the caller's line rather than this module.
        stacklevel=2,
    )
    # `proj` selects the projection through `camera_model`; it has no `ortho`
    # keyword, so the former `ortho=False` call raised TypeError on every use.
    return proj(means, covars, Ks, width, height, camera_model="pinhole")
def proj(
    means: Tensor,  # [..., C, N, 3]
    covars: Tensor,  # [..., C, N, 3, 3]
    Ks: Tensor,  # [..., C, 3, 3]
    width: int,
    height: int,
    camera_model: CameraModel = "pinhole",
) -> Tuple[Tensor, Tensor]:
    """Projection of Gaussians (perspective or orthographic).

    Args:
        means: Gaussian means. [..., C, N, 3]
        covars: Gaussian covariances. [..., C, N, 3, 3]
        Ks: Camera intrinsics. [..., C, 3, 3]
        width: Image width.
        height: Image height.
        camera_model: Camera model name. ``"ftheta"`` is rejected here.
            Default: ``"pinhole"``.

    Returns:
        A tuple:

        - **Projected means**. [..., C, N, 2]
        - **Projected covariances**. [..., C, N, 2, 2]

    Raises:
        AssertionError: If ``camera_model`` is ``"ftheta"`` or the input
            shapes are inconsistent.
    """
    assert (
        camera_model != "ftheta"
    ), "ftheta camera is only supported via UT, please set with_ut=True in the rasterization()"

    lead_shape = means.shape[:-3]
    num_cameras, num_gaussians = means.shape[-3:-1]
    assert means.shape == lead_shape + (num_cameras, num_gaussians, 3), means.shape
    assert covars.shape == lead_shape + (num_cameras, num_gaussians, 3, 3), covars.shape
    assert Ks.shape == lead_shape + (num_cameras, 3, 3), Ks.shape

    return _Proj.apply(
        means.contiguous(),
        covars.contiguous(),
        Ks.contiguous(),
        width,
        height,
        camera_model,
    )
@trace_function("project-fwd")
def fully_fused_projection(
    means: Tensor,  # [..., N, 3]
    covars: Optional[Tensor],  # [..., N, 6] or None
    quats: Optional[Tensor],  # [..., N, 4] or None
    scales: Optional[Tensor],  # [..., N, 3] or None
    viewmats: Tensor,  # [..., C, 4, 4]
    Ks: Tensor,  # [..., C, 3, 3]
    width: int,
    height: int,
    eps2d: float = 0.3,
    near_plane: float = 0.01,
    far_plane: float = 1e10,
    radius_clip: float = 0.0,
    packed: bool = False,
    sparse_grad: bool = False,
    calc_compensations: bool = False,
    camera_model: CameraModel = "pinhole",
    opacities: Optional[Tensor] = None,  # [..., N] or None
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
    """Projects Gaussians to 2D.

    This function fuses the process of computing covariances
    (:func:`quat_scale_to_covar_preci()`), transforming to camera space
    (:func:`world_to_cam()`), and projection (:func:`proj()`).

    .. note::

        During projection, Gaussians outside of the camera frustum are
        ignored, so not all elements of the output tensors are valid. The
        output `radii` serves as an indicator: zero radii mark elements that
        are invalid and will be ignored by the subsequent rasterization. If
        `packed=True`, the outputs are packed into flattened tensors in which
        all elements are valid. In that case `batch_ids`, `camera_ids` and
        `gaussian_ids` tensors are returned to indicate the batch, camera and
        Gaussian indices of the packed elements, essentially following the COO
        sparse tensor format.

    .. note::

        Gaussians can be given either as covariances or as
        {quaternions, scales}; the latter are converted to covariances
        internally in a fused CUDA kernel. Either `covars` or
        {`quats`, `scales`} should be provided.

    Args:
        means: Gaussian means. [..., N, 3]
        covars: Gaussian covariances (flattened upper triangle). [..., N, 6]
            Optional.
        quats: Quaternions (No need to be normalized). [..., N, 4] Optional.
        scales: Scales. [..., N, 3] Optional.
        viewmats: World-to-camera matrices. [..., C, 4, 4]
        Ks: Camera intrinsics. [..., C, 3, 3]
        width: Image width.
        height: Image height.
        eps2d: An epsilon added to the 2D covariance for numerical stability.
            Default: 0.3.
        near_plane: Near plane distance. Default: 0.01.
        far_plane: Far plane distance. Default: 1e10.
        radius_clip: Gaussians with projected radii smaller than this value
            will be ignored. Default: 0.0.
        packed: If True, the output tensors will be packed into a flattened
            tensor. Default: False.
        sparse_grad: Only effective when `packed` is True. If True, during
            backward the gradients of {`means`, `covars`, `quats`, `scales`}
            will be a sparse Tensor in COO layout. Batch dimensions are not
            supported in this mode. Default: False.
        calc_compensations: If True, a view-dependent opacity compensation
            factor will be computed, which is useful for anti-aliasing.
            Default: False.
        camera_model: Camera model name. ``"ftheta"`` is rejected here.
            Default: ``"pinhole"``.
        opacities: Gaussian opacities in range [0, 1]. If provided, they are
            used to compute tighter bounds. [..., N] or None. Default: None.

    Returns:
        A tuple:

        If `packed` is True:

        - **batch_ids**. The batch indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **camera_ids**. The camera indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **gaussian_ids**. The column indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **indptr**. CSR-style index pointer into gaussian_ids for batch-camera pairs. Int32 tensor of shape [B*C+1].
        - **radii**. The maximum radius of the projected Gaussians in pixel unit. Int32 tensor of shape [nnz, 2].
        - **means**. Projected Gaussian means in 2D. [nnz, 2]
        - **depths**. The z-depth of the projected Gaussians. [nnz]
        - **conics**. Inverse of the projected covariances. Flattened upper triangle with shape [nnz, 3]
        - **compensations**. The view-dependent opacity compensation factor. [nnz]

        If `packed` is False:

        - **radii**. The maximum radius of the projected Gaussians in pixel unit. Int32 tensor of shape [..., C, N, 2].
        - **means**. Projected Gaussian means in 2D. [..., C, N, 2]
        - **depths**. The z-depth of the projected Gaussians. [..., C, N]
        - **conics**. Inverse of the projected covariances. Flattened upper triangle with shape [..., C, N, 3]
        - **compensations**. The view-dependent opacity compensation factor. [..., C, N]
    """
    lead_shape = means.shape[:-2]
    num_gaussians = means.shape[-2]
    num_cameras = viewmats.shape[-3]
    assert means.shape == lead_shape + (num_gaussians, 3), means.shape
    assert viewmats.shape == lead_shape + (num_cameras, 4, 4), viewmats.shape
    assert Ks.shape == lead_shape + (num_cameras, 3, 3), Ks.shape
    means = means.contiguous()

    if covars is None:
        # Without covariances, both quaternions and scales must be given.
        assert quats is not None, "covars or quats is required"
        assert scales is not None, "covars or scales is required"
        assert quats.shape == lead_shape + (num_gaussians, 4), quats.shape
        assert scales.shape == lead_shape + (num_gaussians, 3), scales.shape
        quats = quats.contiguous()
        scales = scales.contiguous()
    else:
        assert covars.shape == lead_shape + (num_gaussians, 6), covars.shape
        covars = covars.contiguous()

    if sparse_grad:
        assert packed, "sparse_grad is only supported when packed is True"
        assert lead_shape == (), "sparse_grad does not support batch dimensions"

    if opacities is not None:
        assert opacities.shape == lead_shape + (num_gaussians,), opacities.shape
        opacities = opacities.contiguous()

    assert (
        camera_model != "ftheta"
    ), "ftheta camera is only supported via UT, please set with_ut=True in the rasterization()"

    viewmats = viewmats.contiguous()
    Ks = Ks.contiguous()

    # Leading arguments shared by the packed and the unpacked autograd function.
    shared_args = (
        means,
        covars,
        quats,
        scales,
        viewmats,
        Ks,
        width,
        height,
        eps2d,
        near_plane,
        far_plane,
        radius_clip,
    )
    if packed:
        return _FullyFusedProjectionPacked.apply(
            *shared_args,
            sparse_grad,
            calc_compensations,
            camera_model,
            opacities,
        )
    return _FullyFusedProjection.apply(
        *shared_args,
        calc_compensations,
        camera_model,
        opacities,
    )
@torch.no_grad()
@trace_function("isect-camera")
def isect_tiles(
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    radii: Tensor,  # [..., N, 2] or [nnz, 2]
    depths: Tensor,  # [..., N] or [nnz]
    tile_size: int,
    tile_width: int,
    tile_height: int,
    sort: bool = True,
    segmented: bool = False,
    packed: bool = False,
    n_images: Optional[int] = None,
    image_ids: Optional[Tensor] = None,
    gaussian_ids: Optional[Tensor] = None,
    conics: Optional[
        Tensor
    ] = None,  # [..., N, 3] or [nnz, 3], enables AccuTile when provided
    opacities: Optional[
        Tensor
    ] = None,  # [..., N] or [nnz], enables AccuTile when provided
) -> Tuple[Tensor, Tensor, Tensor]:
    """Maps projected Gaussians to intersecting tiles.

    When `conics` and `opacities` are provided the kernel uses conservative
    ellipse intersection (AccuTile/SNUGBOX), skipping tiles that the
    opacity-thresholded ellipse does not touch. When either is `None` the
    kernel falls back to the original axis-aligned bounding box.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] if packed is False,
            [nnz, 2] if packed is True.
        radii: Maximum radii of the projected Gaussians. [..., N, 2] if packed
            is False, [nnz, 2] if packed is True.
        depths: Z-depth of the projected Gaussians. [..., N] if packed is
            False, [nnz] if packed is True.
        tile_size: Tile size.
        tile_width: Tile width.
        tile_height: Tile height.
        sort: If True, the returned intersections will be sorted by the
            intersection ids. Default: True.
        segmented: If True, segmented radix sort will be used to sort the
            intersections. Default: False.
        packed: If True, the input tensors are packed. Default: False.
        n_images: Number of images. Required if packed is True.
        image_ids: The image indices of the projected Gaussians. Required if
            packed is True.
        gaussian_ids: The column indices of the projected Gaussians. Required
            if packed is True.
        conics: Inverse of projected covariances (upper triangle). [..., N, 3]
            if packed is False, [nnz, 3] if packed is True. Enables AccuTile
            when provided together with opacities.
        opacities: Gaussian opacities. [..., N] if packed is False, [nnz] if
            packed is True. Enables AccuTile when provided together with
            conics.

    Returns:
        A tuple:

        - **Tiles per Gaussian**. The number of tiles intersected by each Gaussian.
          Int32 [..., N] if packed is False, Int32 [nnz] if packed is True.
        - **Intersection ids**. Each id is a 64-bit integer with the following
          information: image_id (Xc bits) | tile_id (Xt bits) | depth (32 bits).
          Xc and Xt are the maximum number of bits required to represent the image
          and tile ids, respectively. Int64 [n_isects]
        - **Flatten ids**. The global flatten indices in [I * N] or [nnz] (packed). [n_isects]
    """
    if packed:
        nnz = means2d.size(0)
        assert means2d.shape == (nnz, 2), means2d.shape
        assert radii.shape == (nnz, 2), radii.shape
        assert depths.shape == (nnz,), depths.shape
        if conics is not None:
            assert conics.shape == (nnz, 3), conics.shape
        if opacities is not None:
            assert opacities.shape == (nnz,), opacities.shape
        assert image_ids is not None, "image_ids is required if packed is True"
        assert gaussian_ids is not None, "gaussian_ids is required if packed is True"
        assert n_images is not None, "n_images is required if packed is True"
        image_ids = image_ids.contiguous()
        gaussian_ids = gaussian_ids.contiguous()
        num_images = n_images
    else:
        # Every leading dimension enumerates images; the kernel receives their
        # total count.
        lead_dims = means2d.shape[:-2]
        num_images = math.prod(lead_dims)
        num_gaussians = means2d.shape[-2]
        assert means2d.shape == (*lead_dims, num_gaussians, 2), means2d.shape
        assert radii.shape == (*lead_dims, num_gaussians, 2), radii.shape
        assert depths.shape == (*lead_dims, num_gaussians), depths.shape
        if conics is not None:
            assert conics.shape == (*lead_dims, num_gaussians, 3), conics.shape
        if opacities is not None:
            assert opacities.shape == (*lead_dims, num_gaussians), opacities.shape

    intersect_op = _make_lazy_cuda_func("intersect_tile")
    conics_arg = None if conics is None else conics.contiguous()
    opacities_arg = None if opacities is None else opacities.contiguous()
    tiles_per_gauss, isect_ids, flatten_ids = intersect_op(
        means2d.contiguous(),
        radii.contiguous(),
        depths.contiguous(),
        conics_arg,
        opacities_arg,
        image_ids,
        gaussian_ids,
        num_images,
        tile_size,
        tile_width,
        tile_height,
        sort,
        segmented,
    )
    return tiles_per_gauss, isect_ids, flatten_ids
@torch.no_grad()
@trace_function("isect-lidar")
def isect_tiles_lidar(
    lidar: RowOffsetStructuredSpinningLidarModelParametersExt,
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    radii: Tensor,  # [..., N, 2] or [nnz, 2]
    depths: Tensor,  # [..., N] or [nnz]
    sort: bool = True,
    segmented: bool = False,
    packed: bool = False,
    n_images: Optional[int] = None,
    image_ids: Optional[Tensor] = None,
    gaussian_ids: Optional[Tensor] = None,
) -> Tuple[Tensor, Tensor, Tensor]:
    """Maps projected Gaussians to the lidar tiles they intersect.

    Lidar counterpart of the tile intersection step: the inputs are validated
    and forwarded to the ``intersect_tile_lidar`` CUDA operator.

    Args:
        lidar: Lidar model parameters. Converted with ``lidar.to_cpp()`` before
            being handed to the CUDA operator.
        means2d: Projected Gaussian means. [..., N, 2] if packed is False,
            [nnz, 2] if packed is True.
        radii: Maximum radii of the projected Gaussians. [..., N, 2] if packed
            is False, [nnz, 2] if packed is True.
        depths: Z-depth of the projected Gaussians. [..., N] if packed is
            False, [nnz] if packed is True.
        sort: If True, the returned intersections will be sorted by the
            intersection ids. Default: True.
        segmented: If True, segmented radix sort will be used to sort the
            intersections. Default: False.
        packed: If True, the input tensors are packed. Default: False.
        n_images: Number of images. Required if packed is True.
        image_ids: The image indices of the projected Gaussians. Required if
            packed is True.
        gaussian_ids: The column indices of the projected Gaussians. Required
            if packed is True.

    Returns:
        A tuple:

        - **Tiles per Gaussian**. The number of tiles intersected by each
          Gaussian. Int32 [..., N] if packed is False, Int32 [nnz] if packed
          is True.
        - **Intersection ids**. Each id is a 64-bit integer with the following
          information: image_id (Xc bits) | tile_id (Xt bits) | depth (32 bits).
          Xc and Xt are the maximum number of bits required to represent the
          image and tile ids, respectively. Int64 [n_isects]
        - **Flatten ids**. The global flatten indices in [I * N] or [nnz]
          (packed). [n_isects]
    """
    if not packed:
        # Dense layout: every leading dimension of means2d indexes an image.
        leading_dims = means2d.shape[:-2]
        num_images = math.prod(leading_dims)
        num_gaussians = means2d.shape[-2]
        assert means2d.shape == (*leading_dims, num_gaussians, 2), means2d.shape
        assert radii.shape == (*leading_dims, num_gaussians, 2), radii.shape
        assert depths.shape == (*leading_dims, num_gaussians), depths.shape
    else:
        # Packed layout: one flat list of entries plus explicit index tensors.
        num_entries = means2d.size(0)
        assert means2d.shape == (num_entries, 2), means2d.shape
        assert radii.shape == (num_entries, 2), radii.shape
        assert depths.shape == (num_entries,), depths.shape
        assert image_ids is not None, "image_ids is required if packed is True"
        assert gaussian_ids is not None, "gaussian_ids is required if packed is True"
        assert n_images is not None, "n_images is required if packed is True"
        image_ids = image_ids.contiguous()
        gaussian_ids = gaussian_ids.contiguous()
        num_images = n_images

    intersect = _make_lazy_cuda_func("intersect_tile_lidar")
    tile_counts, intersection_ids, flat_ids = intersect(
        lidar.to_cpp(),
        means2d.contiguous(),
        radii.contiguous(),
        depths.contiguous(),
        image_ids,
        gaussian_ids,
        num_images,
        sort,
        segmented,
    )
    return tile_counts, intersection_ids, flat_ids
@torch.no_grad()
@trace_function("offsets")
def isect_offset_encode(
    isect_ids: Tensor,
    n_images: int,
    tile_width: int,
    tile_height: int,
) -> Tensor:
    """Encodes intersection ids to offsets.

    Thin wrapper around the ``intersect_offset`` CUDA operator; ``isect_ids``
    is made contiguous before the call.

    Args:
        isect_ids: Intersection ids. [n_isects]
        n_images: Number of images.
        tile_width: Tile width.
        tile_height: Tile height.

    Returns:
        Offsets. [I, tile_height, tile_width]
    """
    encode_offsets = _make_lazy_cuda_func("intersect_offset")
    packed_ids = isect_ids.contiguous()
    return encode_offsets(packed_ids, n_images, tile_width, tile_height)
@trace_function("render2D-fwd")
def rasterize_to_pixels(
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    conics: Tensor,  # [..., N, 3] or [nnz, 3]
    colors: Tensor,  # [..., N, channels] or [nnz, channels]
    opacities: Tensor,  # [..., N] or [nnz]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    backgrounds: Optional[Tensor] = None,  # [..., channels]
    masks: Optional[Tensor] = None,  # [..., tile_height, tile_width]
    packed: bool = False,
    absgrad: bool = False,
) -> Tuple[Tensor, Tensor]:
    """Rasterizes Gaussians to pixels.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] if packed is False,
            [nnz, 2] if packed is True.
        conics: Inverse of the projected covariances with only upper triangle
            values. [..., N, 3] if packed is False, [nnz, 3] if packed is True.
        colors: Gaussian colors or ND features. [..., N, channels] if packed is
            False, [nnz, channels] if packed is True. ``colors.shape[-1]`` must
            be one of the channel counts compiled into ``GSPLAT_NUM_CHANNELS``
            (see ``gsplat/cuda/csrc/Config.h``); otherwise the CUDA kernel
            raises ``ValueError``.
        opacities: Gaussian opacities that support per-view values. [..., N] if
            packed is False, [nnz] if packed is True.
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size. Must be 4 or 16.
        isect_offsets: Intersection offsets outputs from
            `isect_offset_encode()`. [..., tile_height, tile_width]
        flatten_ids: The global flatten indices in [I * N] or [nnz] from
            `isect_tiles()`. [n_isects]
        backgrounds: Background colors. [..., channels], where the leading
            dimensions are the image dimensions (those of ``isect_offsets``
            when packed is True). Default: None.
        masks: Optional tile mask to skip rendering GS to masked tiles.
            [..., tile_height, tile_width]. Default: None.
        packed: If True, the input tensors are expected to be packed with
            shape [nnz, ...]. Default: False.
        absgrad: If True, the backward pass will compute a `.absgrad`
            attribute for `means2d`. Default: False.

    Returns:
        A tuple:

        - **Rendered colors**. [..., image_height, image_width, channels]
        - **Rendered alphas**. [..., image_height, image_width, 1]

    Raises:
        AssertionError: If ``tile_size`` is unsupported, an input shape is
            inconsistent, or the tile grid does not cover the image.
    """
    assert tile_size in (
        4,
        16,
    ), f"Only tile_size in {{4, 16}} is supported for 3DGS rasterization, got {tile_size}"

    channels = colors.shape[-1]
    if packed:
        # Packed tensors are flattened to [nnz, ...] and carry no image
        # dimensions of their own (means2d.shape[:-2] is always empty), so the
        # image dimensions are taken from the tile offsets instead. Without
        # this, per-image backgrounds of shape [..., channels] could never pass
        # the shape check below.
        image_dims = isect_offsets.shape[:-2]
        nnz = means2d.size(0)
        assert means2d.shape == (nnz, 2), means2d.shape
        assert conics.shape == (nnz, 3), conics.shape
        assert colors.shape[0] == nnz, colors.shape
        assert opacities.shape == (nnz,), opacities.shape
    else:
        image_dims = means2d.shape[:-2]
        N = means2d.size(-2)
        assert means2d.shape == image_dims + (N, 2), means2d.shape
        assert conics.shape == image_dims + (N, 3), conics.shape
        assert colors.shape == image_dims + (N, channels), colors.shape
        assert opacities.shape == image_dims + (N,), opacities.shape

    if backgrounds is not None:
        assert backgrounds.shape == image_dims + (channels,), backgrounds.shape
        backgrounds = backgrounds.contiguous()
    if masks is not None:
        assert masks.shape == isect_offsets.shape, masks.shape
        masks = masks.contiguous()

    # The tile grid must cover the whole image in both directions.
    tile_height, tile_width = isect_offsets.shape[-2:]
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    render_colors, render_alphas = _RasterizeToPixels.apply(
        means2d.contiguous(),
        conics.contiguous(),
        colors.contiguous(),
        opacities.contiguous(),
        backgrounds,
        masks,
        image_width,
        image_height,
        tile_size,
        isect_offsets.contiguous(),
        flatten_ids.contiguous(),
        absgrad,
    )
    return render_colors, render_alphas
@torch.no_grad()
@trace_function("render2D-count")
def rasterize_num_contributing_gaussians(
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    conics: Tensor,  # [..., N, 3] or [nnz, 3]
    opacities: Tensor,  # [..., N] or [nnz]
    tile_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    image_width: int,
    image_height: int,
    tile_size: int,
) -> Tuple[Tensor, Tensor]:
    """Counts contributing Gaussians per pixel and returns accumulated alpha.

    A 2-D ``means2d`` is treated as packed input; anything else is treated as
    dense input whose leading dimensions are the image dimensions.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] for dense inputs, or
            [nnz, 2] for packed inputs.
        conics: Inverse projected covariances. [..., N, 3] for dense inputs,
            or [nnz, 3] for packed inputs.
        opacities: Gaussian opacities. [..., N] for dense inputs, or [nnz] for
            packed inputs.
        tile_offsets: Intersection offsets from :func:`isect_offset_encode`.
            [..., tile_height, tile_width].
        flatten_ids: Flattened Gaussian indices from :func:`isect_tiles`.
            [n_isects].
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size. Must be 4 or 16.

    Returns:
        A tuple:

        - **Number of contributing Gaussians**. [..., image_height, image_width]
        - **Rendered alphas**. [..., image_height, image_width]
    """
    assert tile_size in (
        4,
        16,
    ), f"Only tile_size in {{4, 16}} is supported for 3DGS rasterization, got {tile_size}"

    tile_height, tile_width = tile_offsets.shape[-2:]
    if means2d.dim() != 2:
        # Dense inputs: all tensors share the same leading image dimensions.
        lead_dims = means2d.shape[:-2]
        num_gaussians = means2d.shape[-2]
        assert means2d.shape == lead_dims + (num_gaussians, 2), means2d.shape
        assert conics.shape == lead_dims + (num_gaussians, 3), conics.shape
        assert opacities.shape == lead_dims + (num_gaussians,), opacities.shape
        assert tile_offsets.shape == lead_dims + (
            tile_height,
            tile_width,
        ), tile_offsets.shape
    else:
        # Packed inputs: a flat list of entries.
        num_entries = means2d.shape[0]
        assert means2d.shape == (num_entries, 2), means2d.shape
        assert conics.shape == (num_entries, 3), conics.shape
        assert opacities.shape == (num_entries,), opacities.shape

    # The tile grid must cover the whole image in both directions.
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    count_kernel = _make_lazy_cuda_func("rasterize_num_contributing_gaussians")
    return count_kernel(
        means2d.contiguous(),
        conics.contiguous(),
        opacities.contiguous(),
        tile_offsets.contiguous(),
        flatten_ids.contiguous(),
        image_width,
        image_height,
        tile_size,
    )
@torch.no_grad()
@trace_function("render2D-contributors")
def rasterize_contributing_gaussian_ids(
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    conics: Tensor,  # [..., N, 3] or [nnz, 3]
    opacities: Tensor,  # [..., N] or [nnz]
    tile_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    image_width: int,
    image_height: int,
    tile_size: int,
    num_contributing_gaussians: Tensor,  # [..., image_height, image_width]
) -> Tuple[Tensor, Tensor]:
    """Returns all contributing Gaussian IDs and weights per pixel.

    The output is padded to ``num_contributing_gaussians.max()`` samples per
    pixel. Valid entries are in front-to-back order, IDs are padded with
    ``-1``, and weights are padded with ``0``.

    A 2-D ``means2d`` is treated as packed input, in which case the image
    dimensions are read from ``tile_offsets``; otherwise they are the leading
    dimensions of ``means2d``.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] for dense inputs, or
            [nnz, 2] for packed inputs.
        conics: Inverse projected covariances. [..., N, 3] for dense inputs,
            or [nnz, 3] for packed inputs.
        opacities: Gaussian opacities. [..., N] for dense inputs, or [nnz] for
            packed inputs.
        tile_offsets: Intersection offsets from :func:`isect_offset_encode`.
            [..., tile_height, tile_width].
        flatten_ids: Flattened Gaussian indices from :func:`isect_tiles`.
            [n_isects].
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size. Must be 4 or 16.
        num_contributing_gaussians: Number of valid contributors per pixel.
            [..., image_height, image_width].

    Returns:
        A tuple:

        - **Gaussian IDs**. [..., image_height, image_width, max_num_contributing]
        - **Radiance weights**. [..., image_height, image_width, max_num_contributing]
    """
    assert tile_size in (
        4,
        16,
    ), f"Only tile_size in {{4, 16}} is supported for 3DGS rasterization, got {tile_size}"

    tile_height, tile_width = tile_offsets.shape[-2:]
    if means2d.dim() != 2:
        # Dense inputs: image dimensions are the leading dims of means2d.
        lead_dims = means2d.shape[:-2]
        num_gaussians = means2d.shape[-2]
        assert means2d.shape == lead_dims + (num_gaussians, 2), means2d.shape
        assert conics.shape == lead_dims + (num_gaussians, 3), conics.shape
        assert opacities.shape == lead_dims + (num_gaussians,), opacities.shape
    else:
        # Packed inputs carry no image dims; take them from the tile offsets.
        lead_dims = tile_offsets.shape[:-2]
        num_entries = means2d.shape[0]
        assert means2d.shape == (num_entries, 2), means2d.shape
        assert conics.shape == (num_entries, 3), conics.shape
        assert opacities.shape == (num_entries,), opacities.shape

    assert tile_offsets.shape == lead_dims + (
        tile_height,
        tile_width,
    ), tile_offsets.shape
    assert num_contributing_gaussians.shape == lead_dims + (
        image_height,
        image_width,
    ), num_contributing_gaussians.shape

    # The tile grid must cover the whole image in both directions.
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    ids_kernel = _make_lazy_cuda_func("rasterize_contributing_gaussian_ids")
    return ids_kernel(
        means2d.contiguous(),
        conics.contiguous(),
        opacities.contiguous(),
        tile_offsets.contiguous(),
        flatten_ids.contiguous(),
        image_width,
        image_height,
        tile_size,
        num_contributing_gaussians.contiguous(),
    )
@torch.no_grad()
@trace_function("render2D-top-contributors")
def rasterize_top_contributing_gaussian_ids(
    means2d: Tensor,  # [..., N, 2] or [nnz, 2]
    conics: Tensor,  # [..., N, 3] or [nnz, 3]
    opacities: Tensor,  # [..., N] or [nnz]
    tile_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    image_width: int,
    image_height: int,
    tile_size: int,
    num_depth_samples: int,
) -> Tuple[Tensor, Tensor]:
    """Returns the top radiance-weight Gaussian IDs and weights per pixel.

    The selected samples are the strongest contributors by ``alpha * T``
    during front-to-back rasterization, then sorted back into front-to-back
    order.

    A 2-D ``means2d`` is treated as packed input; anything else is treated as
    dense input whose leading dimensions are the image dimensions.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] for dense inputs, or
            [nnz, 2] for packed inputs.
        conics: Inverse projected covariances. [..., N, 3] for dense inputs,
            or [nnz, 3] for packed inputs.
        opacities: Gaussian opacities. [..., N] for dense inputs, or [nnz] for
            packed inputs.
        tile_offsets: Intersection offsets from :func:`isect_offset_encode`.
            [..., tile_height, tile_width].
        flatten_ids: Flattened Gaussian indices from :func:`isect_tiles`.
            [n_isects].
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size. Must be 4 or 16.
        num_depth_samples: Number of contributors to return per pixel. Must be
            greater than 0.

    Returns:
        A tuple:

        - **Gaussian IDs**. [..., image_height, image_width, num_depth_samples]
        - **Radiance weights**. [..., image_height, image_width, num_depth_samples]
    """
    assert tile_size in (
        4,
        16,
    ), f"Only tile_size in {{4, 16}} is supported for 3DGS rasterization, got {tile_size}"
    assert num_depth_samples > 0, "num_depth_samples must be greater than 0"

    tile_height, tile_width = tile_offsets.shape[-2:]
    if means2d.dim() != 2:
        # Dense inputs: all tensors share the same leading image dimensions.
        lead_dims = means2d.shape[:-2]
        num_gaussians = means2d.shape[-2]
        assert means2d.shape == lead_dims + (num_gaussians, 2), means2d.shape
        assert conics.shape == lead_dims + (num_gaussians, 3), conics.shape
        assert opacities.shape == lead_dims + (num_gaussians,), opacities.shape
        assert tile_offsets.shape == lead_dims + (
            tile_height,
            tile_width,
        ), tile_offsets.shape
    else:
        # Packed inputs: a flat list of entries.
        num_entries = means2d.shape[0]
        assert means2d.shape == (num_entries, 2), means2d.shape
        assert conics.shape == (num_entries, 3), conics.shape
        assert opacities.shape == (num_entries,), opacities.shape

    # The tile grid must cover the whole image in both directions.
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    top_kernel = _make_lazy_cuda_func("rasterize_top_contributing_gaussian_ids")
    return top_kernel(
        means2d.contiguous(),
        conics.contiguous(),
        opacities.contiguous(),
        tile_offsets.contiguous(),
        flatten_ids.contiguous(),
        image_width,
        image_height,
        tile_size,
        num_depth_samples,
    )
def rasterize_to_pixels_eval3d(
    means: Tensor,  # [..., N, 3]
    quats: Tensor,  # [..., N, 4]
    scales: Tensor,  # [..., N, 3]
    colors: Tensor,  # [..., C, N, channels] or [nnz, channels]
    opacities: Tensor,  # [..., C, N] or [nnz]
    viewmats: Tensor,  # [..., C, 4, 4]
    Ks: Tensor,  # [..., C, 3, 3]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., C, tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    backgrounds: Optional[Tensor] = None,  # [..., C, channels]
    masks: Optional[Tensor] = None,  # [..., C, tile_height, tile_width]
    camera_model: CameraModel = "pinhole",
    ut_params: Optional[UnscentedTransformParameters] = None,
    rays: Optional[Tensor] = None,  # [..., C, H, W, 6]
    # distortion
    radial_coeffs: Optional[Tensor] = None,  # [..., C, 6] or [..., C, 4]
    tangential_coeffs: Optional[Tensor] = None,  # [..., C, 2]
    thin_prism_coeffs: Optional[Tensor] = None,  # [..., C, 4]
    ftheta_coeffs: Optional[FThetaCameraDistortionParameters] = None,
    lidar_coeffs: Optional[RowOffsetStructuredSpinningLidarModelParametersExt] = None,
    external_distortion_coeffs: Optional[BivariateWindshieldModelParameters] = None,
    # rolling shutter
    rolling_shutter: RollingShutterType = RollingShutterType.GLOBAL,
    viewmats_rs: Optional[Tensor] = None,  # [..., C, 4, 4]
    use_hit_distance: bool = False,
    return_normals: bool = False,
    renderer_config: Any = None,
) -> Tuple[Tensor, Tensor]:
    """Rasterizes Gaussians to pixels.

    Similar to `rasterize_to_pixels()`, but compute the Gaussian responses in
    the 3D world space instead of the 2D image space. Supports rolling shutter
    and camera distortion.

    This is a convenience wrapper around `rasterize_to_pixels_eval3d_extra()`
    that disables the last-id and sample-count outputs and returns only the
    rendered colors and alphas. All arguments are forwarded unchanged.

    ``colors.shape[-1]`` must be one of the channel counts compiled into
    ``GSPLAT_NUM_CHANNELS`` (see ``gsplat/cuda/csrc/Config.h``); otherwise the
    CUDA kernel raises ``ValueError``.

    Args:
        ut_params: Unscented transform parameters. ``None`` uses a
            default-constructed ``UnscentedTransformParameters``.
        renderer_config: Eval3d renderer selector. ``None`` uses the default
            ``RendererConfig_MixedBatch`` policy. Pass public
            ``RendererConfig_MixedBatch`` / ``RendererConfig_ParallelBatch``
            instances, or the already-translated low-level CUDA config value.

    Returns:
        A tuple:

        - **Rendered colors**. [..., C, image_height, image_width, channels]
        - **Rendered alphas**. [..., C, image_height, image_width, 1]
    """
    if ut_params is None:
        ut_params = UnscentedTransformParameters()

    colors, alphas, *_ = rasterize_to_pixels_eval3d_extra(
        means=means,
        quats=quats,
        scales=scales,
        colors=colors,
        opacities=opacities,
        viewmats=viewmats,
        Ks=Ks,
        rays=rays,
        image_width=image_width,
        image_height=image_height,
        tile_size=tile_size,
        isect_offsets=isect_offsets,
        flatten_ids=flatten_ids,
        backgrounds=backgrounds,
        masks=masks,
        camera_model=camera_model,
        ut_params=ut_params,
        radial_coeffs=radial_coeffs,
        tangential_coeffs=tangential_coeffs,
        thin_prism_coeffs=thin_prism_coeffs,
        ftheta_coeffs=ftheta_coeffs,
        lidar_coeffs=lidar_coeffs,
        external_distortion_coeffs=external_distortion_coeffs,
        rolling_shutter=rolling_shutter,
        viewmats_rs=viewmats_rs,
        return_last_ids=False,
        return_sample_counts=False,
        use_hit_distance=use_hit_distance,
        return_normals=return_normals,
        renderer_config=renderer_config,
    )

    return colors, alphas


@trace_function("raster3D-fwd")
def rasterize_to_pixels_eval3d_extra(
    means: Tensor,  # [..., N, 3]
    quats: Tensor,  # [..., N, 4]
    scales: Tensor,  # [..., N, 3]
    colors: Tensor,  # [..., C, N, channels] or [nnz, channels]
    opacities: Tensor,  # [..., C, N] or [nnz]
    viewmats: Tensor,  # [..., C, 4, 4]
    Ks: Tensor,  # [..., C, 3, 3]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., C, tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    backgrounds: Optional[Tensor] = None,  # [..., C, channels]
    masks: Optional[Tensor] = None,  # [..., C, tile_height, tile_width]
    camera_model: CameraModel = "pinhole",
    ut_params: Optional[UnscentedTransformParameters] = None,
    rays: Optional[Tensor] = None,  # [..., C, P, 6]
    # distortion
    radial_coeffs: Optional[Tensor] = None,  # [..., C, 6] or [..., C, 4]
    tangential_coeffs: Optional[Tensor] = None,  # [..., C, 2]
    thin_prism_coeffs: Optional[Tensor] = None,  # [..., C, 4]
    ftheta_coeffs: Optional[FThetaCameraDistortionParameters] = None,
    lidar_coeffs: Optional[RowOffsetStructuredSpinningLidarModelParametersExt] = None,
    external_distortion_coeffs: Optional[BivariateWindshieldModelParameters] = None,
    # rolling shutter
    rolling_shutter: RollingShutterType = RollingShutterType.GLOBAL,
    viewmats_rs: Optional[Tensor] = None,  # [..., C, 4, 4]
    return_sample_counts: bool = False,
    use_hit_distance: bool = False,
    return_normals: bool = False,
    renderer_config: Any = None,
    return_last_ids: bool = True,
    unsafe_masked_tile_outputs: bool = False,
) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor], Optional[Tensor]]:
    """Rasterizes Gaussians to pixels, returning extra information for debugging.

    Similar to `rasterize_to_pixels_eval3d()`, but can return the last gaussian
    id accumulated in a pixel and optionally the number of accumulated samples
    per pixel.

    ``colors.shape[-1]`` must be one of the channel counts compiled into
    ``GSPLAT_NUM_CHANNELS`` (see ``gsplat/cuda/csrc/Config.h``); otherwise the
    CUDA kernel raises ``ValueError``.

    Args:
        means: Gaussian means. [..., N, 3]
        quats: Gaussian quaternions. [..., N, 4]
        scales: Gaussian scales. [..., N, 3]
        colors: Gaussian colors or ND features. [..., C, N, channels]. The
            packed layout ([nnz, channels]) is not supported yet.
        opacities: Gaussian opacities. Must match ``colors.shape[:-1]``.
        viewmats: View matrices. [..., C, 4, 4]
        Ks: Camera intrinsics. [..., C, 3, 3]
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size. Must be 8 or 16.
        isect_offsets: Intersection offsets. [..., C, tile_height, tile_width]
        flatten_ids: Flattened Gaussian indices. [n_isects]
        backgrounds: Background colors. [..., C, channels]. Default: None.
        masks: Optional tile mask with the same shape as ``isect_offsets``.
            Default: None.
        camera_model: Camera model name. For ``"lidar"`` the tile grid must
            match ``lidar_coeffs.tiling`` and ``lidar_coeffs`` is required.
        ut_params: Unscented transform parameters. ``None`` uses a
            default-constructed ``UnscentedTransformParameters``.
        rays: Optional float32 rays. [..., C, P, 6]. Default: None.
        radial_coeffs: Radial distortion coefficients. [..., C, 6] or
            [..., C, 4]. Default: None.
        tangential_coeffs: Tangential distortion coefficients. [..., C, 2].
            Default: None.
        thin_prism_coeffs: Thin prism distortion coefficients. [..., C, 4].
            Default: None.
        ftheta_coeffs: F-theta distortion parameters. ``None`` uses a
            default-constructed ``FThetaCameraDistortionParameters``.
        lidar_coeffs: Lidar model parameters; converted with ``to_cpp()``.
        external_distortion_coeffs: External distortion parameters, forwarded
            to the CUDA operator unchanged.
        rolling_shutter: Rolling shutter type. Default: GLOBAL.
        viewmats_rs: Second set of view matrices used for rolling shutter.
            [..., C, 4, 4]. Default: None.
        return_last_ids: If True, also return last flatten_idx per pixel.
            Default: True.
        return_sample_counts: If True, also return number of accumulated
            samples per pixel. Default: False.
        use_hit_distance: Forwarded to the CUDA operator unchanged.
            Default: False.
        return_normals: If True, compute and return accumulated normals per
            pixel. Normals are computed from Gaussian quaternions (canonical
            normal = (0,0,1) transformed by rotation, flipped if facing away
            from ray). Default: False.
        renderer_config: Eval3d renderer selector. ``None`` uses the default
            ``RendererConfig_MixedBatch`` policy. Pass public
            ``RendererConfig_MixedBatch`` / ``RendererConfig_ParallelBatch``
            instances, or the already-translated low-level CUDA config value.
        unsafe_masked_tile_outputs: If True, outputs for masked tiles are left
            undefined and must not be read by the caller. Default False writes
            per-pixel safe values for masked tiles: render_colors = backgrounds
            (or 0.0 when no backgrounds are provided), render_alphas = 0.0,
            render_normals = 0.0, last_ids = -1, sample_counts = 0.

    Returns:
        A 5-tuple (contents of the optional entries depend on return flags):

        - **Rendered colors**. [..., C, image_height, image_width, channels]
        - **Rendered alphas**. [..., C, image_height, image_width, 1]
        - **Last flatten_idx** (optional). [..., C, image_height, image_width].
          If return_last_ids=True.
        - **Sample counts** (optional). [..., C, image_height, image_width].
          If return_sample_counts=True.
        - **Rendered normals** (optional). [..., C, image_height, image_width, 3].
          If return_normals=True.

    Raises:
        NotImplementedError: If ``colors`` uses the packed layout.
        AssertionError: If an input shape or dtype is inconsistent,
            ``tile_size`` is unsupported, ``lidar_coeffs`` is missing for the
            lidar camera model, or the tile grid does not cover the image.
    """
    if ut_params is None:
        ut_params = UnscentedTransformParameters()

    renderer_config = _renderer_config_to_cuda(renderer_config)

    batch_dims = means.shape[:-2]
    num_batch_dims = len(batch_dims)
    N = means.size(-2)
    C = viewmats.size(-3)
    P = rays.shape[-2] if rays is not None else 0
    channels = colors.shape[-1]

    assert means.shape == batch_dims + (N, 3), means.shape
    assert quats.shape == batch_dims + (N, 4), quats.shape
    assert scales.shape == batch_dims + (N, 3), scales.shape
    assert viewmats.shape == batch_dims + (C, 4, 4), viewmats.shape
    assert Ks.shape == batch_dims + (C, 3, 3), Ks.shape
    if rays is not None:
        assert_shape("rays", rays, batch_dims + (C, P, 6))
        assert (
            rays.dtype == torch.float32
        ), f"rays must be torch.float32, got {rays.dtype}"

    assert colors.ndim in (num_batch_dims + 2, num_batch_dims + 3), colors.shape
    if colors.ndim == num_batch_dims + 2:
        # Packed colors ([nnz, channels]) have one dimension fewer than the
        # dense layout; reject them up front.
        raise NotImplementedError("packed mode is not supported yet")
    assert colors.shape == batch_dims + (C, N, channels), colors.shape
    assert opacities.shape == colors.shape[:-1], opacities.shape

    # Optional tensors are validated and made contiguous once here, so they can
    # be passed to the CUDA operator as-is below.
    if backgrounds is not None:
        assert backgrounds.shape == batch_dims + (C, channels), backgrounds.shape
        backgrounds = backgrounds.contiguous()
    if masks is not None:
        assert masks.shape == isect_offsets.shape, masks.shape
        masks = masks.contiguous()
    if radial_coeffs is not None:
        assert radial_coeffs.shape[:-1] == batch_dims + (C,) and radial_coeffs.shape[
            -1
        ] in (6, 4), radial_coeffs.shape
        radial_coeffs = radial_coeffs.contiguous()
    if tangential_coeffs is not None:
        assert tangential_coeffs.shape == batch_dims + (C, 2), tangential_coeffs.shape
        tangential_coeffs = tangential_coeffs.contiguous()
    if thin_prism_coeffs is not None:
        assert thin_prism_coeffs.shape == batch_dims + (C, 4), thin_prism_coeffs.shape
        thin_prism_coeffs = thin_prism_coeffs.contiguous()
    if viewmats_rs is not None:
        assert viewmats_rs.shape == batch_dims + (C, 4, 4), viewmats_rs.shape
        viewmats_rs = viewmats_rs.contiguous()

    # The 3DGUT fwd launcher dispatches on tile_size:
    #   tile_size=8  -> kernel<CDIM,  8,  32 >  (compact CTA, PPT=2)
    #   tile_size=16 -> kernel<CDIM, 16, 256>  (one thread per pixel, PPT=1)
    # tile_size must match the launcher dispatch or the tile grid will be
    # misaligned with the kernel's TILE_SIZE constexpr.
    assert tile_size in (
        8,
        16,
    ), f"3DGUT rasterization requires tile_size in (8, 16), got {tile_size}"

    tile_height, tile_width = isect_offsets.shape[-2:]
    if camera_model == "lidar":
        # Fail with a clear message instead of an AttributeError on None.
        assert (
            lidar_coeffs is not None
        ), "lidar_coeffs is required when camera_model is 'lidar'"
        assert tile_width == lidar_coeffs.tiling.n_bins_azimuth
        assert tile_height == lidar_coeffs.tiling.n_bins_elevation
        # TODO: improve checks. Right now we don't have access to max_pts_per_tile used,
        #       hence this assert needs to be commented out.
        # assert tile_width*tile_height*lidar_coeffs.tiling.max_pts_per_tile >= lidar_coeffs.n_rows*lidar_coeffs.n_columns
    else:
        assert (
            tile_height * tile_size >= image_height
        ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
        assert (
            tile_width * tile_size >= image_width
        ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    camera_model_type = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}")

    ftheta_coeffs = (
        ftheta_coeffs
        if ftheta_coeffs is not None
        else FThetaCameraDistortionParameters()
    )

    lidar_coeffs = lidar_coeffs.to_cpp() if lidar_coeffs is not None else None

    (
        render_colors,
        render_alphas,
        last_ids,
        sample_counts,
        render_normals,
    ) = _make_lazy_cuda_func("rasterize_to_pixels_from_world_3dgs")(
        means.contiguous(),
        quats.contiguous(),
        scales.contiguous(),
        colors.contiguous(),
        opacities.contiguous(),
        backgrounds,
        masks,
        image_width,
        image_height,
        tile_size,
        viewmats.contiguous(),
        viewmats_rs,
        Ks.contiguous(),
        camera_model_type,
        ut_params,
        rolling_shutter,
        rays.contiguous() if rays is not None else None,
        # distortion
        radial_coeffs,
        tangential_coeffs,
        thin_prism_coeffs,
        ftheta_coeffs,
        lidar_coeffs,
        external_distortion_coeffs,
        isect_offsets.contiguous(),
        flatten_ids.contiguous(),
        return_sample_counts,
        use_hit_distance,
        return_normals,
        renderer_config,
        return_last_ids,
        unsafe_masked_tile_outputs,
    )

    return render_colors, render_alphas, last_ids, sample_counts, render_normals
@torch.no_grad()
def rasterize_to_indices_in_range(
    range_start: int,
    range_end: int,
    transmittances: Tensor,  # [..., image_height, image_width]
    means2d: Tensor,  # [..., N, 2]
    conics: Tensor,  # [..., N, 3]
    opacities: Tensor,  # [..., N]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
) -> Tuple[Tensor, Tensor, Tensor]:
    """Rasterizes a batch of Gaussians to images but only returns the indices.

    .. note::

        This function supports iterative rasterization, in which each call of
        this function will rasterize a batch of Gaussians from near to far,
        defined by `[range_start, range_end)`. If a one-step full
        rasterization is desired, set `range_start` to 0 and `range_end` to a
        really large number.

    Args:
        range_start: The start batch of Gaussians to be rasterized (inclusive).
        range_end: The end batch of Gaussians to be rasterized (exclusive).
        transmittances: Current transmittances.
            [..., image_height, image_width]
        means2d: Projected Gaussian means. [..., N, 2]
        conics: Inverse of the projected covariances with only upper triangle
            values. [..., N, 3]
        opacities: Gaussian opacities that support per-view values. [..., N]
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size.
        isect_offsets: Intersection offsets outputs from
            `isect_offset_encode()`. [..., tile_height, tile_width]
        flatten_ids: The global flatten indices in [I * N] from
            `isect_tiles()`. [n_isects]

    Returns:
        A tuple:

        - **Gaussian ids**. Gaussian ids for the pixel intersection. A
          flattened list of shape [M].
        - **Pixel ids**. pixel indices (row-major). A flattened list of shape
          [M].
        - **Image ids**. image indices. A flattened list of shape [M].
    """
    lead_dims = means2d.shape[:-2]
    tile_height, tile_width = isect_offsets.shape[-2:]
    num_gaussians = means2d.shape[-2]

    # All per-image tensors must share the leading image dimensions.
    assert transmittances.shape == lead_dims + (
        image_height,
        image_width,
    ), transmittances.shape
    assert means2d.shape == lead_dims + (num_gaussians, 2), means2d.shape
    assert conics.shape == lead_dims + (num_gaussians, 3), conics.shape
    assert opacities.shape == lead_dims + (num_gaussians,), opacities.shape
    assert isect_offsets.shape == lead_dims + (
        tile_height,
        tile_width,
    ), isect_offsets.shape

    # The tile grid must cover the whole image in both directions.
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    to_indices = _make_lazy_cuda_func("rasterize_to_indices_3dgs")
    out_gauss_ids, flat_pixel_indices = to_indices(
        range_start,
        range_end,
        transmittances.contiguous(),
        means2d.contiguous(),
        conics.contiguous(),
        opacities.contiguous(),
        image_width,
        image_height,
        tile_size,
        isect_offsets.contiguous(),
        flatten_ids.contiguous(),
    )

    # The returned indices are split into a within-image pixel index and an
    # image index using the number of pixels per image.
    pixels_per_image = image_width * image_height
    out_pixel_ids = flat_pixel_indices % pixels_per_image
    out_image_ids = flat_pixel_indices // pixels_per_image
    return out_gauss_ids, out_pixel_ids, out_image_ids
class _QuatScaleToCovarPreci(torch.autograd.Function):
    """Converts quaternions and scales to covariance and precision matrices."""

    @staticmethod
    def forward(
        ctx,
        quats: Tensor,  # [..., 4],
        scales: Tensor,  # [..., 3],
        compute_covar: bool = True,
        compute_preci: bool = True,
        triu: bool = False,
    ) -> Tuple[Tensor, Tensor]:
        covars, precis = _make_lazy_cuda_func("quat_scale_to_covar_preci_fwd")(
            quats, scales, compute_covar, compute_preci, triu
        )
        ctx.save_for_backward(quats, scales)
        ctx.compute_covar = compute_covar
        ctx.compute_preci = compute_preci
        ctx.triu = triu
        return covars, precis

    @staticmethod
    def backward(ctx, v_covars: Tensor, v_precis: Tensor):
        quats, scales = ctx.saved_tensors
        compute_covar = ctx.compute_covar
        compute_preci = ctx.compute_preci
        triu = ctx.triu
        # Incoming gradients may be sparse; the kernel is handed dense ones.
        if compute_covar and v_covars.is_sparse:
            v_covars = v_covars.to_dense()
        if compute_preci and v_precis.is_sparse:
            v_precis = v_precis.to_dense()
        v_quats, v_scales = _make_lazy_cuda_func("quat_scale_to_covar_preci_bwd")(
            quats,
            scales,
            triu,
            v_covars.contiguous() if compute_covar else None,
            v_precis.contiguous() if compute_preci else None,
        )
        return (
            v_quats,
            v_scales,
            None,  # compute_covar
            None,  # compute_preci
            None,  # triu
        )


class _Proj(torch.autograd.Function):
    """Perspective fully_fused_projection on Gaussians."""

    @staticmethod
    def forward(
        ctx,
        means: Tensor,  # [..., C, N, 3]
        covars: Tensor,  # [..., C, N, 3, 3]
        Ks: Tensor,  # [..., C, 3, 3]
        width: int,
        height: int,
        camera_model: CameraModel = "pinhole",
    ) -> Tuple[Tensor, Tensor]:
        assert (
            camera_model != "ftheta"
        ), "ftheta camera is only supported via UT, please set with_ut=True in the rasterization()"

        camera_model_type = _make_lazy_cuda_obj(
            f"CameraModelType.{camera_model.upper()}"
        )

        means2d, covars2d = _make_lazy_cuda_func("projection_ewa_simple_fwd")(
            means,
            covars,
            Ks,
            width,
            height,
            camera_model_type,
        )
        ctx.save_for_backward(means, covars, Ks)
        ctx.width = width
        ctx.height = height
        ctx.camera_model_type = camera_model_type
        return means2d, covars2d

    @staticmethod
    def backward(ctx, v_means2d: Tensor, v_covars2d: Tensor):
        means, covars, Ks = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        camera_model_type = ctx.camera_model_type
        v_means, v_covars = _make_lazy_cuda_func("projection_ewa_simple_bwd")(
            means,
            covars,
            Ks,
            width,
            height,
            camera_model_type,
            v_means2d.contiguous(),
            v_covars2d.contiguous(),
        )
        return (
            v_means,
            v_covars,
            None,  # Ks
            None,  # width
            None,  # height
            None,  # camera_model
        )


class _FullyFusedProjection(torch.autograd.Function):
    """Projects Gaussians to 2D."""

    @staticmethod
    def forward(
        ctx,
        means: Tensor,  # [..., N, 3]
        covars: Tensor,  # [..., N, 6] or None
        quats: Tensor,  # [..., N, 4] or None
        scales: Tensor,  # [..., N, 3] or None
        viewmats: Tensor,  # [..., C, 4, 4]
        Ks: Tensor,  # [..., C, 3, 3]
        width: int,
        height: int,
        eps2d: float,
        near_plane: float,
        far_plane: float,
        radius_clip: float,
        calc_compensations: bool,
        camera_model: CameraModel = "pinhole",
        opacities: Optional[Tensor] = None,  # [..., N] or None
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
        assert (
            camera_model != "ftheta"
        ), "ftheta camera is only supported via UT, please set with_ut=True in the rasterization()"

        camera_model_type = _make_lazy_cuda_obj(
            f"CameraModelType.{camera_model.upper()}"
        )

        # "covars" and {"quats", "scales"} are mutually exclusive
        radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
            "projection_ewa_3dgs_fused_fwd"
        )(
            means,
            covars,
            quats,
            scales,
            opacities,
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            near_plane,
            far_plane,
            radius_clip,
            calc_compensations,
            camera_model_type,
        )
        if not calc_compensations:
            compensations = None
        ctx.save_for_backward(
            means, covars, quats, scales, viewmats, Ks, radii, conics, compensations
        )
        ctx.width = width
        ctx.height = height
        ctx.eps2d = eps2d
        ctx.camera_model_type = camera_model_type

        return radii, means2d, depths, conics, compensations

    @staticmethod
    @trace_function("project-bwd")
    def backward(ctx, v_radii, v_means2d, v_depths, v_conics, v_compensations):
        (
            means,
            covars,
            quats,
            scales,
            viewmats,
            Ks,
            radii,
            conics,
            compensations,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        eps2d = ctx.eps2d
        camera_model_type = ctx.camera_model_type
        if v_compensations is not None:
            v_compensations = v_compensations.contiguous()
        v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
            "projection_ewa_3dgs_fused_bwd"
        )(
            means,
            covars,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            camera_model_type,
            radii,
            conics,
            compensations,
            v_means2d.contiguous(),
            v_depths.contiguous(),
            v_conics.contiguous(),
            v_compensations,
            ctx.needs_input_grad[4],  # viewmats_requires_grad
        )
        if not ctx.needs_input_grad[0]:
            v_means = None
        if not ctx.needs_input_grad[1]:
            v_covars = None
        if not ctx.needs_input_grad[2]:
            v_quats = None
        if not ctx.needs_input_grad[3]:
            v_scales = None
        if not ctx.needs_input_grad[4]:
            v_viewmats = None
        # One entry per forward() input (15), in forward() order.
        return (
            v_means,
            v_covars,
            v_quats,
            v_scales,
            v_viewmats,
            None,  # Ks
            None,  # width
            None,  # height
            None,  # eps2d
            None,  # near_plane
            None,  # far_plane
            None,  # radius_clip
            None,  # calc_compensations
            None,  # camera_model
            None,  # opacities
        )


@trace_function("projectUT-fwd")
def fully_fused_projection_with_ut(
    means: Tensor,  # [..., N, 3]
    quats: Tensor,  # [..., N, 4]
    scales: Tensor,  # [..., N, 3]
    opacities: Optional[Tensor],  # [..., N]
    viewmats: Tensor,  # [..., C, 4, 4]
    Ks: Tensor,  # [..., C, 3, 3]
    width: int,
    height: int,
    eps2d: float = 0.3,
    near_plane: float = 0.01,
    far_plane: float = 1e10,
    radius_clip: float = 0.0,
    calc_compensations: bool = False,
    camera_model: CameraModel = "pinhole",
    ut_params: Optional[UnscentedTransformParameters] = None,
    # distortion
    radial_coeffs: Optional[Tensor] = None,  # [..., C, 6] or [..., C, 4]
    tangential_coeffs: Optional[Tensor] = None,  # [..., C, 2]
    thin_prism_coeffs: Optional[Tensor] = None,  # [..., C, 4]
    ftheta_coeffs: Optional[FThetaCameraDistortionParameters] = None,
    lidar_coeffs: Optional[RowOffsetStructuredSpinningLidarModelParametersExt] = None,
    external_distortion_coeffs: Optional[BivariateWindshieldModelParameters] = None,
    # rolling shutter
    rolling_shutter: RollingShutterType = RollingShutterType.GLOBAL,
    viewmats_rs: Optional[Tensor] = None,  # [..., C, 4, 4]
    global_z_order: bool = True,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Optional[Tensor]]:
    """Projects Gaussians to 2D using Unscented Transform (UT).

    Similar to `fully_fused_projection()`, but supports camera distortion and
    rolling shutter.

    .. warning::
        This function is not differentiable to any input.

    Args:
        means: Gaussian means. [..., N, 3]
        quats: Quaternions. [..., N, 4]
        scales: Scales. [..., N, 3]
        opacities: Optional Gaussian opacities. [..., N]
        viewmats: Camera view matrices. [..., C, 4, 4]
        Ks: Camera intrinsics. [..., C, 3, 3]
        width: Image width.
        height: Image height.
        eps2d: Forwarded to the projection kernel. Default: 0.3.
        near_plane: Near plane distance. Default: 0.01.
        far_plane: Far plane distance. Default: 1e10.
        radius_clip: Forwarded to the projection kernel. Default: 0.0.
        calc_compensations: If False, the returned compensations are None.
            Default: False.
        camera_model: Name of the camera model; resolved to the matching
            ``CameraModelType`` member. Default: "pinhole".
        ut_params: Unscented transform parameters. A default-constructed
            `UnscentedTransformParameters` is used when None.
        radial_coeffs: Optional radial distortion coefficients.
            [..., C, 6] or [..., C, 4]
        tangential_coeffs: Optional tangential distortion coefficients. [..., C, 2]
        thin_prism_coeffs: Optional thin-prism distortion coefficients. [..., C, 4]
        ftheta_coeffs: Optional f-theta distortion parameters. A
            default-constructed `FThetaCameraDistortionParameters` is used
            when None.
        lidar_coeffs: Optional lidar model parameters; must be a
            `RowOffsetStructuredSpinningLidarModelParametersExt` and is
            converted with its ``to_cpp()`` method before the kernel call.
        external_distortion_coeffs: Optional external distortion parameters,
            forwarded to the kernel unchanged.
        rolling_shutter: Rolling shutter type. Default: `RollingShutterType.GLOBAL`.
        viewmats_rs: Optional second set of view matrices used for rolling
            shutter. [..., C, 4, 4]
        global_z_order: Defines how Gaussians are sorted for depth ordering.
            If True (default), Gaussians are sorted by their z-coordinate in
            camera space. If False, they are sorted by their Euclidean distance
            from the camera origin. The z-coordinate sorting is typically
            faster and sufficient for most cases, while Euclidean distance can
            be useful for scenes with wide field-of-view or non-standard
            camera models. Default: True.

    Returns:
        A tuple ``(radii, means2d, depths, conics, compensations)`` as produced
        by the ``projection_ut_3dgs_fused`` kernel; ``compensations`` is None
        when `calc_compensations` is False.
    """
    if ut_params is None:
        ut_params = UnscentedTransformParameters()

    batch_dims = means.shape[:-2]
    N = means.shape[-2]
    C = viewmats.shape[-3]
    assert means.shape == batch_dims + (N, 3), means.shape
    assert quats.shape == batch_dims + (N, 4), quats.shape
    assert scales.shape == batch_dims + (N, 3), scales.shape
    if opacities is not None:
        assert opacities.shape == batch_dims + (N,), opacities.shape
    assert viewmats.shape == batch_dims + (C, 4, 4), viewmats.shape
    assert Ks.shape == batch_dims + (C, 3, 3), Ks.shape
    if radial_coeffs is not None:
        assert radial_coeffs.shape[:-1] == batch_dims + (C,) and radial_coeffs.shape[
            -1
        ] in [6, 4], radial_coeffs.shape
    if tangential_coeffs is not None:
        assert tangential_coeffs.shape == batch_dims + (C, 2), tangential_coeffs.shape
    if thin_prism_coeffs is not None:
        assert thin_prism_coeffs.shape == batch_dims + (C, 4), thin_prism_coeffs.shape
    if viewmats_rs is not None:
        assert viewmats_rs.shape == batch_dims + (C, 4, 4), viewmats_rs.shape
    if lidar_coeffs is not None:
        assert isinstance(
            lidar_coeffs, RowOffsetStructuredSpinningLidarModelParametersExt
        )

    camera_model_type = _make_lazy_cuda_obj(f"CameraModelType.{camera_model.upper()}")

    ftheta_coeffs = (
        ftheta_coeffs
        if ftheta_coeffs is not None
        else FThetaCameraDistortionParameters()
    )

    radii, means2d, depths, conics, compensations = _make_lazy_cuda_func(
        "projection_ut_3dgs_fused"
    )(
        means.contiguous(),
        quats.contiguous(),
        scales.contiguous(),
        opacities.contiguous() if opacities is not None else None,
        viewmats.contiguous(),
        viewmats_rs.contiguous() if viewmats_rs is not None else None,
        Ks.contiguous(),
        width,
        height,
        eps2d,
        near_plane,
        far_plane,
        radius_clip,
        calc_compensations,
        camera_model_type,
        global_z_order,
        ut_params,
        rolling_shutter,
        radial_coeffs.contiguous() if radial_coeffs is not None else None,
        tangential_coeffs.contiguous() if tangential_coeffs is not None else None,
        thin_prism_coeffs.contiguous() if thin_prism_coeffs is not None else None,
        ftheta_coeffs,
        lidar_coeffs.to_cpp() if lidar_coeffs is not None else None,
        external_distortion_coeffs,
    )
    if not calc_compensations:
        compensations = None
    return radii, means2d, depths, conics, compensations


class _RasterizeToPixels(torch.autograd.Function):
    """Rasterize gaussians"""

    @staticmethod
    def forward(
        ctx,
        means2d: Tensor,  # [..., N, 2] or [nnz, 2]
        conics: Tensor,  # [..., N, 3] or [nnz, 3]
        colors: Tensor,  # [..., N, channels] or [nnz, channels]
        opacities: Tensor,  # [..., N] or [nnz]
        backgrounds: Tensor,  # [..., channels], Optional
        masks: Tensor,  # [..., tile_height, tile_width], Optional
        width: int,
        height: int,
        tile_size: int,
        isect_offsets: Tensor,  # [..., tile_height, tile_width]
        flatten_ids: Tensor,  # [n_isects]
        absgrad: bool,
    ) -> Tuple[Tensor, Tensor]:
        render_colors, render_alphas, last_ids = _make_lazy_cuda_func(
            "rasterize_to_pixels_3dgs_fwd"
        )(
            means2d,
            conics,
            colors,
            opacities,
            backgrounds,
            masks,
            width,
            height,
            tile_size,
            isect_offsets,
            flatten_ids,
        )

        # The kernel's alphas are saved as-is for backward; only the returned
        # copy is cast to float below.
        ctx.save_for_backward(
            means2d,
            conics,
            colors,
            opacities,
            backgrounds,
            masks,
            isect_offsets,
            flatten_ids,
            render_alphas,
            last_ids,
        )
        ctx.width = width
        ctx.height = height
        ctx.tile_size = tile_size
        ctx.absgrad = absgrad

        # double to float
        render_alphas = render_alphas.float()
        return render_colors, render_alphas

    @staticmethod
    @trace_function("render2D-bwd")
    def backward(
        ctx,
        v_render_colors: Tensor,  # [..., H, W, 3]
        v_render_alphas: Tensor,  # [..., H, W, 1]
    ):
        (
            means2d,
            conics,
            colors,
            opacities,
            backgrounds,
            masks,
            isect_offsets,
            flatten_ids,
            render_alphas,
            last_ids,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        tile_size = ctx.tile_size
        absgrad = ctx.absgrad

        (
            v_means2d_abs,
            v_means2d,
            v_conics,
            v_colors,
            v_opacities,
        ) = _make_lazy_cuda_func("rasterize_to_pixels_3dgs_bwd")(
            means2d,
            conics,
            colors,
            opacities,
            backgrounds,
            masks,
            width,
            height,
            tile_size,
            isect_offsets,
            flatten_ids,
            render_alphas,
            last_ids,
            v_render_colors.contiguous(),
            v_render_alphas.contiguous(),
            absgrad,
        )

        if absgrad:
            # Exposed as an attribute on the input tensor rather than as a
            # returned gradient.
            means2d.absgrad = v_means2d_abs

        if ctx.needs_input_grad[4]:
            v_backgrounds = (v_render_colors * (1.0 - render_alphas).float()).sum(
                dim=(-3, -2)
            )
        else:
            v_backgrounds = None

        return (
            v_means2d,
            v_conics,
            v_colors,
            v_opacities,
            v_backgrounds,
            None,  # masks
            None,  # width
            None,  # height
            None,  # tile_size
            None,  # isect_offsets
            None,  # flatten_ids
            None,  # absgrad
        )


class _FullyFusedProjectionPacked(torch.autograd.Function):
    """Projects Gaussians to 2D. Return packed tensors."""

    @staticmethod
    def forward(
        ctx,
        means: Tensor,  # [..., N, 3]
        covars: Tensor,  # [..., N, 6] or None
        quats: Tensor,  # [..., N, 4] or None
        scales: Tensor,  # [..., N, 3] or None
        viewmats: Tensor,  # [..., C, 4, 4]
        Ks: Tensor,  # [..., C, 3, 3]
        width: int,
        height: int,
        eps2d: float,
        near_plane: float,
        far_plane: float,
        radius_clip: float,
        sparse_grad: bool,
        calc_compensations: bool,
        camera_model: CameraModel = "pinhole",
        opacities: Optional[Tensor] = None,  # [..., N] or None
    ) -> Tuple[
        Tensor,
        Tensor,
        Tensor,
        Tensor,
        Tensor,
        Tensor,
        Tensor,
        Tensor,
        Optional[Tensor],
    ]:
        assert (
            camera_model != "ftheta"
        ), "ftheta camera is only supported via UT, please set with_ut=True in the rasterization()"

        camera_model_type = _make_lazy_cuda_obj(
            f"CameraModelType.{camera_model.upper()}"
        )

        (
            indptr,
            batch_ids,
            camera_ids,
            gaussian_ids,
            radii,
            means2d,
            depths,
            conics,
            compensations,
        ) = _make_lazy_cuda_func("projection_ewa_3dgs_packed_fwd")(
            means,
            covars,  # optional
            quats,  # optional
            scales,  # optional
            opacities,  # optional
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            near_plane,
            far_plane,
            radius_clip,
            calc_compensations,
            camera_model_type,
        )
        if not calc_compensations:
            compensations = None
        ctx.save_for_backward(
            batch_ids,
            camera_ids,
            gaussian_ids,
            means,
            covars,
            quats,
            scales,
            viewmats,
            Ks,
            conics,
            compensations,
        )
        ctx.width = width
        ctx.height = height
        ctx.eps2d = eps2d
        ctx.sparse_grad = sparse_grad
        ctx.camera_model_type = camera_model_type

        # Note: indptr comes first from the kernel but fourth in the result.
        return (
            batch_ids,
            camera_ids,
            gaussian_ids,
            indptr,
            radii,
            means2d,
            depths,
            conics,
            compensations,
        )

    @staticmethod
    @trace_function("project-bwd")
    def backward(
        ctx,
        v_batch_ids,
        v_camera_ids,
        v_gaussian_ids,
        v_indptr,
        v_radii,
        v_means2d,
        v_depths,
        v_conics,
        v_compensations,
    ):
        (
            batch_ids,
            camera_ids,
            gaussian_ids,
            means,
            covars,
            quats,
            scales,
            viewmats,
            Ks,
            conics,
            compensations,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        eps2d = ctx.eps2d
        sparse_grad = ctx.sparse_grad
        camera_model_type = ctx.camera_model_type

        if v_compensations is not None:
            v_compensations = v_compensations.contiguous()
        v_means, v_covars, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
            "projection_ewa_3dgs_packed_bwd"
        )(
            means,
            covars,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            camera_model_type,
            batch_ids,
            camera_ids,
            gaussian_ids,
            conics,
            compensations,
            v_means2d.contiguous(),
            v_depths.contiguous(),
            v_conics.contiguous(),
            v_compensations,
            ctx.needs_input_grad[4],  # viewmats_requires_grad
            sparse_grad,
        )

        def _finalize(grad, param, needed):
            """Drop an unneeded gradient, or wrap it as sparse COO if requested."""
            if not needed:
                return None
            if not sparse_grad:
                return grad
            # TODO: gaussian_ids is duplicated so not ideal.
            # An idea is to directly set the attribute (e.g., .sparse_grad) of
            # the tensor but this requires the tensor to be leaf node only. And
            # a customized optimizer would be needed in this case.
            return torch.sparse_coo_tensor(
                indices=gaussian_ids[None],
                values=grad,  # [nnz, D]
                size=param.shape,
                is_coalesced=len(viewmats) == 1,
            )

        v_means = _finalize(v_means, means, ctx.needs_input_grad[0])
        v_covars = _finalize(v_covars, covars, ctx.needs_input_grad[1])
        v_quats = _finalize(v_quats, quats, ctx.needs_input_grad[2])
        v_scales = _finalize(v_scales, scales, ctx.needs_input_grad[3])
        if not ctx.needs_input_grad[4]:
            v_viewmats = None

        # One entry per forward() input (16), in forward() order.
        return (
            v_means,
            v_covars,
            v_quats,
            v_scales,
            v_viewmats,
            None,  # Ks
            None,  # width
            None,  # height
            None,  # eps2d
            None,  # near_plane
            None,  # far_plane
            None,  # radius_clip
            None,  # sparse_grad
            None,  # calc_compensations
            None,  # camera_model
            None,  # opacities
        )


class _SphericalHarmonics(torch.autograd.Function):
    """Spherical Harmonics"""

    @staticmethod
    def forward(
        ctx, sh_degree: int, dirs: Tensor, coeffs: Tensor, masks: Tensor
    ) -> Tensor:
        colors = _make_lazy_cuda_func("spherical_harmonics_fwd")(
            sh_degree, dirs, coeffs, masks
        )
        ctx.save_for_backward(dirs, coeffs, masks)
        ctx.sh_degree = sh_degree
        return colors

    @staticmethod
    @trace_function("sh-bwd")
    def backward(ctx, v_colors: Tensor):
        dirs, coeffs, masks = ctx.saved_tensors
        sh_degree = ctx.sh_degree
        # The direction gradient is only requested from the kernel when needed.
        compute_v_dirs = ctx.needs_input_grad[1]
        v_coeffs, v_dirs = _make_lazy_cuda_func("spherical_harmonics_bwd")(
            sh_degree,
            dirs,
            coeffs,
            masks,
            v_colors.contiguous(),
            compute_v_dirs,
        )
        if not compute_v_dirs:
            v_dirs = None
        return (
            None,  # sh_degree
            v_dirs,
            v_coeffs,
            None,  # masks
        )


###### 2DGS ######
def fully_fused_projection_2dgs(
    means: Tensor,  # [..., N, 3]
    quats: Tensor,  # [..., N, 4]
    scales: Tensor,  # [..., N, 3]
    viewmats: Tensor,  # [..., C, 4, 4]
    Ks: Tensor,  # [..., C, 3, 3]
    width: int,
    height: int,
    eps2d: float = 0.3,
    near_plane: float = 0.01,
    far_plane: float = 1e10,
    radius_clip: float = 0.0,
    packed: bool = False,
    sparse_grad: bool = False,
) -> Tuple[Tensor, ...]:
    """Prepare Gaussians for rasterization

    This function prepares ray-splat intersection matrices, computes
    per splat bounding box and 2D means in image space.

    Args:
        means: Gaussian means. [..., N, 3]
        quats: Quaternions (No need to be normalized). [..., N, 4].
        scales: Scales. [..., N, 3].
        viewmats: World-to-camera matrices. [..., C, 4, 4]
        Ks: Camera intrinsics. [..., C, 3, 3]
        width: Image width.
        height: Image height.
        eps2d: Forwarded to the projection only when `packed` is False; it is
            not used when `packed` is True. Default: 0.3.
        near_plane: Near plane distance. Default: 0.01.
        far_plane: Far plane distance. Default: 1e10.
        radius_clip: Gaussians with projected radii smaller than this value will be ignored. Default: 0.0.
        packed: If True, the output tensors will be packed into a flattened tensor. Default: False.
        sparse_grad (Experimental): This is only effective when `packed` is True. If True, during backward the gradients
          of {`means`, `quats`, `scales`} will be a sparse Tensor in COO layout. Default: False.

    Returns:
        A tuple:

        If `packed` is True:

        - **batch_ids**. The batch indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **camera_ids**. The camera indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **gaussian_ids**. The column indices of the projected Gaussians. Int32 tensor of shape [nnz].
        - **indptr**. CSR-style index pointer into gaussian_ids for batch-camera pairs. Int32 tensor of shape [B*C+1].
        - **radii**. The maximum radius of the projected Gaussians in pixel unit. Int32 tensor of shape [nnz, 2].
        - **means**. Projected Gaussian means in 2D. [nnz, 2]
        - **depths**. The z-depth of the projected Gaussians. [nnz]
        - **ray_transforms**. transformation matrices that transforms xy-planes in pixel spaces into splat coordinates (WH)^T in equation (9) in paper [nnz, 3, 3]
        - **normals**. The normals in camera spaces. [nnz, 3]

        If `packed` is False:

        - **radii**. The maximum radius of the projected Gaussians in pixel unit. Int32 tensor of shape [..., C, N, 2].
        - **means**. Projected Gaussian means in 2D. [..., C, N, 2]
        - **depths**. The z-depth of the projected Gaussians. [..., C, N]
        - **ray_transforms**. transformation matrices that transforms xy-planes in pixel spaces into splat coordinates [..., C, N, 3, 3]
        - **normals**. The normals in camera spaces. [..., C, N, 3]

    Raises:
        AssertionError: If an input shape is inconsistent, `quats`/`scales` is
            None, or `sparse_grad` is requested without `packed`.
    """
    batch_dims = means.shape[:-2]
    N = means.shape[-2]
    C = viewmats.shape[-3]
    assert means.shape == batch_dims + (N, 3), means.shape
    assert viewmats.shape == batch_dims + (C, 4, 4), viewmats.shape
    assert Ks.shape == batch_dims + (C, 3, 3), Ks.shape
    means = means.contiguous()
    assert quats is not None, "quats is required"
    assert scales is not None, "scales is required"
    assert quats.shape == batch_dims + (N, 4), quats.shape
    assert scales.shape == batch_dims + (N, 3), scales.shape
    quats = quats.contiguous()
    scales = scales.contiguous()
    if sparse_grad:
        assert packed, "sparse_grad is only supported when packed is True"

    viewmats = viewmats.contiguous()
    Ks = Ks.contiguous()
    if packed:
        # The packed autograd function takes no eps2d argument.
        return _FullyFusedProjectionPacked2DGS.apply(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            near_plane,
            far_plane,
            radius_clip,
            sparse_grad,
        )
    else:
        return _FullyFusedProjection2DGS.apply(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            near_plane,
            far_plane,
            radius_clip,
        )
class _FullyFusedProjection2DGS(torch.autograd.Function):
    """Projects Gaussians to 2D."""

    @staticmethod
    def forward(
        ctx,
        means: Tensor,  # [..., N, 3]
        quats: Tensor,  # [..., N, 4]
        scales: Tensor,  # [..., N, 3]
        viewmats: Tensor,  # [..., C, 4, 4]
        Ks: Tensor,  # [..., C, 3, 3]
        width: int,
        height: int,
        eps2d: float,
        near_plane: float,
        far_plane: float,
        radius_clip: float,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
        radii, means2d, depths, ray_transforms, normals = _make_lazy_cuda_func(
            "projection_2dgs_fused_fwd"
        )(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            eps2d,
            near_plane,
            far_plane,
            radius_clip,
        )
        ctx.save_for_backward(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            radii,
            ray_transforms,
            normals,
        )
        ctx.width = width
        ctx.height = height
        ctx.eps2d = eps2d

        return radii, means2d, depths, ray_transforms, normals

    @staticmethod
    def backward(ctx, v_radii, v_means2d, v_depths, v_ray_transforms, v_normals):
        # The saved normals (last entry) are not consumed by the backward kernel.
        (
            means,
            quats,
            scales,
            viewmats,
            Ks,
            radii,
            ray_transforms,
            _normals,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        # NOTE(review): gradient order here is (normals, ray_transforms), the
        # reverse of the packed variant; presumably each matches its own
        # kernel signature - confirm against the C++ bindings.
        v_means, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
            "projection_2dgs_fused_bwd"
        )(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            radii,
            ray_transforms,
            v_means2d.contiguous(),
            v_depths.contiguous(),
            v_normals.contiguous(),
            v_ray_transforms.contiguous(),
            ctx.needs_input_grad[3],  # viewmats_requires_grad
        )
        if not ctx.needs_input_grad[0]:
            v_means = None
        if not ctx.needs_input_grad[1]:
            v_quats = None
        if not ctx.needs_input_grad[2]:
            v_scales = None
        if not ctx.needs_input_grad[3]:
            v_viewmats = None

        # One entry per forward() input (11), in forward() order.
        return (
            v_means,
            v_quats,
            v_scales,
            v_viewmats,
            None,  # Ks
            None,  # width
            None,  # height
            None,  # eps2d
            None,  # near_plane
            None,  # far_plane
            None,  # radius_clip
        )


class _FullyFusedProjectionPacked2DGS(torch.autograd.Function):
    """Projects Gaussians to 2D. Return packed tensors."""

    @staticmethod
    def forward(
        ctx,
        means: Tensor,  # [..., N, 3]
        quats: Tensor,  # [..., N, 4]
        scales: Tensor,  # [..., N, 3]
        viewmats: Tensor,  # [..., C, 4, 4]
        Ks: Tensor,  # [..., C, 3, 3]
        width: int,
        height: int,
        near_plane: float,
        far_plane: float,
        radius_clip: float,
        sparse_grad: bool,
    ) -> Tuple[
        Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor
    ]:
        (
            indptr,
            batch_ids,
            camera_ids,
            gaussian_ids,
            radii,
            means2d,
            depths,
            ray_transforms,
            normals,
        ) = _make_lazy_cuda_func("projection_2dgs_packed_fwd")(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            near_plane,
            far_plane,
            radius_clip,
        )
        ctx.save_for_backward(
            batch_ids,
            camera_ids,
            gaussian_ids,
            means,
            quats,
            scales,
            viewmats,
            Ks,
            ray_transforms,
        )
        ctx.width = width
        ctx.height = height
        ctx.sparse_grad = sparse_grad

        # Note: indptr comes first from the kernel but fourth in the result.
        return (
            batch_ids,
            camera_ids,
            gaussian_ids,
            indptr,
            radii,
            means2d,
            depths,
            ray_transforms,
            normals,
        )

    @staticmethod
    def backward(
        ctx,
        v_batch_ids,
        v_camera_ids,
        v_gaussian_ids,
        v_indptr,
        v_radii,
        v_means2d,
        v_depths,
        v_ray_transforms,
        v_normals,
    ):
        (
            batch_ids,
            camera_ids,
            gaussian_ids,
            means,
            quats,
            scales,
            viewmats,
            Ks,
            ray_transforms,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        sparse_grad = ctx.sparse_grad

        v_means, v_quats, v_scales, v_viewmats = _make_lazy_cuda_func(
            "projection_2dgs_packed_bwd"
        )(
            means,
            quats,
            scales,
            viewmats,
            Ks,
            width,
            height,
            batch_ids,
            camera_ids,
            gaussian_ids,
            ray_transforms,
            v_means2d.contiguous(),
            v_depths.contiguous(),
            v_ray_transforms.contiguous(),
            v_normals.contiguous(),
            ctx.needs_input_grad[3],  # viewmats_requires_grad
            sparse_grad,
        )

        def _finalize(grad, param, needed):
            """Drop an unneeded gradient, or wrap it as sparse COO if requested."""
            if not needed:
                return None
            if not sparse_grad:
                return grad
            # TODO: gaussian_ids is duplicated so not ideal.
            # An idea is to directly set the attribute (e.g., .sparse_grad) of
            # the tensor but this requires the tensor to be leaf node only. And
            # a customized optimizer would be needed in this case.
            return torch.sparse_coo_tensor(
                indices=gaussian_ids[None],
                values=grad,  # [nnz, D]
                size=param.shape,
                is_coalesced=len(viewmats) == 1,
            )

        v_means = _finalize(v_means, means, ctx.needs_input_grad[0])
        v_quats = _finalize(v_quats, quats, ctx.needs_input_grad[1])
        v_scales = _finalize(v_scales, scales, ctx.needs_input_grad[2])
        if not ctx.needs_input_grad[3]:
            v_viewmats = None

        # One entry per forward() input (11), in forward() order.
        return (
            v_means,
            v_quats,
            v_scales,
            v_viewmats,
            None,  # Ks
            None,  # width
            None,  # height
            None,  # near_plane
            None,  # far_plane
            None,  # radius_clip
            None,  # sparse_grad
        )
def rasterize_to_pixels_2dgs(
    means2d: Tensor,  # [..., N, 2]
    ray_transforms: Tensor,  # [..., N, 3, 3]
    colors: Tensor,  # [..., N, channels]
    opacities: Tensor,  # [..., N]
    normals: Tensor,  # [..., N, 3]
    densify: Tensor,  # [..., N, 2]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
    backgrounds: Optional[Tensor] = None,  # [..., channels]
    masks: Optional[Tensor] = None,  # [..., tile_height, tile_width]
    packed: bool = False,
    absgrad: bool = False,
    distloss: bool = False,
) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
    """Rasterize Gaussians to pixels.

    Args:
        means2d: Projected Gaussian means. [..., N, 2] if packed is False, [nnz, 2] if packed is True.
        ray_transforms: transformation matrices that transforms xy-planes in pixel spaces into splat coordinates.
            [..., N, 3, 3] if packed is False, [nnz, 3, 3] if packed is True.
        colors: Gaussian colors or ND features. [..., N, channels] if packed is False, [nnz, channels] if packed is True.
            ``colors.shape[-1]`` must be one of the channel counts compiled into
            ``GSPLAT_NUM_CHANNELS`` (see ``gsplat/cuda/csrc/Config.h``); otherwise the CUDA kernel raises ``ValueError``.
        opacities: Gaussian opacities that support per-view values. [..., N] if packed is False, [nnz] if packed is True.
        normals: The normals in camera space. [..., N, 3] if packed is False, [nnz, 3] if packed is True.
        densify: Dummy variable to keep track of gradient for densification. [..., N, 2] if packed is False.
            The shape is not validated here (the packed shape was originally documented as [nnz, 3] - TODO confirm).
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size.
        isect_offsets: Intersection offsets outputs from `isect_offset_encode()`. [..., tile_height, tile_width]
        flatten_ids: The global flatten indices in [I * N] or [nnz] from `isect_tiles()`. [n_isects]
        backgrounds: Background colors. [..., channels]. Default: None.
        masks: Optional tile mask to skip rendering GS to masked tiles. [..., tile_height, tile_width]. Default: None.
        packed: If True, the input tensors are expected to be packed with shape [nnz, ...]. Default: False.
        absgrad: If True, the backward pass will compute a `.absgrad` attribute for `means2d`. Default: False.
        distloss: Forwarded to the rasterization autograd function; presumably toggles the
            distortion-loss gradient - TODO confirm. Default: False.

    Returns:
        A tuple:

        - **Rendered colors**.      [..., image_height, image_width, channels]
        - **Rendered alphas**.      [..., image_height, image_width, 1]
        - **Rendered normals**.     [..., image_height, image_width, 3]
        - **Rendered distortion**.  [..., image_height, image_width, 1]
        - **Rendered median depth**.[..., image_height, image_width, 1]

    Raises:
        AssertionError: If an input shape is inconsistent or the tile grid
            does not cover the image.
    """
    # NOTE(review): for packed inputs means2d is [nnz, 2], so image_dims is
    # empty and `backgrounds` is then required to be exactly [channels] -
    # confirm this is the intended packed contract.
    image_dims = means2d.shape[:-2]
    channels = colors.shape[-1]
    if packed:
        nnz = means2d.size(0)
        assert means2d.shape == (nnz, 2), means2d.shape
        assert ray_transforms.shape == (nnz, 3, 3), ray_transforms.shape
        assert colors.shape[0] == nnz, colors.shape
        assert opacities.shape == (nnz,), opacities.shape
    else:
        N = means2d.size(-2)
        assert means2d.shape == image_dims + (N, 2), means2d.shape
        assert ray_transforms.shape == image_dims + (N, 3, 3), ray_transforms.shape
        assert colors.shape[:-2] == image_dims, colors.shape
        assert opacities.shape == image_dims + (N,), opacities.shape
    if backgrounds is not None:
        assert backgrounds.shape == image_dims + (channels,), backgrounds.shape
        backgrounds = backgrounds.contiguous()
    if masks is not None:
        # Hand the kernel a contiguous mask, like every other tensor input.
        masks = masks.contiguous()

    tile_height, tile_width = isect_offsets.shape[-2:]
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    (
        render_colors,
        render_alphas,
        render_normals,
        render_distort,
        render_median,
    ) = _RasterizeToPixels2DGS.apply(
        means2d.contiguous(),
        ray_transforms.contiguous(),
        colors.contiguous(),
        opacities.contiguous(),
        normals.contiguous(),
        densify.contiguous(),
        backgrounds,
        masks,
        image_width,
        image_height,
        tile_size,
        isect_offsets.contiguous(),
        flatten_ids.contiguous(),
        absgrad,
        distloss,
    )

    return render_colors, render_alphas, render_normals, render_distort, render_median
@torch.no_grad()
def rasterize_to_indices_in_range_2dgs(
    range_start: int,
    range_end: int,
    transmittances: Tensor,  # [..., image_height, image_width]
    means2d: Tensor,  # [..., N, 2]
    ray_transforms: Tensor,  # [..., N, 3, 3]
    opacities: Tensor,  # [..., N]
    image_width: int,
    image_height: int,
    tile_size: int,
    isect_offsets: Tensor,  # [..., tile_height, tile_width]
    flatten_ids: Tensor,  # [n_isects]
) -> Tuple[Tensor, Tensor, Tensor]:
    """Rasterizes a batch of Gaussians to images but only returns the indices.

    .. note::

        This function supports iterative rasterization, in which each call of this function
        will rasterize a batch of Gaussians from near to far, defined by `[range_start, range_end)`.
        If a one-step full rasterization is desired, set `range_start` to 0 and `range_end` to a really
        large number, e.g, 1e10.

    Args:
        range_start: The start batch of Gaussians to be rasterized (inclusive).
        range_end: The end batch of Gaussians to be rasterized (exclusive).
        transmittances: Currently transmittances. [..., image_height, image_width]
        means2d: Projected Gaussian means. [..., N, 2]
        ray_transforms: transformation matrices that transforms xy-planes in pixel spaces into splat coordinates. [..., N, 3, 3]
        opacities: Gaussian opacities that support per-view values. [..., N]
        image_width: Image width.
        image_height: Image height.
        tile_size: Tile size.
        isect_offsets: Intersection offsets outputs from `isect_offset_encode()`. [..., tile_height, tile_width]
        flatten_ids: The global flatten indices in [I * N] from `isect_tiles()`. [n_isects]

    Returns:
        A tuple:

        - **Gaussian ids**. Gaussian ids for the pixel intersection. A flattened list of shape [M].
        - **Pixel ids**. pixel indices (row-major). A flattened list of shape [M].
        - **Image ids**. image indices. A flattened list of shape [M].

    Raises:
        AssertionError: If an input shape is inconsistent or the tile grid
            does not cover the image.
    """
    image_dims = means2d.shape[:-2]
    tile_height, tile_width = isect_offsets.shape[-2:]
    N = means2d.shape[-2]
    assert transmittances.shape == image_dims + (
        image_height,
        image_width,
    ), transmittances.shape
    assert means2d.shape == image_dims + (N, 2), means2d.shape
    assert ray_transforms.shape == image_dims + (N, 3, 3), ray_transforms.shape
    assert opacities.shape == image_dims + (N,), opacities.shape
    assert isect_offsets.shape == image_dims + (
        tile_height,
        tile_width,
    ), isect_offsets.shape
    assert (
        tile_height * tile_size >= image_height
    ), f"Assert Failed: {tile_height} * {tile_size} >= {image_height}"
    assert (
        tile_width * tile_size >= image_width
    ), f"Assert Failed: {tile_width} * {tile_size} >= {image_width}"

    out_gauss_ids, out_indices = _make_lazy_cuda_func("rasterize_to_indices_2dgs")(
        range_start,
        range_end,
        transmittances.contiguous(),
        means2d.contiguous(),
        ray_transforms.contiguous(),
        opacities.contiguous(),
        image_width,
        image_height,
        tile_size,
        isect_offsets.contiguous(),
        flatten_ids.contiguous(),
    )
    # The kernel output indexes (image, pixel) jointly; split it into the
    # within-image pixel id and the image id.
    out_pixel_ids = out_indices % (image_width * image_height)
    out_image_ids = out_indices // (image_width * image_height)
    return out_gauss_ids, out_pixel_ids, out_image_ids
class _RasterizeToPixels2DGS(torch.autograd.Function):
    """Rasterize gaussians 2DGS.

    Autograd wrapper around the `rasterize_to_pixels_2dgs_fwd` and
    `rasterize_to_pixels_2dgs_bwd` operators obtained through
    `_make_lazy_cuda_func`.
    """

    @staticmethod
    def forward(
        ctx,
        means2d: Tensor,
        ray_transforms: Tensor,
        colors: Tensor,
        opacities: Tensor,
        normals: Tensor,
        densify: Tensor,
        backgrounds: Tensor,
        masks: Tensor,
        width: int,
        height: int,
        tile_size: int,
        isect_offsets: Tensor,
        flatten_ids: Tensor,
        absgrad: bool,
        distloss: bool,
    ) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
        """Run the forward rasterization kernel and stash state for backward.

        `densify` is not consumed by the forward kernel; it is only saved so
        the backward kernel can receive it and produce a gradient for it.

        Returns:
            A 5-tuple `(render_colors, render_alphas, render_normals,
            render_distort, render_median)`. `render_alphas` is cast to
            float32 before being returned.
        """
        (
            render_colors,
            render_alphas,
            render_normals,
            render_distort,
            render_median,
            last_ids,
            median_ids,
        ) = _make_lazy_cuda_func("rasterize_to_pixels_2dgs_fwd")(
            means2d,
            ray_transforms,
            colors,
            opacities,
            normals,
            backgrounds,
            masks,
            width,
            height,
            tile_size,
            isect_offsets,
            flatten_ids,
        )

        # The un-cast `render_alphas` (as returned by the kernel) is what gets
        # saved for backward; the float32 copy below is only the return value.
        ctx.save_for_backward(
            means2d,
            ray_transforms,
            colors,
            opacities,
            normals,
            densify,
            backgrounds,
            masks,
            isect_offsets,
            flatten_ids,
            render_colors,
            render_alphas,
            last_ids,
            median_ids,
        )
        ctx.width = width
        ctx.height = height
        ctx.tile_size = tile_size
        ctx.absgrad = absgrad
        # Stored on ctx but not read by `backward` in this class.
        ctx.distloss = distloss

        # double to float
        render_alphas = render_alphas.float()
        return (
            render_colors,
            render_alphas,
            render_normals,
            render_distort,
            render_median,
        )

    @staticmethod
    def backward(
        ctx,
        v_render_colors: Tensor,
        v_render_alphas: Tensor,
        v_render_normals: Tensor,
        v_render_distort: Tensor,
        v_render_median: Tensor,
    ):
        """Run the backward kernel and map its outputs onto forward's inputs.

        Returns:
            One entry per `forward` argument (15 in total): gradients for
            `means2d`, `ray_transforms`, `colors`, `opacities`, `normals`,
            `densify` and (only when requested) `backgrounds`, followed by
            `None` for every remaining argument.
        """
        (
            means2d,
            ray_transforms,
            colors,
            opacities,
            normals,
            densify,
            backgrounds,
            masks,
            isect_offsets,
            flatten_ids,
            render_colors,
            render_alphas,
            last_ids,
            median_ids,
        ) = ctx.saved_tensors
        width = ctx.width
        height = ctx.height
        tile_size = ctx.tile_size
        absgrad = ctx.absgrad

        (
            v_means2d_abs,
            v_means2d,
            v_ray_transforms,
            v_colors,
            v_opacities,
            v_normals,
            v_densify,
        ) = _make_lazy_cuda_func("rasterize_to_pixels_2dgs_bwd")(
            means2d,
            ray_transforms,
            colors,
            opacities,
            normals,
            densify,
            backgrounds,
            masks,
            width,
            height,
            tile_size,
            isect_offsets,
            flatten_ids,
            render_colors,
            render_alphas,
            last_ids,
            median_ids,
            v_render_colors.contiguous(),
            v_render_alphas.contiguous(),
            v_render_normals.contiguous(),
            v_render_distort.contiguous(),
            v_render_median.contiguous(),
            absgrad,
        )
        # NOTE(review): this blocks the host until all queued CUDA work is
        # done. Presumably a debugging / error-surfacing aid -- confirm whether
        # it is still required before removing it.
        torch.cuda.synchronize()

        if absgrad:
            # Expose the absolute-value gradient as an attribute on the saved
            # `means2d` tensor (presumably read by the caller's densification
            # logic -- verify against callers).
            means2d.absgrad = v_means2d_abs

        # Index 6 is `backgrounds` in forward's argument order.
        if ctx.needs_input_grad[6]:
            v_backgrounds = (v_render_colors * (1.0 - render_alphas).float()).sum(
                dim=(-3, -2)
            )
        else:
            v_backgrounds = None

        return (
            v_means2d,
            v_ray_transforms,
            v_colors,
            v_opacities,
            v_normals,
            v_densify,
            v_backgrounds,
            None,  # masks
            None,  # width
            None,  # height
            None,  # tile_size
            None,  # isect_offsets
            None,  # flatten_ids
            None,  # absgrad
            None,  # distloss
        )