PyPI - warp-lang - Versions diffs - 1.4.1__py3-none-win_amd64.whl → 1.5.0__py3-none-win_amd64.whl - Mend

warp-lang 1.4.1__py3-none-win_amd64.whl → 1.5.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (164)

warp/__init__.py +4 -0
warp/autograd.py +43 -8
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +21 -2
warp/build_dll.py +23 -6
warp/builtins.py +1920 -111
warp/codegen.py +186 -62
warp/config.py +2 -2
warp/context.py +322 -73
warp/examples/assets/pixel.jpg +0 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
warp/examples/benchmarks/benchmark_gemm.py +121 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
warp/examples/benchmarks/benchmark_tile.py +179 -0
warp/examples/core/example_dem.py +2 -1
warp/examples/core/example_mesh_intersect.py +3 -3
warp/examples/fem/example_adaptive_grid.py +37 -10
warp/examples/fem/example_apic_fluid.py +3 -2
warp/examples/fem/example_convection_diffusion_dg.py +4 -5
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion_3d.py +47 -4
warp/examples/fem/example_distortion_energy.py +220 -0
warp/examples/fem/example_magnetostatics.py +127 -85
warp/examples/fem/example_nonconforming_contact.py +5 -5
warp/examples/fem/example_stokes.py +3 -1
warp/examples/fem/example_streamlines.py +12 -19
warp/examples/fem/utils.py +38 -15
warp/examples/optim/example_walker.py +2 -2
warp/examples/sim/example_cloth.py +2 -25
warp/examples/sim/example_jacobian_ik.py +6 -2
warp/examples/sim/example_quadruped.py +2 -1
warp/examples/tile/example_tile_convolution.py +58 -0
warp/examples/tile/example_tile_fft.py +47 -0
warp/examples/tile/example_tile_filtering.py +105 -0
warp/examples/tile/example_tile_matmul.py +79 -0
warp/examples/tile/example_tile_mlp.py +375 -0
warp/fem/__init__.py +8 -0
warp/fem/cache.py +16 -12
warp/fem/dirichlet.py +1 -1
warp/fem/domain.py +44 -1
warp/fem/field/__init__.py +1 -2
warp/fem/field/field.py +31 -19
warp/fem/field/nodal_field.py +101 -49
warp/fem/field/virtual.py +794 -0
warp/fem/geometry/__init__.py +2 -2
warp/fem/geometry/deformed_geometry.py +3 -105
warp/fem/geometry/element.py +13 -0
warp/fem/geometry/geometry.py +165 -5
warp/fem/geometry/grid_2d.py +3 -6
warp/fem/geometry/grid_3d.py +31 -28
warp/fem/geometry/hexmesh.py +3 -46
warp/fem/geometry/nanogrid.py +3 -2
warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
warp/fem/geometry/tetmesh.py +2 -43
warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
warp/fem/integrate.py +683 -261
warp/fem/linalg.py +404 -0
warp/fem/operator.py +101 -18
warp/fem/polynomial.py +5 -5
warp/fem/quadrature/quadrature.py +45 -21
warp/fem/space/__init__.py +45 -11
warp/fem/space/basis_function_space.py +451 -0
warp/fem/space/basis_space.py +58 -11
warp/fem/space/function_space.py +146 -5
warp/fem/space/grid_2d_function_space.py +80 -66
warp/fem/space/grid_3d_function_space.py +113 -68
warp/fem/space/hexmesh_function_space.py +96 -108
warp/fem/space/nanogrid_function_space.py +62 -110
warp/fem/space/quadmesh_function_space.py +208 -0
warp/fem/space/shape/__init__.py +45 -7
warp/fem/space/shape/cube_shape_function.py +328 -54
warp/fem/space/shape/shape_function.py +10 -1
warp/fem/space/shape/square_shape_function.py +328 -60
warp/fem/space/shape/tet_shape_function.py +269 -19
warp/fem/space/shape/triangle_shape_function.py +238 -19
warp/fem/space/tetmesh_function_space.py +69 -37
warp/fem/space/topology.py +38 -0
warp/fem/space/trimesh_function_space.py +179 -0
warp/fem/utils.py +6 -331
warp/jax_experimental.py +3 -1
warp/native/array.h +55 -40
warp/native/builtin.h +124 -43
warp/native/bvh.h +4 -0
warp/native/coloring.cpp +600 -0
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -1
warp/native/fabric.h +8 -0
warp/native/hashgrid.h +4 -0
warp/native/marching.cu +8 -0
warp/native/mat.h +14 -3
warp/native/mathdx.cpp +59 -0
warp/native/mesh.h +4 -0
warp/native/range.h +13 -1
warp/native/reduce.cpp +9 -1
warp/native/reduce.cu +7 -0
warp/native/runlength_encode.cpp +9 -1
warp/native/runlength_encode.cu +7 -1
warp/native/scan.cpp +8 -0
warp/native/scan.cu +8 -0
warp/native/scan.h +8 -1
warp/native/sparse.cpp +8 -0
warp/native/sparse.cu +8 -0
warp/native/temp_buffer.h +7 -0
warp/native/tile.h +1857 -0
warp/native/tile_gemm.h +341 -0
warp/native/tile_reduce.h +210 -0
warp/native/volume_builder.cu +8 -0
warp/native/volume_builder.h +8 -0
warp/native/warp.cpp +10 -2
warp/native/warp.cu +369 -15
warp/native/warp.h +12 -2
warp/optim/adam.py +39 -4
warp/paddle.py +29 -12
warp/render/render_opengl.py +137 -65
warp/sim/graph_coloring.py +292 -0
warp/sim/integrator_euler.py +4 -2
warp/sim/integrator_featherstone.py +115 -44
warp/sim/integrator_vbd.py +6 -0
warp/sim/model.py +90 -17
warp/stubs.py +651 -85
warp/tape.py +12 -7
warp/tests/assets/pixel.npy +0 -0
warp/tests/aux_test_instancing_gc.py +18 -0
warp/tests/test_array.py +207 -48
warp/tests/test_closest_point_edge_edge.py +8 -8
warp/tests/test_codegen.py +120 -1
warp/tests/test_codegen_instancing.py +30 -0
warp/tests/test_collision.py +110 -0
warp/tests/test_coloring.py +241 -0
warp/tests/test_context.py +34 -0
warp/tests/test_examples.py +18 -4
warp/tests/test_fabricarray.py +33 -0
warp/tests/test_fem.py +453 -113
warp/tests/test_func.py +48 -1
warp/tests/test_generics.py +52 -0
warp/tests/test_iter.py +68 -0
warp/tests/test_mat_scalar_ops.py +1 -1
warp/tests/test_mesh_query_point.py +5 -4
warp/tests/test_module_hashing.py +23 -0
warp/tests/test_paddle.py +27 -87
warp/tests/test_print.py +191 -1
warp/tests/test_spatial.py +1 -1
warp/tests/test_tile.py +700 -0
warp/tests/test_tile_mathdx.py +144 -0
warp/tests/test_tile_mlp.py +383 -0
warp/tests/test_tile_reduce.py +374 -0
warp/tests/test_tile_shared_memory.py +190 -0
warp/tests/test_vbd.py +12 -20
warp/tests/test_volume.py +43 -0
warp/tests/unittest_suites.py +23 -2
warp/tests/unittest_utils.py +4 -0
warp/types.py +339 -73
warp/utils.py +22 -1
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
warp/fem/field/test.py +0 -180
warp/fem/field/trial.py +0 -183
warp/fem/space/collocated_function_space.py +0 -102
warp/fem/space/quadmesh_2d_function_space.py +0 -261
warp/fem/space/trimesh_2d_function_space.py +0 -153
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0

warp/tests/unittest_suites.py CHANGED

@@ -99,8 +99,11 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_closest_point_edge_edge import TestClosestPointEdgeEdgeMethods
     from warp.tests.test_codegen import TestCodeGen
     from warp.tests.test_codegen_instancing import TestCodeGenInstancing
+    from warp.tests.test_collision import TestCollision
+    from warp.tests.test_coloring import TestColoring
     from warp.tests.test_compile_consts import TestConstants
     from warp.tests.test_conditional import TestConditional
+    from warp.tests.test_context import TestContext
     from warp.tests.test_copy import TestCopy
     from warp.tests.test_ctypes import TestCTypes
     from warp.tests.test_dense import TestDense
@@ -115,7 +118,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     )
     from warp.tests.test_fabricarray import TestFabricArray
     from warp.tests.test_fast_math import TestFastMath
-    from warp.tests.test_fem import TestFem, TestFemShapeFunctions
+    from warp.tests.test_fem import TestFem, TestFemShapeFunctions, TestFemUtilities
     from warp.tests.test_fp16 import TestFp16
     from warp.tests.test_func import TestFunc
     from warp.tests.test_future_annotations import TestFutureAnnotations
@@ -127,6 +130,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_import import TestImport
     from warp.tests.test_indexedarray import TestIndexedArray
     from warp.tests.test_intersect import TestIntersect
+    from warp.tests.test_iter import TestIter
     from warp.tests.test_jax import TestJax
     from warp.tests.test_large import TestLarge
     from warp.tests.test_launch import TestLaunch
@@ -170,9 +174,14 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_sparse import TestSparse
     from warp.tests.test_spatial import TestSpatial
     from warp.tests.test_special_values import TestSpecialValues
+    from warp.tests.test_static import TestStatic
     from warp.tests.test_streams import TestStreams
     from warp.tests.test_struct import TestStruct
     from warp.tests.test_tape import TestTape
+    from warp.tests.test_tile import TestTile
+    from warp.tests.test_tile_mathdx import TestTileMathDx
+    from warp.tests.test_tile_reduce import TestTileReduce
+    from warp.tests.test_tile_shared_memory import TestTileSharedMemory
     from warp.tests.test_torch import TestTorch
     from warp.tests.test_transient_module import TestTransientModule
     from warp.tests.test_triangle_closest_point import TestTriangleClosestPoint
@@ -199,8 +208,11 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestClosestPointEdgeEdgeMethods,
         TestCodeGen,
         TestCodeGenInstancing,
-        TestConstants,
+        TestCollision,
+        TestColoring,
         TestConditional,
+        TestConstants,
+        TestContext,
         TestCopy,
         TestCTypes,
         TestDense,
@@ -215,6 +227,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestFastMath,
         TestFem,
         TestFemShapeFunctions,
+        TestFemUtilities,
         TestFp16,
         TestFunc,
         TestFutureAnnotations,
@@ -226,6 +239,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestImport,
         TestIndexedArray,
         TestIntersect,
+        TestIter,
         TestJax,
         TestLarge,
         TestLaunch,
@@ -269,9 +283,14 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestSparse,
         TestSpatial,
         TestSpecialValues,
+        TestStatic,
         TestStreams,
         TestStruct,
         TestTape,
+        TestTile,
+        TestTileMathDx,
+        TestTileReduce,
+        TestTileSharedMemory,
         TestTorch,
         TestTransientModule,
         TestTriangleClosestPoint,
@@ -329,6 +348,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
     from warp.tests.test_rounding import TestRounding
     from warp.tests.test_runlength_encode import TestRunlengthEncode
     from warp.tests.test_sparse import TestSparse
+    from warp.tests.test_static import TestStatic
     from warp.tests.test_streams import TestStreams
     from warp.tests.test_tape import TestTape
     from warp.tests.test_transient_module import TestTransientModule
@@ -374,6 +394,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
         TestRounding,
         TestRunlengthEncode,
         TestSparse,
+        TestStatic,
         TestStreams,
         TestTape,
         TestTransientModule,

warp/tests/unittest_utils.py CHANGED

@@ -232,6 +232,10 @@ def create_test_func(func, device, check_output, **kwargs):
         else:
             func(self, device, **kwargs)
+    # Copy the __unittest_expecting_failure__ attribute from func to test_func
+    if hasattr(func, "__unittest_expecting_failure__"):
+        test_func.__unittest_expecting_failure__ = func.__unittest_expecting_failure__
     return test_func

warp/types.py CHANGED

@@ -12,9 +12,10 @@ import ctypes
 import inspect
 import struct
 import zlib
-from typing import Any, Callable, Generic, List, NamedTuple, Optional, Sequence, Tuple, TypeVar, Union
+from typing import Any, Callable, Generic, List, Literal, NamedTuple, Optional, Sequence, Tuple, TypeVar, Union
 import numpy as np
+import numpy.typing as npt
 import warp
@@ -100,8 +101,10 @@ def vector(length, dtype):
         if dtype is bool:
             _type_ = ctypes.c_bool
-        elif dtype in [Scalar, Float]:
+        elif dtype in (Scalar, Float):
             _type_ = ctypes.c_float
+        elif dtype is Int:
+            _type_ = ctypes.c_int
         else:
             _type_ = dtype._type_
@@ -289,8 +292,10 @@ def matrix(shape, dtype):
         if dtype is bool:
             _type_ = ctypes.c_bool
-        elif dtype in [Scalar, Float]:
+        elif dtype in (Scalar, Float):
             _type_ = ctypes.c_float
+        elif dtype is Int:
+            _type_ = ctypes.c_int
         else:
             _type_ = dtype._type_
@@ -991,43 +996,6 @@ vector_types = (
     spatial_matrixd,
 )
-atomic_vector_types = (
-    vec2i,
-    vec2ui,
-    vec2l,
-    vec2ul,
-    vec2h,
-    vec2f,
-    vec2d,
-    vec3i,
-    vec3ui,
-    vec3l,
-    vec3ul,
-    vec3h,
-    vec3f,
-    vec3d,
-    vec4i,
-    vec4ui,
-    vec4l,
-    vec4ul,
-    vec4h,
-    vec4f,
-    vec4d,
-    mat22h,
-    mat22f,
-    mat22d,
-    mat33h,
-    mat33f,
-    mat33d,
-    mat44h,
-    mat44f,
-    mat44d,
-    quath,
-    quatf,
-    quatd,
-)
-atomic_types = float_types + (int32, uint32, int64, uint64) + atomic_vector_types
 np_dtype_to_warp_type = {
     # Numpy scalar types
     np.bool_: bool,
@@ -1076,6 +1044,14 @@ warp_type_to_np_dtype = {
     float64: np.float64,
 }
+non_atomic_types = (
+    int8,
+    uint8,
+    int16,
+    uint16,
+    int64,
+)
 def dtype_from_numpy(numpy_dtype):
     """Return the Warp dtype corresponding to a NumPy dtype."""
@@ -1337,6 +1313,8 @@ def type_typestr(dtype):
 def type_repr(t):
     if is_array(t):
         return str(f"array(ndim={t.ndim}, dtype={t.dtype})")
+    if is_tile(t):
+        return str(f"tile(dtype={t.dtype}, m={t.M}, n={t.N})")
     if type_is_vector(t):
         return str(f"vector(length={t._shape_[0]}, dtype={t._wp_scalar_type_})")
     if type_is_matrix(t):
@@ -1448,6 +1426,8 @@ def scalars_equal(a, b, match_generic):
 def types_equal(a, b, match_generic=False):
     if match_generic:
+        # Special cases to interpret the types listed in `int_tuple_type_hints`
+        # as generic hints that accept any integer types.
         if a in int_tuple_type_hints and isinstance(b, Sequence):
             a_length = int_tuple_type_hints[a]
             if (a_length == -1 or a_length == len(b)) and all(
@@ -1466,6 +1446,24 @@ def types_equal(a, b, match_generic=False):
             if a_length is None or b_length is None or a_length == b_length:
                 return True
+    a_origin = warp.codegen.get_type_origin(a)
+    b_origin = warp.codegen.get_type_origin(b)
+    if a_origin is tuple and b_origin is tuple:
+        a_args = warp.codegen.get_type_args(a)
+        b_args = warp.codegen.get_type_args(b)
+        if len(a_args) == len(b_args) and all(
+            scalars_equal(x, y, match_generic=match_generic) for x, y in zip(a_args, b_args)
+        ):
+            return True
+    elif a_origin is tuple and isinstance(b, Sequence):
+        a_args = warp.codegen.get_type_args(a)
+        if len(a_args) == len(b) and all(scalars_equal(x, y, match_generic=match_generic) for x, y in zip(a_args, b)):
+            return True
+    elif b_origin is tuple and isinstance(a, Sequence):
+        b_args = warp.codegen.get_type_args(b)
+        if len(b_args) == len(a) and all(scalars_equal(x, y, match_generic=match_generic) for x, y in zip(b_args, a)):
+            return True
     # convert to canonical types
     if a == float:
         a = float32
@@ -1488,13 +1486,16 @@ def types_equal(a, b, match_generic=False):
         return True
-    if is_array(a) and type(a) is type(b):
+    if is_array(a) and type(a) is type(b) and types_equal(a.dtype, b.dtype, match_generic=match_generic):
         return True
     # match NewStructInstance and Struct dtype
     if getattr(a, "cls", "a") is getattr(b, "cls", "b"):
         return True
+    if is_tile(a) and is_tile(b):
+        return True
     return scalars_equal(a, b, match_generic)
@@ -1581,6 +1582,23 @@ def array_ctype_from_interface(interface: dict, dtype=None, owner=None):
 class array(Array):
+    """A fixed-size multi-dimensional array containing values of the same type.
+    Attributes:
+        dtype (DType): The data type of the array.
+        ndim (int): The number of array dimensions.
+        size (int): The number of items in the array.
+        capacity (int): The amount of memory in bytes allocated for this array.
+        shape (Tuple[int]): Dimensions of the array.
+        strides (Tuple[int]): Number of bytes in each dimension between successive elements of the array.
+        ptr (int): Pointer to underlying memory allocation backing the array.
+        device (Device): The device where the array's memory allocation resides.
+        pinned (bool): Indicates whether the array was allocated in pinned host memory.
+        is_contiguous (bool): Indicates whether this array has a contiguous memory layout.
+        deleter (Callable[[int, int], None]): A function to be called when the array is deleted,
+            taking two arguments: pointer and size. If ``None``, then no function is called.
+    """
     # member attributes available during code-gen (e.g.: d = array.shape[0])
     # (initialized when needed)
     _vars = None
@@ -1592,21 +1610,21 @@ class array(Array):
     def __init__(
         self,
-        data=None,
-        dtype: DType = Any,
-        shape=None,
-        strides=None,
-        length=None,
-        ptr=None,
-        capacity=None,
+        data: Optional[Union[List, Tuple, npt.NDArray]] = None,
+        dtype: Union[DType, Any] = Any,
+        shape: Optional[Tuple[int, ...]] = None,
+        strides: Optional[Tuple[int, ...]] = None,
+        length: Optional[int] = None,
+        ptr: Optional[int] = None,
+        capacity: Optional[int] = None,
         device=None,
-        pinned=False,
-        copy=True,
-        owner=False,  # deprecated - pass deleter instead
-        deleter=None,
-        ndim=None,
-        grad=None,
-        requires_grad=False,
+        pinned: bool = False,
+        copy: bool = True,
+        owner: bool = False,  # deprecated - pass deleter instead
+        deleter: Optional[Callable[[int, int], None]] = None,
+        ndim: Optional[int] = None,
+        grad: Optional[array] = None,
+        requires_grad: bool = False,
     ):
         """Constructs a new Warp array object
@@ -1628,20 +1646,24 @@ class array(Array):
         are taken into account and no memory is allocated for the array.
         Args:
-            data (Union[list, tuple, ndarray]): An object to construct the array from, can be a Tuple, List, or generally any type convertible to an np.array
-            dtype (Union): One of the available `data types <#data-types>`_, such as :class:`warp.float32`, :class:`warp.mat33`, or a custom `struct <#structs>`_. If dtype is ``Any`` and data is an ndarray, then it will be inferred from the array data type
-            shape (tuple): Dimensions of the array
-            strides (tuple): Number of bytes in each dimension between successive elements of the array
-            length (int): Number of elements of the data type (deprecated, users should use `shape` argument)
-            ptr (uint64): Address of an external memory address to alias (data should be None)
-            capacity (int): Maximum size in bytes of the ptr allocation (data should be None)
+            data: An object to construct the array from, can be a Tuple, List, or generally any type convertible to an np.array
+            dtype: One of the available `data types <#data-types>`_, such as :class:`warp.float32`, :class:`warp.mat33`, or a custom `struct <#structs>`_. If dtype is ``Any`` and data is an ndarray, then it will be inferred from the array data type
+            shape: Dimensions of the array
+            strides: Number of bytes in each dimension between successive elements of the array
+            length: Number of elements of the data type (deprecated, users should use ``shape`` argument)
+            ptr: Address of an external memory address to alias (``data`` should be ``None``)
+            capacity: Maximum size in bytes of the ``ptr`` allocation (``data`` should be ``None``)
             device (Devicelike): Device the array lives on
-            copy (bool): Whether the incoming data will be copied or aliased, this is only possible when the incoming `data` already lives on the device specified and types match
-            owner (bool): Should the array object try to deallocate memory when it is deleted (deprecated, pass `deleter` if you wish to transfer ownership to Warp)
-            deleter (Callable): Function to be called when deallocating the array, taking two arguments, pointer and size
-            requires_grad (bool): Whether or not gradients will be tracked for this array, see :class:`warp.Tape` for details
-            grad (array): The gradient array to use
-            pinned (bool): Whether to allocate pinned host memory, which allows asynchronous host-device transfers (only applicable with device="cpu")
+            copy: Whether the incoming ``data`` will be copied or aliased. Aliasing requires that
+                the incoming ``data`` already lives on the ``device`` specified and the data types match.
+            owner: Whether the array will try to deallocate the underlying memory when it is deleted
+                (deprecated, pass ``deleter`` if you wish to transfer ownership to Warp)
+            deleter: Function to be called when the array is deleted, taking two arguments: pointer and size
+            requires_grad: Whether or not gradients will be tracked for this array, see :class:`warp.Tape` for details
+            grad: The array in which to accumulate gradients in the backward pass. If ``None`` and ``requires_grad`` is ``True``,
+                then a gradient array will be allocated automatically.
+            pinned: Whether to allocate pinned host memory, which allows asynchronous host–device transfers
+                (only applicable with ``device="cpu"``)
         """
@@ -2963,6 +2985,116 @@ def array_type_id(a):
         raise ValueError("Invalid array type")
+# tile expression objects
+class Tile:
+    alignment = 16
+    def __init__(self, dtype, M, N, op=None, storage="register", layout="rowmajor", strides=None, owner=True):
+        self.dtype = type_to_warp(dtype)
+        self.M = M
+        self.N = N
+        self.op = op
+        self.storage = storage
+        self.layout = layout
+        if strides is None:
+            if layout == "rowmajor":
+                self.strides = (N, 1)
+            elif layout == "colmajor":
+                self.strides = (1, M)
+        else:
+            self.strides = strides
+        self.owner = owner
+    # generates C-type string
+    def ctype(self):
+        from warp.codegen import Var
+        if self.storage == "register":
+            return f"wp::tile_register_t<{Var.type_to_ctype(self.dtype)},{self.M},{self.N}>"
+        elif self.storage == "shared":
+            return f"wp::tile_shared_t<{Var.type_to_ctype(self.dtype)},{self.M},{self.N},{self.strides[0]}, {self.strides[1]}, {'true' if self.owner else 'false'}>"
+        else:
+            raise RuntimeError(f"Unrecognized tile storage type {self.storage}")
+    # generates C-initializer string
+    def cinit(self, requires_grad=False):
+        from warp.codegen import Var
+        if self.storage == "register":
+            return self.ctype() + "(0.0)"
+        elif self.storage == "shared":
+            if self.owner:
+                # allocate new shared memory tile
+                return f"wp::tile_alloc_empty<{Var.type_to_ctype(self.dtype)},{self.M},{self.N},{'true' if requires_grad else 'false'}>()"
+            else:
+                # tile will be initialized by another call, e.g.: tile_transpose()
+                return "NULL"
+    # return total tile size in bytes
+    def size_in_bytes(self):
+        num_bytes = self.align(type_size_in_bytes(self.dtype) * self.M * self.N)
+        return num_bytes
+    # align tile size to natural boundary, default 16-bytes
+    def align(self, bytes):
+        return ((bytes + self.alignment - 1) // self.alignment) * self.alignment
+class TileZeros(Tile):
+    def __init__(self, dtype, M, N, storage="register"):
+        Tile.__init__(self, dtype, M, N, op="zeros", storage=storage)
+class TileRange(Tile):
+    def __init__(self, dtype, start, stop, step, storage="register"):
+        self.start = start
+        self.stop = stop
+        self.step = step
+        M = 1
+        N = int((stop - start) / step)
+        Tile.__init__(self, dtype, M, N, op="arange", storage=storage)
+class TileConstant(Tile):
+    def __init__(self, dtype, M, N):
+        Tile.__init__(self, dtype, M, N, op="constant", storage="register")
+class TileLoad(Tile):
+    def __init__(self, array, M, N, storage="register"):
+        Tile.__init__(self, array.dtype, M, N, op="load", storage=storage)
+class TileUnaryMap(Tile):
+    def __init__(self, t, storage="register"):
+        Tile.__init__(self, t.dtype, t.M, t.N, op="unary_map", storage=storage)
+        self.t = t
+class TileBinaryMap(Tile):
+    def __init__(self, a, b, storage="register"):
+        Tile.__init__(self, a.dtype, a.M, a.N, op="binary_map", storage=storage)
+        self.a = a
+        self.b = b
+class TileShared(Tile):
+    def __init__(self, t):
+        Tile.__init__(self, t.dtype, t.M, t.N, "shared", storage="shared")
+        self.t = t
+def is_tile(t):
+    return isinstance(t, Tile)
 class Bvh:
     def __new__(cls, *args, **kwargs):
         instance = super(Bvh, cls).__new__(cls)
@@ -3544,9 +3676,9 @@ class Volume:
             grid_data = bytearray()
             while grid_data_offset < file_end:
                 chunk_size = struct.unpack("<Q", data[grid_data_offset : grid_data_offset + 8])[0]
-                grid_data += zlib.decompress(data[grid_data_offset + 8 :])
-                grid_data_offset += 8 + chunk_size
+                grid_data_offset += 8
+                grid_data += zlib.decompress(data[grid_data_offset : grid_data_offset + chunk_size])
+                grid_data_offset += chunk_size
         elif codec == 2:  # blosc compression
             try:
                 import blosc
@@ -3558,8 +3690,9 @@ class Volume:
             grid_data = bytearray()
             while grid_data_offset < file_end:
                 chunk_size = struct.unpack("<Q", data[grid_data_offset : grid_data_offset + 8])[0]
-                grid_data += blosc.decompress(data[grid_data_offset + 8 :])
-                grid_data_offset += 8 + chunk_size
+                grid_data_offset += 8
+                grid_data += blosc.decompress(data[grid_data_offset : grid_data_offset + chunk_size])
+                grid_data_offset += chunk_size
         else:
             raise RuntimeError(f"Unsupported codec code: {codec}")
@@ -3570,6 +3703,139 @@ class Volume:
         data_array = array(np.frombuffer(grid_data, dtype=np.byte), device=device)
         return cls(data_array)
+    def save_to_nvdb(self, path, codec: Literal["none", "zip", "blosc"] = "none"):
+        """Serialize the Volume into a NanoVDB (.nvdb) file.
+        Args:
+            path: File path to save.
+            codec: Compression codec used
+                "none" - no compression
+                "zip" - ZIP compression
+                "blosc" - BLOSC compression, requires the blosc module to be installed
+        """
+        codec_dict = {"none": 0, "zip": 1, "blosc": 2}
+        class FileHeader(ctypes.Structure):
+            _fields_ = [
+                ("magic", ctypes.c_uint64),
+                ("version", ctypes.c_uint32),
+                ("gridCount", ctypes.c_uint16),
+                ("codec", ctypes.c_uint16),
+            ]
+        class FileMetaData(ctypes.Structure):
+            _fields_ = [
+                ("gridSize", ctypes.c_uint64),
+                ("fileSize", ctypes.c_uint64),
+                ("nameKey", ctypes.c_uint64),
+                ("voxelCount", ctypes.c_uint64),
+                ("gridType", ctypes.c_uint32),
+                ("gridClass", ctypes.c_uint32),
+                ("worldBBox", ctypes.c_double * 6),
+                ("indexBBox", ctypes.c_uint32 * 6),
+                ("voxelSize", ctypes.c_double * 3),
+                ("nameSize", ctypes.c_uint32),
+                ("nodeCount", ctypes.c_uint32 * 4),
+                ("tileCount", ctypes.c_uint32 * 3),
+                ("codec", ctypes.c_uint16),
+                ("padding", ctypes.c_uint16),
+                ("version", ctypes.c_uint32),
+            ]
+        class GridData(ctypes.Structure):
+            _fields_ = [
+                ("magic", ctypes.c_uint64),
+                ("checksum", ctypes.c_uint64),
+                ("version", ctypes.c_uint32),
+                ("flags", ctypes.c_uint32),
+                ("gridIndex", ctypes.c_uint32),
+                ("gridCount", ctypes.c_uint32),
+                ("gridSize", ctypes.c_uint64),
+                ("gridName", ctypes.c_char * 256),
+                ("map", ctypes.c_byte * 264),
+                ("worldBBox", ctypes.c_double * 6),
+                ("voxelSize", ctypes.c_double * 3),
+                ("gridClass", ctypes.c_uint32),
+                ("gridType", ctypes.c_uint32),
+                ("blindMetadataOffset", ctypes.c_int64),
+                ("blindMetadataCount", ctypes.c_uint32),
+                ("data0", ctypes.c_uint32),
+                ("data1", ctypes.c_uint64),
+                ("data2", ctypes.c_uint64),
+            ]
+        NVDB_MAGIC = 0x304244566F6E614E
+        NVDB_VERSION = 32 << 21 | 3 << 10 | 3
+        try:
+            codec_int = codec_dict[codec]
+        except KeyError as err:
+            raise RuntimeError(f"Unsupported codec requested: {codec}") from err
+        if codec_int == 2:
+            try:
+                import blosc
+            except ImportError as err:
+                raise RuntimeError(
+                    f"blosc compression was requested, but Python module could not be imported: {err}"
+                ) from err
+        data = self.array().numpy()
+        grid_data = GridData.from_buffer(data)
+        if grid_data.gridIndex > 0:
+            raise RuntimeError(
+                "Saving of aliased Volumes is not supported. Use `save_to_nvdb` on the original volume, before any `load_next_grid` calls."
+            )
+        file_header = FileHeader(NVDB_MAGIC, NVDB_VERSION, grid_data.gridCount, codec_int)
+        grid_data_offset = 0
+        all_file_meta_data = []
+        for i in range(file_header.gridCount):
+            if i > 0:
+                grid_data = GridData.from_buffer(data[grid_data_offset : grid_data_offset + 672])
+            current_grid_data = data[grid_data_offset : grid_data_offset + grid_data.gridSize]
+            if codec_int == 1:  # zip compression
+                compressed_data = zlib.compress(current_grid_data)
+                compressed_size = len(compressed_data)
+            elif codec_int == 2:  # blosc compression
+                compressed_data = blosc.compress(current_grid_data)
+                compressed_size = len(compressed_data)
+            else:  # no compression
+                compressed_data = current_grid_data
+                compressed_size = grid_data.gridSize
+            file_meta_data = FileMetaData()
+            file_meta_data.gridSize = grid_data.gridSize
+            file_meta_data.fileSize = compressed_size
+            file_meta_data.gridType = grid_data.gridType
+            file_meta_data.gridClass = grid_data.gridClass
+            file_meta_data.worldBBox = grid_data.worldBBox
+            file_meta_data.voxelSize = grid_data.voxelSize
+            file_meta_data.nameSize = len(grid_data.gridName) + 1  # including the closing 0x0
+            file_meta_data.codec = codec_int
+            file_meta_data.version = NVDB_VERSION
+            grid_data_offset += file_meta_data.gridSize
+            all_file_meta_data.append((file_meta_data, grid_data.gridName, compressed_data))
+        with open(path, "wb") as nvdb:
+            nvdb.write(file_header)
+            for file_meta_data, grid_name, _ in all_file_meta_data:
+                nvdb.write(file_meta_data)
+                nvdb.write(grid_name + b"\x00")
+            for file_meta_data, _, compressed_data in all_file_meta_data:
+                if codec_int > 0:
+                    chunk_size = struct.pack("<Q", file_meta_data.fileSize)
+                    nvdb.write(chunk_size)
+                nvdb.write(compressed_data)
+        return path
     @classmethod
     def load_from_address(cls, grid_ptr: int, buffer_size: int = 0, device=None) -> Volume:
         """

warp/utils.py CHANGED

@@ -18,6 +18,7 @@ import numpy as np
 import warp as wp
 import warp.context
 import warp.types
+from warp.context import Devicelike
 warnings_seen = set()
@@ -554,7 +555,27 @@ def mem_report():  # pragma: no cover
 class ScopedDevice:
-    def __init__(self, device):
+    """A context manager to temporarily change the current default device.
+    For CUDA devices, this context manager makes the device's CUDA context
+    current and restores the previous CUDA context on exit. This is handy when
+    running Warp scripts as part of a bigger pipeline because it avoids any side
+    effects of changing the CUDA context in the enclosed code.
+    Attributes:
+        device (Device): The device that will temporarily become the default
+          device within the context.
+        saved_device (Device): The previous default device. This is restored as
+          the default device on exiting the context.
+    """
+    def __init__(self, device: Devicelike):
+        """Initializes the context manager with a device.
+        Args:
+            device: The device that will temporarily become the default device
+              within the context.
+        """
         self.device = wp.get_device(device)
     def __enter__(self):