warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang has been flagged as potentially problematic; consult the package registry's advisory page for details.

Files changed (193)
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/warp-clang.dll +0 -0
  5. warp/bin/warp.dll +0 -0
  6. warp/build.py +241 -252
  7. warp/build_dll.py +130 -26
  8. warp/builtins.py +1907 -384
  9. warp/codegen.py +272 -104
  10. warp/config.py +12 -1
  11. warp/constants.py +1 -1
  12. warp/context.py +770 -238
  13. warp/dlpack.py +1 -1
  14. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  15. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  16. warp/examples/core/example_sample_mesh.py +1 -1
  17. warp/examples/core/example_spin_lock.py +93 -0
  18. warp/examples/core/example_work_queue.py +118 -0
  19. warp/examples/fem/example_adaptive_grid.py +5 -5
  20. warp/examples/fem/example_apic_fluid.py +1 -1
  21. warp/examples/fem/example_burgers.py +1 -1
  22. warp/examples/fem/example_convection_diffusion.py +9 -6
  23. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  24. warp/examples/fem/example_deformed_geometry.py +1 -1
  25. warp/examples/fem/example_diffusion.py +2 -2
  26. warp/examples/fem/example_diffusion_3d.py +1 -1
  27. warp/examples/fem/example_distortion_energy.py +1 -1
  28. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  29. warp/examples/fem/example_magnetostatics.py +5 -3
  30. warp/examples/fem/example_mixed_elasticity.py +5 -3
  31. warp/examples/fem/example_navier_stokes.py +11 -9
  32. warp/examples/fem/example_nonconforming_contact.py +5 -3
  33. warp/examples/fem/example_streamlines.py +8 -3
  34. warp/examples/fem/utils.py +9 -8
  35. warp/examples/interop/example_jax_callable.py +34 -4
  36. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  37. warp/examples/interop/example_jax_kernel.py +27 -1
  38. warp/examples/optim/example_drone.py +1 -1
  39. warp/examples/sim/example_cloth.py +1 -1
  40. warp/examples/sim/example_cloth_self_contact.py +48 -54
  41. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  42. warp/examples/tile/example_tile_cholesky.py +2 -1
  43. warp/examples/tile/example_tile_convolution.py +1 -1
  44. warp/examples/tile/example_tile_filtering.py +1 -1
  45. warp/examples/tile/example_tile_matmul.py +1 -1
  46. warp/examples/tile/example_tile_mlp.py +2 -0
  47. warp/fabric.py +7 -7
  48. warp/fem/__init__.py +5 -0
  49. warp/fem/adaptivity.py +1 -1
  50. warp/fem/cache.py +152 -63
  51. warp/fem/dirichlet.py +2 -2
  52. warp/fem/domain.py +136 -6
  53. warp/fem/field/field.py +141 -99
  54. warp/fem/field/nodal_field.py +85 -39
  55. warp/fem/field/virtual.py +99 -52
  56. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  57. warp/fem/geometry/closest_point.py +13 -0
  58. warp/fem/geometry/deformed_geometry.py +102 -40
  59. warp/fem/geometry/element.py +56 -2
  60. warp/fem/geometry/geometry.py +323 -22
  61. warp/fem/geometry/grid_2d.py +157 -62
  62. warp/fem/geometry/grid_3d.py +116 -20
  63. warp/fem/geometry/hexmesh.py +86 -20
  64. warp/fem/geometry/nanogrid.py +166 -86
  65. warp/fem/geometry/partition.py +59 -25
  66. warp/fem/geometry/quadmesh.py +86 -135
  67. warp/fem/geometry/tetmesh.py +47 -119
  68. warp/fem/geometry/trimesh.py +77 -270
  69. warp/fem/integrate.py +181 -95
  70. warp/fem/linalg.py +25 -58
  71. warp/fem/operator.py +124 -27
  72. warp/fem/quadrature/pic_quadrature.py +36 -14
  73. warp/fem/quadrature/quadrature.py +40 -16
  74. warp/fem/space/__init__.py +1 -1
  75. warp/fem/space/basis_function_space.py +66 -46
  76. warp/fem/space/basis_space.py +17 -4
  77. warp/fem/space/dof_mapper.py +1 -1
  78. warp/fem/space/function_space.py +2 -2
  79. warp/fem/space/grid_2d_function_space.py +4 -1
  80. warp/fem/space/hexmesh_function_space.py +4 -2
  81. warp/fem/space/nanogrid_function_space.py +3 -1
  82. warp/fem/space/partition.py +11 -2
  83. warp/fem/space/quadmesh_function_space.py +4 -1
  84. warp/fem/space/restriction.py +5 -2
  85. warp/fem/space/shape/__init__.py +10 -8
  86. warp/fem/space/tetmesh_function_space.py +4 -1
  87. warp/fem/space/topology.py +52 -21
  88. warp/fem/space/trimesh_function_space.py +4 -1
  89. warp/fem/utils.py +53 -8
  90. warp/jax.py +1 -2
  91. warp/jax_experimental/ffi.py +210 -67
  92. warp/jax_experimental/xla_ffi.py +37 -24
  93. warp/math.py +171 -1
  94. warp/native/array.h +103 -4
  95. warp/native/builtin.h +182 -35
  96. warp/native/coloring.cpp +6 -2
  97. warp/native/cuda_util.cpp +1 -1
  98. warp/native/exports.h +118 -63
  99. warp/native/intersect.h +5 -5
  100. warp/native/mat.h +8 -13
  101. warp/native/mathdx.cpp +11 -5
  102. warp/native/matnn.h +1 -123
  103. warp/native/mesh.h +1 -1
  104. warp/native/quat.h +34 -6
  105. warp/native/rand.h +7 -7
  106. warp/native/sparse.cpp +121 -258
  107. warp/native/sparse.cu +181 -274
  108. warp/native/spatial.h +305 -17
  109. warp/native/svd.h +23 -8
  110. warp/native/tile.h +603 -73
  111. warp/native/tile_radix_sort.h +1112 -0
  112. warp/native/tile_reduce.h +239 -13
  113. warp/native/tile_scan.h +240 -0
  114. warp/native/tuple.h +189 -0
  115. warp/native/vec.h +10 -20
  116. warp/native/warp.cpp +36 -4
  117. warp/native/warp.cu +588 -52
  118. warp/native/warp.h +47 -74
  119. warp/optim/linear.py +5 -1
  120. warp/paddle.py +7 -8
  121. warp/py.typed +0 -0
  122. warp/render/render_opengl.py +110 -80
  123. warp/render/render_usd.py +124 -62
  124. warp/sim/__init__.py +9 -0
  125. warp/sim/collide.py +253 -80
  126. warp/sim/graph_coloring.py +8 -1
  127. warp/sim/import_mjcf.py +4 -3
  128. warp/sim/import_usd.py +11 -7
  129. warp/sim/integrator.py +5 -2
  130. warp/sim/integrator_euler.py +1 -1
  131. warp/sim/integrator_featherstone.py +1 -1
  132. warp/sim/integrator_vbd.py +761 -322
  133. warp/sim/integrator_xpbd.py +1 -1
  134. warp/sim/model.py +265 -260
  135. warp/sim/utils.py +10 -7
  136. warp/sparse.py +303 -166
  137. warp/tape.py +54 -51
  138. warp/tests/cuda/test_conditional_captures.py +1046 -0
  139. warp/tests/cuda/test_streams.py +1 -1
  140. warp/tests/geometry/test_volume.py +2 -2
  141. warp/tests/interop/test_dlpack.py +9 -9
  142. warp/tests/interop/test_jax.py +0 -1
  143. warp/tests/run_coverage_serial.py +1 -1
  144. warp/tests/sim/disabled_kinematics.py +2 -2
  145. warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
  146. warp/tests/sim/test_collision.py +159 -51
  147. warp/tests/sim/test_coloring.py +91 -2
  148. warp/tests/test_array.py +254 -2
  149. warp/tests/test_array_reduce.py +2 -2
  150. warp/tests/test_assert.py +53 -0
  151. warp/tests/test_atomic_cas.py +312 -0
  152. warp/tests/test_codegen.py +142 -19
  153. warp/tests/test_conditional.py +47 -1
  154. warp/tests/test_ctypes.py +0 -20
  155. warp/tests/test_devices.py +8 -0
  156. warp/tests/test_fabricarray.py +4 -2
  157. warp/tests/test_fem.py +58 -25
  158. warp/tests/test_func.py +42 -1
  159. warp/tests/test_grad.py +1 -1
  160. warp/tests/test_lerp.py +1 -3
  161. warp/tests/test_map.py +481 -0
  162. warp/tests/test_mat.py +23 -24
  163. warp/tests/test_quat.py +28 -15
  164. warp/tests/test_rounding.py +10 -38
  165. warp/tests/test_runlength_encode.py +7 -7
  166. warp/tests/test_smoothstep.py +1 -1
  167. warp/tests/test_sparse.py +83 -2
  168. warp/tests/test_spatial.py +507 -1
  169. warp/tests/test_static.py +48 -0
  170. warp/tests/test_struct.py +2 -2
  171. warp/tests/test_tape.py +38 -0
  172. warp/tests/test_tuple.py +265 -0
  173. warp/tests/test_types.py +2 -2
  174. warp/tests/test_utils.py +24 -18
  175. warp/tests/test_vec.py +38 -408
  176. warp/tests/test_vec_constructors.py +325 -0
  177. warp/tests/tile/test_tile.py +438 -131
  178. warp/tests/tile/test_tile_mathdx.py +518 -14
  179. warp/tests/tile/test_tile_matmul.py +179 -0
  180. warp/tests/tile/test_tile_reduce.py +307 -5
  181. warp/tests/tile/test_tile_shared_memory.py +136 -7
  182. warp/tests/tile/test_tile_sort.py +121 -0
  183. warp/tests/unittest_suites.py +14 -6
  184. warp/types.py +462 -308
  185. warp/utils.py +647 -86
  186. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
  187. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +190 -176
  188. warp/stubs.py +0 -3381
  189. warp/tests/sim/test_xpbd.py +0 -399
  190. warp/tests/test_mlp.py +0 -282
  191. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
  192. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
  193. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/types.py CHANGED
@@ -18,20 +18,19 @@ from __future__ import annotations
18
18
  import builtins
19
19
  import ctypes
20
20
  import inspect
21
+ import math
21
22
  import struct
22
23
  import zlib
23
24
  from typing import (
24
25
  Any,
25
26
  Callable,
27
+ ClassVar,
26
28
  Generic,
27
- List,
28
29
  Literal,
29
30
  NamedTuple,
30
- Optional,
31
31
  Sequence,
32
32
  Tuple,
33
33
  TypeVar,
34
- Union,
35
34
  get_args,
36
35
  get_origin,
37
36
  )
@@ -47,6 +46,7 @@ Length = TypeVar("Length", bound=int)
47
46
  Rows = TypeVar("Rows")
48
47
  Cols = TypeVar("Cols")
49
48
  DType = TypeVar("DType")
49
+ Shape = TypeVar("Shape", bound=Tuple[int, ...])
50
50
 
51
51
  Int = TypeVar("Int")
52
52
  Float = TypeVar("Float")
@@ -70,18 +70,96 @@ class Transformation(Generic[Float]):
70
70
 
71
71
 
72
72
  class Array(Generic[DType]):
73
- device: Optional[warp.context.Device]
73
+ device: warp.context.Device | None
74
74
  dtype: type
75
75
  size: int
76
76
 
77
+ def __add__(self, other) -> array:
78
+ return warp.map(warp.add, self, other) # type: ignore
77
79
 
78
- int_tuple_type_hints = {
79
- Tuple[int]: 1,
80
- Tuple[int, int]: 2,
81
- Tuple[int, int, int]: 3,
82
- Tuple[int, int, int, int]: 4,
83
- Tuple[int, ...]: -1,
84
- }
80
+ def __radd__(self, other) -> array:
81
+ return warp.map(warp.add, other, self) # type: ignore
82
+
83
+ def __sub__(self, other) -> array:
84
+ return warp.map(warp.sub, self, other) # type: ignore
85
+
86
+ def __rsub__(self, other) -> array:
87
+ return warp.map(warp.sub, other, self) # type: ignore
88
+
89
+ def __mul__(self, other) -> array:
90
+ return warp.map(warp.mul, self, other) # type: ignore
91
+
92
+ def __rmul__(self, other) -> array:
93
+ return warp.map(warp.mul, other, self) # type: ignore
94
+
95
+ def __truediv__(self, other) -> array:
96
+ return warp.map(warp.div, self, other) # type: ignore
97
+
98
+ def __rtruediv__(self, other) -> array:
99
+ return warp.map(warp.div, other, self) # type: ignore
100
+
101
+ def __floordiv__(self, other) -> array:
102
+ return warp.map(warp.floordiv, self, other) # type: ignore
103
+
104
+ def __rfloordiv__(self, other) -> array:
105
+ return warp.map(warp.floordiv, other, self) # type: ignore
106
+
107
+ def __mod__(self, other) -> array:
108
+ return warp.map(warp.mod, self, other) # type: ignore
109
+
110
+ def __rmod__(self, other) -> array:
111
+ return warp.map(warp.mod, other, self) # type: ignore
112
+
113
+ def __pow__(self, other) -> array:
114
+ return warp.map(warp.pow, self, other) # type: ignore
115
+
116
+ def __rpow__(self, other) -> array:
117
+ return warp.map(warp.pow, other, self) # type: ignore
118
+
119
+ def __neg__(self) -> array:
120
+ return warp.map(warp.neg, self) # type: ignore
121
+
122
+ def __pos__(self) -> array:
123
+ return warp.map(warp.pos, self) # type: ignore
124
+
125
+ def __iadd__(self, other):
126
+ """In-place addition operator."""
127
+ warp.map(warp.add, self, other, out=self)
128
+ return self
129
+
130
+ def __isub__(self, other):
131
+ """In-place subtraction operator."""
132
+ warp.map(warp.sub, self, other, out=self)
133
+ return self
134
+
135
+ def __imul__(self, other):
136
+ """In-place multiplication operator."""
137
+ warp.map(warp.mul, self, other, out=self)
138
+ return self
139
+
140
+ def __itruediv__(self, other):
141
+ """In-place division operator."""
142
+ warp.map(warp.div, self, other, out=self)
143
+ return self
144
+
145
+ def __ifloordiv__(self, other):
146
+ """In-place floor division operator."""
147
+ warp.map(warp.floordiv, self, other, out=self)
148
+ return self
149
+
150
+ def __imod__(self, other):
151
+ """In-place modulo operator."""
152
+ warp.map(warp.mod, self, other, out=self)
153
+ return self
154
+
155
+ def __ipow__(self, other):
156
+ """In-place power operator."""
157
+ warp.map(warp.pow, self, other, out=self)
158
+ return self
159
+
160
+
161
+ class Tile(Generic[DType, Shape]):
162
+ pass
85
163
 
86
164
 
87
165
  def constant(x):
@@ -105,6 +183,13 @@ def half_bits_to_float(value):
105
183
  return warp.context.runtime.core.half_bits_to_float(value)
106
184
 
107
185
 
186
+ def safe_len(obj):
187
+ try:
188
+ return len(obj)
189
+ except TypeError:
190
+ return -1
191
+
192
+
108
193
  # ----------------------
109
194
  # built-in types
110
195
 
@@ -134,8 +219,8 @@ def vector(length, dtype):
134
219
 
135
220
  # warp scalar type:
136
221
  _wp_scalar_type_ = dtype
137
- _wp_type_params_ = [length, dtype]
138
- _wp_type_args_ = {"length": length, "dtype": dtype}
222
+ _wp_type_params_ = (length, dtype)
223
+ _wp_type_args_: ClassVar[dict[str, Any]] = {"length": length, "dtype": dtype}
139
224
  _wp_generic_type_str_ = "vec_t"
140
225
  _wp_generic_type_hint_ = Vector
141
226
  _wp_constructor_ = "vector"
@@ -282,7 +367,7 @@ def vector(length, dtype):
282
367
  return f"{type_repr(self)}([{', '.join(map(repr, self))}])"
283
368
 
284
369
  def __eq__(self, other):
285
- if self._length_ != len(other):
370
+ if self._length_ != safe_len(other):
286
371
  return False
287
372
  for i in range(self._length_):
288
373
  if self[i] != other[i]:
@@ -330,8 +415,8 @@ def matrix(shape, dtype):
330
415
  # warp scalar type:
331
416
  # used in type checking and when writing out c++ code for constructors:
332
417
  _wp_scalar_type_ = dtype
333
- _wp_type_params_ = [shape[0], shape[1], dtype]
334
- _wp_type_args_ = {"shape": (shape[0], shape[1]), "dtype": dtype}
418
+ _wp_type_params_ = (shape[0], shape[1], dtype)
419
+ _wp_type_args_: ClassVar[dict[str, Any]] = {"shape": (shape[0], shape[1]), "dtype": dtype}
335
420
  _wp_generic_type_str_ = "mat_t"
336
421
  _wp_generic_type_hint_ = Matrix
337
422
  _wp_constructor_ = "matrix"
@@ -426,10 +511,10 @@ def matrix(shape, dtype):
426
511
  return "[" + ",\n ".join(row_str) + "]"
427
512
 
428
513
  def __eq__(self, other):
429
- if self._shape_[0] != len(other):
514
+ if self._shape_[0] != safe_len(other):
430
515
  return False
431
516
  for i in range(self._shape_[0]):
432
- if self._shape_[1] != len(other[i]):
517
+ if self._shape_[1] != safe_len(other[i]):
433
518
  return False
434
519
  for j in range(self._shape_[1]):
435
520
  if self[i][j] != other[i][j]:
@@ -703,15 +788,15 @@ def transformation(dtype=Any):
703
788
  ),
704
789
  ),
705
790
  )
706
- _wp_type_params_ = [dtype]
707
- _wp_type_args_ = {"dtype": dtype}
791
+ _wp_type_params_ = (dtype,)
792
+ _wp_type_args_: ClassVar[dict[str, type]] = {"dtype": dtype}
708
793
  _wp_generic_type_str_ = "transform_t"
709
794
  _wp_generic_type_hint_ = Transformation
710
795
  _wp_constructor_ = "transformation"
711
796
 
712
797
  def __init__(self, *args, **kwargs):
713
798
  if len(args) == 1 and len(kwargs) == 0:
714
- if is_float(args[0]):
799
+ if is_float(args[0]) or is_int(args[0]):
715
800
  # Initialize from a single scalar.
716
801
  super().__init__(args[0])
717
802
  return
@@ -745,13 +830,26 @@ def transformation(dtype=Any):
745
830
  # Fallback to the vector's constructor.
746
831
  super().__init__(*args)
747
832
 
748
- @property
749
- def p(self):
750
- return vec3(self[0:3])
833
+ def __getattr__(self, name):
834
+ if name == "p":
835
+ return vec3(self[0:3])
836
+ elif name == "q":
837
+ return quat(self[3:7])
838
+ else:
839
+ return self.__getattribute__(name)
751
840
 
752
- @property
753
- def q(self):
754
- return quat(self[3:7])
841
+ def __setattr__(self, name, value):
842
+ if name == "p":
843
+ self[0:3] = vector(length=3, dtype=dtype)(*value)
844
+ elif name == "q":
845
+ self[3:7] = quaternion(dtype=dtype)(*value)
846
+ else:
847
+ # we don't permit vector xyzw indexing for transformations
848
+ idx = "xyzw".find(name)
849
+ if idx != -1:
850
+ raise RuntimeError(f"Cannot set attribute {name} of transformation")
851
+ else:
852
+ super().__setattr__(name, value)
755
853
 
756
854
  return transform_t
757
855
 
@@ -976,7 +1074,7 @@ spatial_matrix = spatial_matrixf
976
1074
  int_types = (int8, uint8, int16, uint16, int32, uint32, int64, uint64)
977
1075
  float_types = (float16, float32, float64)
978
1076
  scalar_types = int_types + float_types
979
- scalar_and_bool_types = scalar_types + (bool,)
1077
+ scalar_and_bool_types = (*scalar_types, bool)
980
1078
 
981
1079
  vector_types = (
982
1080
  vec2b,
@@ -1150,36 +1248,24 @@ class range_t:
1150
1248
 
1151
1249
 
1152
1250
  # definition just for kernel type (cannot be a parameter), see bvh.h
1153
- class bvh_query_t:
1251
+ class BvhQuery:
1154
1252
  """Object used to track state during BVH traversal."""
1155
1253
 
1156
- def __init__(self):
1157
- pass
1158
-
1159
-
1160
- BvhQuery = bvh_query_t
1254
+ _wp_native_name_ = "bvh_query_t"
1161
1255
 
1162
1256
 
1163
1257
  # definition just for kernel type (cannot be a parameter), see mesh.h
1164
- class mesh_query_aabb_t:
1258
+ class MeshQueryAABB:
1165
1259
  """Object used to track state during mesh traversal."""
1166
1260
 
1167
- def __init__(self):
1168
- pass
1169
-
1170
-
1171
- MeshQueryAABB = mesh_query_aabb_t
1261
+ _wp_native_name_ = "mesh_query_aabb_t"
1172
1262
 
1173
1263
 
1174
1264
  # definition just for kernel type (cannot be a parameter), see hash_grid.h
1175
- class hash_grid_query_t:
1265
+ class HashGridQuery:
1176
1266
  """Object used to track state during neighbor traversal."""
1177
1267
 
1178
- def __init__(self):
1179
- pass
1180
-
1181
-
1182
- HashGridQuery = hash_grid_query_t
1268
+ _wp_native_name_ = "hash_grid_query_t"
1183
1269
 
1184
1270
 
1185
1271
  # maximum number of dimensions, must match array.h
@@ -1195,9 +1281,13 @@ ARRAY_TYPE_FABRIC_INDEXED = 3
1195
1281
 
1196
1282
  # represents bounds for kernel launch (number of threads across multiple dimensions)
1197
1283
  class launch_bounds_t(ctypes.Structure):
1198
- _fields_ = [("shape", ctypes.c_int32 * LAUNCH_MAX_DIMS), ("ndim", ctypes.c_int32), ("size", ctypes.c_size_t)]
1284
+ _fields_ = (
1285
+ ("shape", ctypes.c_int32 * LAUNCH_MAX_DIMS),
1286
+ ("ndim", ctypes.c_int32),
1287
+ ("size", ctypes.c_size_t),
1288
+ )
1199
1289
 
1200
- def __init__(self, shape: Union[int, Sequence[int]]):
1290
+ def __init__(self, shape: int | Sequence[int]):
1201
1291
  if isinstance(shape, int):
1202
1292
  # 1d launch
1203
1293
  self.ndim = 1
@@ -1219,20 +1309,20 @@ class launch_bounds_t(ctypes.Structure):
1219
1309
 
1220
1310
 
1221
1311
  class shape_t(ctypes.Structure):
1222
- _fields_ = [("dims", ctypes.c_int32 * ARRAY_MAX_DIMS)]
1312
+ _fields_ = (("dims", ctypes.c_int32 * ARRAY_MAX_DIMS),)
1223
1313
 
1224
1314
  def __init__(self):
1225
1315
  pass
1226
1316
 
1227
1317
 
1228
1318
  class array_t(ctypes.Structure):
1229
- _fields_ = [
1319
+ _fields_ = (
1230
1320
  ("data", ctypes.c_uint64),
1231
1321
  ("grad", ctypes.c_uint64),
1232
1322
  ("shape", ctypes.c_int32 * ARRAY_MAX_DIMS),
1233
1323
  ("strides", ctypes.c_int32 * ARRAY_MAX_DIMS),
1234
1324
  ("ndim", ctypes.c_int32),
1235
- ]
1325
+ )
1236
1326
 
1237
1327
  def __init__(self, data=0, grad=0, ndim=0, shape=(0,), strides=(0,)):
1238
1328
  self.data = data
@@ -1268,11 +1358,11 @@ array_t._numpy_dtype_ = {
1268
1358
 
1269
1359
 
1270
1360
  class indexedarray_t(ctypes.Structure):
1271
- _fields_ = [
1361
+ _fields_ = (
1272
1362
  ("data", array_t),
1273
1363
  ("indices", ctypes.c_void_p * ARRAY_MAX_DIMS),
1274
1364
  ("shape", ctypes.c_int32 * ARRAY_MAX_DIMS),
1275
- ]
1365
+ )
1276
1366
 
1277
1367
  def __init__(self, data, indices, shape):
1278
1368
  if data is None:
@@ -1290,17 +1380,44 @@ class indexedarray_t(ctypes.Structure):
1290
1380
  self.shape[i] = shape[i]
1291
1381
 
1292
1382
 
1383
+ class tuple_t:
1384
+ """Used during codegen to store multiple values into a single variable."""
1385
+
1386
+ def __init__(self, types, values):
1387
+ self.types = types
1388
+ self.values = values
1389
+
1390
+
1293
1391
  def type_ctype(dtype):
1294
1392
  if dtype == float:
1295
1393
  return ctypes.c_float
1296
1394
  elif dtype == int:
1297
1395
  return ctypes.c_int32
1396
+ elif dtype == bool:
1397
+ return ctypes.c_bool
1398
+ elif issubclass(dtype, (ctypes.Array, ctypes.Structure)):
1399
+ return dtype
1298
1400
  else:
1299
1401
  # scalar type
1300
1402
  return dtype._type_
1301
1403
 
1302
1404
 
1303
- def type_length(dtype):
1405
+ def type_length(obj):
1406
+ if is_tile(obj):
1407
+ return obj.shape[0]
1408
+ elif is_tuple(obj):
1409
+ return len(obj.types)
1410
+ elif get_origin(obj) is tuple:
1411
+ return len(get_args(obj))
1412
+ elif hasattr(obj, "_shape_"):
1413
+ return obj._shape_[0]
1414
+ elif hasattr(obj, "_length_"):
1415
+ return obj._length_
1416
+
1417
+ return len(obj)
1418
+
1419
+
1420
+ def type_size(dtype):
1304
1421
  if dtype == float or dtype == int or isinstance(dtype, warp.codegen.Struct):
1305
1422
  return 1
1306
1423
  else:
@@ -1406,12 +1523,14 @@ def scalar_short_name(t):
1406
1523
 
1407
1524
 
1408
1525
  # converts any known type to a human readable string, good for error messages, reporting etc
1409
- def type_repr(t):
1526
+ def type_repr(t) -> str:
1410
1527
  if is_array(t):
1411
1528
  if t.device is None:
1412
1529
  # array is used as a type annotation - display ndim instead of shape
1413
1530
  return f"array(ndim={t.ndim}, dtype={type_repr(t.dtype)})"
1414
1531
  return f"array(shape={t.shape}, dtype={type_repr(t.dtype)})"
1532
+ if is_tuple(t):
1533
+ return f"tuple({', '.join(type_repr(x) for x in t.types)})"
1415
1534
  if is_tile(t):
1416
1535
  return f"tile(shape={t.shape}, dtype={type_repr(t.dtype)})"
1417
1536
  if isinstance(t, warp.codegen.Struct):
@@ -1437,6 +1556,12 @@ def type_repr(t):
1437
1556
  return f"matrix(shape=({t._shape_[0]}, {t._shape_[1]}), dtype={type_repr(t._wp_scalar_type_)})"
1438
1557
  if t in scalar_types:
1439
1558
  return t.__name__
1559
+ if t == builtins.bool:
1560
+ return "bool"
1561
+ if t == builtins.float:
1562
+ return "float"
1563
+ if t == builtins.int:
1564
+ return "int"
1440
1565
 
1441
1566
  name = getattr(t, "__name__", None)
1442
1567
  if name is None:
@@ -1479,7 +1604,7 @@ def type_is_transformation(t):
1479
1604
  return getattr(t, "_wp_generic_type_hint_", None) is Transformation
1480
1605
 
1481
1606
 
1482
- value_types = (int, float, builtins.bool) + scalar_and_bool_types
1607
+ value_types = (int, float, builtins.bool, *scalar_and_bool_types)
1483
1608
 
1484
1609
 
1485
1610
  # returns true for all value types (int, float, bool, scalars, vectors, matrices)
@@ -1505,6 +1630,10 @@ def is_array(a) -> builtins.bool:
1505
1630
  return isinstance(a, array_types)
1506
1631
 
1507
1632
 
1633
+ def is_tuple(t) -> builtins.bool:
1634
+ return isinstance(t, tuple_t)
1635
+
1636
+
1508
1637
  def scalars_equal(a, b, match_generic=False):
1509
1638
  # convert to canonical types
1510
1639
  if a == float:
@@ -1546,45 +1675,58 @@ def scalars_equal(a, b, match_generic=False):
1546
1675
  return a == b
1547
1676
 
1548
1677
 
1678
+ def seq_match_ellipsis(a, b, match_generic=False) -> bool:
1679
+ assert a and a[-1] is Ellipsis and len(a) == 2
1680
+
1681
+ # Compare the args against the type being repeated through the ellipsis.
1682
+ repeated_arg = a[0]
1683
+ if not all(types_equal(x, repeated_arg, match_generic=match_generic) for x in b):
1684
+ return False
1685
+
1686
+ return True
1687
+
1688
+
1549
1689
  def types_equal(a, b, match_generic=False):
1550
1690
  if match_generic:
1551
- # Special cases to interpret the types listed in `int_tuple_type_hints`
1552
- # as generic hints that accept any integer types.
1553
- if a in int_tuple_type_hints and isinstance(b, Sequence):
1554
- a_length = int_tuple_type_hints[a]
1555
- if (a_length == -1 or a_length == len(b)) and all(
1556
- scalars_equal(x, Int, match_generic=match_generic) for x in b
1557
- ):
1558
- return True
1559
- if b in int_tuple_type_hints and isinstance(a, Sequence):
1560
- b_length = int_tuple_type_hints[b]
1561
- if (b_length == -1 or b_length == len(a)) and all(
1562
- scalars_equal(x, Int, match_generic=match_generic) for x in a
1563
- ):
1564
- return True
1565
- if a in int_tuple_type_hints and b in int_tuple_type_hints:
1566
- a_length = int_tuple_type_hints[a]
1567
- b_length = int_tuple_type_hints[b]
1568
- if a_length is None or b_length is None or a_length == b_length:
1691
+ a_is_seq = True
1692
+ a_is_tuple = True
1693
+ if is_tuple(a):
1694
+ a = a.types
1695
+ elif get_origin(a) is tuple:
1696
+ a = get_args(a)
1697
+ else:
1698
+ a_is_tuple = False
1699
+ a_is_seq = isinstance(a, Sequence)
1700
+
1701
+ b_is_seq = True
1702
+ b_is_tuple = True
1703
+ if is_tuple(b):
1704
+ b = b.types
1705
+ elif get_origin(b) is tuple:
1706
+ b = get_args(b)
1707
+ else:
1708
+ b_is_tuple = False
1709
+ b_is_seq = isinstance(b, Sequence)
1710
+
1711
+ if a_is_seq and b_is_seq:
1712
+ if (not a and a_is_tuple) or (not b and b_is_tuple):
1713
+ # We have a bare tuple definition like `Tuple`, which matches to anything.
1569
1714
  return True
1570
1715
 
1571
- a_origin = get_origin(a)
1572
- b_origin = get_origin(b)
1573
- if a_origin is tuple and b_origin is tuple:
1574
- a_args = get_args(a)
1575
- b_args = get_args(b)
1576
- if len(a_args) == len(b_args) and all(
1577
- scalars_equal(x, y, match_generic=match_generic) for x, y in zip(a_args, b_args)
1578
- ):
1579
- return True
1580
- elif a_origin is tuple and isinstance(b, Sequence):
1581
- a_args = get_args(a)
1582
- if len(a_args) == len(b) and all(scalars_equal(x, y, match_generic=match_generic) for x, y in zip(a_args, b)):
1583
- return True
1584
- elif b_origin is tuple and isinstance(a, Sequence):
1585
- b_args = get_args(b)
1586
- if len(b_args) == len(a) and all(scalars_equal(x, y, match_generic=match_generic) for x, y in zip(b_args, a)):
1587
- return True
1716
+ a_has_ellipsis = a and a[-1] is Ellipsis
1717
+ b_has_ellipsis = b and b[-1] is Ellipsis
1718
+ if a_has_ellipsis and b_has_ellipsis:
1719
+ # Delegate to comparing all the elements using the standard approach.
1720
+ pass
1721
+ elif a_has_ellipsis:
1722
+ return seq_match_ellipsis(a, b, match_generic=match_generic)
1723
+ elif b_has_ellipsis:
1724
+ return seq_match_ellipsis(b, a, match_generic=match_generic)
1725
+
1726
+ return len(a) == len(b) and all(types_equal(x, y, match_generic=match_generic) for x, y in zip(a, b))
1727
+ elif a_is_seq or b_is_seq:
1728
+ # A sequence can only match to another sequence.
1729
+ return False
1588
1730
 
1589
1731
  # convert to canonical types
1590
1732
  if a == float:
@@ -1621,7 +1763,7 @@ def types_equal(a, b, match_generic=False):
1621
1763
  return scalars_equal(a, b, match_generic)
1622
1764
 
1623
1765
 
1624
- def strides_from_shape(shape: Tuple, dtype):
1766
+ def strides_from_shape(shape: tuple, dtype):
1625
1767
  ndims = len(shape)
1626
1768
  strides = [None] * ndims
1627
1769
 
@@ -1635,7 +1777,7 @@ def strides_from_shape(shape: Tuple, dtype):
1635
1777
  return tuple(strides)
1636
1778
 
1637
1779
 
1638
- def check_array_shape(shape: Tuple):
1780
+ def check_array_shape(shape: tuple):
1639
1781
  """Checks that the size in each dimension is positive and less than 2^31."""
1640
1782
 
1641
1783
  for dim_index, dim_size in enumerate(shape):
@@ -1711,8 +1853,8 @@ class array(Array[DType]):
1711
1853
  ndim (int): The number of array dimensions.
1712
1854
  size (int): The number of items in the array.
1713
1855
  capacity (int): The amount of memory in bytes allocated for this array.
1714
- shape (Tuple[int]): Dimensions of the array.
1715
- strides (Tuple[int]): Number of bytes in each dimension between successive elements of the array.
1856
+ shape (tuple[int]): Dimensions of the array.
1857
+ strides (tuple[int]): Number of bytes in each dimension between successive elements of the array.
1716
1858
  ptr (int): Pointer to underlying memory allocation backing the array.
1717
1859
  device (Device): The device where the array's memory allocation resides.
1718
1860
  pinned (bool): Indicates whether the array was allocated in pinned host memory.
@@ -1726,26 +1868,24 @@ class array(Array[DType]):
1726
1868
  _vars = None
1727
1869
 
1728
1870
  def __new__(cls, *args, **kwargs):
1729
- instance = super(array, cls).__new__(cls)
1871
+ instance = super().__new__(cls)
1730
1872
  instance.deleter = None
1731
1873
  return instance
1732
1874
 
1733
1875
  def __init__(
1734
1876
  self,
1735
- data: Union[List, Tuple, npt.NDArray, None] = None,
1877
+ data: list | tuple | npt.NDArray | None = None,
1736
1878
  dtype: Any = Any,
1737
- shape: Union[int, Tuple[int, ...], List[int], None] = None,
1738
- strides: Optional[Tuple[int, ...]] = None,
1739
- length: Optional[int] = None,
1740
- ptr: Optional[int] = None,
1741
- capacity: Optional[int] = None,
1879
+ shape: int | tuple[int, ...] | list[int] | None = None,
1880
+ strides: tuple[int, ...] | None = None,
1881
+ ptr: int | None = None,
1882
+ capacity: int | None = None,
1742
1883
  device=None,
1743
1884
  pinned: builtins.bool = False,
1744
1885
  copy: builtins.bool = True,
1745
- owner: builtins.bool = False, # deprecated - pass deleter instead
1746
- deleter: Optional[Callable[[int, int], None]] = None,
1747
- ndim: Optional[int] = None,
1748
- grad: Optional[array] = None,
1886
+ deleter: Callable[[int, int], None] | None = None,
1887
+ ndim: int | None = None,
1888
+ grad: array | None = None,
1749
1889
  requires_grad: builtins.bool = False,
1750
1890
  ):
1751
1891
  """Constructs a new Warp array object
@@ -1759,7 +1899,7 @@ class array(Array[DType]):
1759
1899
  allocation should reside on the same device given by the device argument, and the user should set the length
1760
1900
  and dtype parameter appropriately.
1761
1901
 
1762
- If neither ``data`` nor ``ptr`` are specified, the ``shape`` or ``length`` arguments are checked next.
1902
+ If neither ``data`` nor ``ptr`` are specified, the ``shape`` argument is checked next.
1763
1903
  This construction path can be used to create new uninitialized arrays, but users are encouraged to call
1764
1904
  ``wp.empty()``, ``wp.zeros()``, or ``wp.full()`` instead to create new arrays.
1765
1905
 
@@ -1772,14 +1912,11 @@ class array(Array[DType]):
1772
1912
  dtype: One of the available `data types <#data-types>`_, such as :class:`warp.float32`, :class:`warp.mat33`, or a custom `struct <#structs>`_. If dtype is ``Any`` and data is an ndarray, then it will be inferred from the array data type
1773
1913
  shape: Dimensions of the array
1774
1914
  strides: Number of bytes in each dimension between successive elements of the array
1775
- length: Number of elements of the data type (deprecated, users should use ``shape`` argument)
1776
1915
  ptr: Address of an external memory address to alias (``data`` should be ``None``)
1777
1916
  capacity: Maximum size in bytes of the ``ptr`` allocation (``data`` should be ``None``)
1778
1917
  device (Devicelike): Device the array lives on
1779
1918
  copy: Whether the incoming ``data`` will be copied or aliased. Aliasing requires that
1780
1919
  the incoming ``data`` already lives on the ``device`` specified and the data types match.
1781
- owner: Whether the array will try to deallocate the underlying memory when it is deleted
1782
- (deprecated, pass ``deleter`` if you wish to transfer ownership to Warp)
1783
1920
  deleter: Function to be called when the array is deleted, taking two arguments: pointer and size
1784
1921
  requires_grad: Whether or not gradients will be tracked for this array, see :class:`warp.Tape` for details
1785
1922
  grad: The array in which to accumulate gradients in the backward pass. If ``None`` and ``requires_grad`` is ``True``,
@@ -1787,7 +1924,7 @@ class array(Array[DType]):
1787
1924
  pinned: Whether to allocate pinned host memory, which allows asynchronous host–device transfers
1788
1925
  (only applicable with ``device="cpu"``)
1789
1926
 
1790
- """
1927
+ """ # noqa: RUF002
1791
1928
 
1792
1929
  self.ctype = None
1793
1930
 
@@ -1821,23 +1958,6 @@ class array(Array[DType]):
1821
1958
  raise RuntimeError(
1822
1959
  f"Failed to create array with shape {shape}, the maximum number of dimensions is {ARRAY_MAX_DIMS}"
1823
1960
  )
1824
- elif length is not None:
1825
- # backward compatibility
1826
- warp.utils.warn(
1827
- "The 'length' keyword is deprecated and will be removed in a future version. Use 'shape' instead.",
1828
- category=DeprecationWarning,
1829
- stacklevel=2,
1830
- )
1831
- shape = (length,)
1832
-
1833
- if owner:
1834
- warp.utils.warn(
1835
- "The 'owner' keyword in the array initializer is\n"
1836
- "deprecated and will be removed in a future version. It currently has no effect.\n"
1837
- "Pass a function to the 'deleter' keyword instead.",
1838
- category=DeprecationWarning,
1839
- stacklevel=2,
1840
- )
1841
1961
 
1842
1962
  # determine the construction path from the given arguments
1843
1963
  if data is not None:
@@ -1891,65 +2011,131 @@ class array(Array[DType]):
1891
2011
  desc = data.__cuda_array_interface__
1892
2012
  data_shape = desc.get("shape")
1893
2013
  data_strides = desc.get("strides")
1894
- data_dtype = np.dtype(desc.get("typestr"))
2014
+ data_dtype_np = np.dtype(desc.get("typestr"))
2015
+ data_dtype = dtype_from_numpy(data_dtype_np)
1895
2016
  data_ptr = desc.get("data")[0]
1896
2017
 
1897
2018
  if dtype == Any:
1898
- dtype = np_dtype_to_warp_type[data_dtype]
2019
+ dtype = data_dtype
1899
2020
  else:
1900
2021
  # Warn if the data type is compatible with the requested dtype
1901
- if not np_dtype_is_compatible(data_dtype, dtype):
2022
+ if not np_dtype_is_compatible(data_dtype_np, dtype):
1902
2023
  warp.utils.warn(
1903
- f"The input data type {data_dtype} does not appear to be "
2024
+ f"The input data type {data_dtype_np} does not appear to be "
1904
2025
  f"compatible with the requested dtype {dtype}. If "
1905
2026
  "data-type sizes do not match, then this may lead to memory-access violations."
1906
2027
  )
1907
2028
 
1908
2029
  if data_strides is None:
1909
- data_strides = strides_from_shape(data_shape, dtype)
2030
+ data_strides = strides_from_shape(data_shape, data_dtype)
1910
2031
 
1911
2032
  data_ndim = len(data_shape)
1912
2033
 
1913
- # determine whether the input needs reshaping
1914
- target_npshape = None
1915
- if shape is not None:
1916
- target_npshape = (*shape, *dtype_shape)
1917
- elif dtype_ndim > 0:
1918
- # prune inner dimensions of length 1
1919
- while data_ndim > 1 and data_shape[-1] == 1:
1920
- data_shape = data_shape[:-1]
1921
- # if the inner dims don't match exactly, check if the innermost dim is a multiple of type length
1922
- if data_ndim < dtype_ndim or data_shape[-dtype_ndim:] != dtype_shape:
1923
- if data_shape[-1] == dtype._length_:
1924
- target_npshape = (*data_shape[:-1], *dtype_shape)
1925
- elif data_shape[-1] % dtype._length_ == 0:
1926
- target_npshape = (*data_shape[:-1], data_shape[-1] // dtype._length_, *dtype_shape)
2034
+ # determine shape and strides
2035
+ if shape is None:
2036
+ if dtype_ndim == 0:
2037
+ # scalars
2038
+ shape = data_shape
2039
+ strides = data_strides
2040
+ else:
2041
+ # vectors/matrices
2042
+ if data_ndim >= dtype_ndim and data_shape[-dtype_ndim:] == dtype_shape:
2043
+ # the inner shape matches exactly, check inner strides
2044
+ if data_strides[-dtype_ndim:] != strides_from_shape(dtype._shape_, dtype._wp_scalar_type_):
2045
+ raise RuntimeError(
2046
+ f"The inner strides of the input array {data_strides} are not compatible with the requested data type {warp.context.type_str(dtype)}"
2047
+ )
2048
+ shape = data_shape[:-dtype_ndim] or (1,)
2049
+ strides = data_strides[:-dtype_ndim] or (type_size_in_bytes(dtype),)
1927
2050
  else:
1928
- if dtype_ndim == 1:
2051
+ # ensure inner strides are contiguous
2052
+ if data_strides[-1] != type_size_in_bytes(data_dtype):
1929
2053
  raise RuntimeError(
1930
- f"The inner dimensions of the input data are not compatible with the requested vector type {warp.context.type_str(dtype)}: expected an inner dimension that is a multiple of {dtype._length_}"
2054
+ f"The inner strides of the input array {data_strides} are not compatible with the requested data type {warp.context.type_str(dtype)}"
1931
2055
  )
2056
+ # check if the innermost dim is a multiple of type length
2057
+ if data_shape[-1] == dtype._length_:
2058
+ shape = data_shape[:-1] or (1,)
2059
+ strides = data_strides[:-1] or (type_size_in_bytes(dtype),)
2060
+ elif data_shape[-1] % dtype._length_ == 0:
2061
+ shape = (*data_shape[:-1], data_shape[-1] // dtype._length_)
2062
+ strides = (*data_strides[:-1], data_strides[-1] * dtype._length_)
1932
2063
  else:
1933
2064
  raise RuntimeError(
1934
- f"The inner dimensions of the input data are not compatible with the requested matrix type {warp.context.type_str(dtype)}: expected inner dimensions {dtype._shape_} or a multiple of {dtype._length_}"
2065
+ f"The shape of the input array {data_shape} is not compatible with the requested data type {warp.context.type_str(dtype)}"
1935
2066
  )
1936
-
1937
- if target_npshape is None:
1938
- target_npshape = data_shape if shape is None else shape
1939
-
1940
- # determine final shape and strides
1941
- if dtype_ndim > 0:
1942
- # make sure the inner dims are contiguous for vector/matrix types
1943
- scalar_size = type_size_in_bytes(dtype._wp_scalar_type_)
1944
- inner_contiguous = data_strides[-1] == scalar_size
1945
- if inner_contiguous and dtype_ndim > 1:
1946
- inner_contiguous = data_strides[-2] == scalar_size * dtype_shape[-1]
1947
-
1948
- shape = target_npshape[:-dtype_ndim] or (1,)
1949
- strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
1950
2067
  else:
1951
- shape = target_npshape or (1,)
1952
- strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
2068
+ # a shape was given, reshape if needed
2069
+ if dtype_ndim == 0:
2070
+ # scalars
2071
+ if shape == data_shape:
2072
+ strides = data_strides
2073
+ else:
2074
+ # check if given shape is compatible
2075
+ if math.prod(shape) != math.prod(data_shape):
2076
+ raise RuntimeError(
2077
+ f"The shape of the input array {data_shape} is not compatible with the requested shape {shape}"
2078
+ )
2079
+ # check if data is contiguous
2080
+ if data_strides != strides_from_shape(data_shape, data_dtype):
2081
+ raise RuntimeError(
2082
+ f"The requested shape {shape} is not possible because the input array is not contiguous"
2083
+ )
2084
+ strides = strides_from_shape(shape, dtype)
2085
+ else:
2086
+ # vectors/matrices
2087
+ if data_ndim >= dtype_ndim and data_shape[-dtype_ndim:] == dtype_shape:
2088
+ # the inner shape matches exactly, check outer shape
2089
+ if shape == data_shape[:-dtype_ndim]:
2090
+ strides = data_strides[:-dtype_ndim]
2091
+ else:
2092
+ # check if given shape is compatible
2093
+ if math.prod(shape) != math.prod(data_shape[:-dtype_ndim]):
2094
+ raise RuntimeError(
2095
+ f"The shape of the input array {data_shape} is not compatible with the requested shape {shape} and data type {warp.context.type_str(dtype)}"
2096
+ )
2097
+ # check if data is contiguous
2098
+ if data_strides != strides_from_shape(data_shape, data_dtype):
2099
+ raise RuntimeError(
2100
+ f"The requested shape {shape} is not possible because the input array is not contiguous"
2101
+ )
2102
+ strides = strides_from_shape(shape, dtype)
2103
+ else:
2104
+ # check if the innermost dim is a multiple of type length
2105
+ if data_shape[-1] == dtype._length_:
2106
+ if shape == data_shape[:-1]:
2107
+ strides = data_strides[:-1]
2108
+ else:
2109
+ # check if given shape is compatible
2110
+ if math.prod(shape) != math.prod(data_shape[:-1]):
2111
+ raise RuntimeError(
2112
+ f"The shape of the input array {data_shape} is not compatible with the requested shape {shape} and data type {warp.context.type_str(dtype)}"
2113
+ )
2114
+ # check if data is contiguous
2115
+ if data_strides != strides_from_shape(data_shape, data_dtype):
2116
+ raise RuntimeError(
2117
+ f"The requested shape {shape} is not possible because the input array is not contiguous"
2118
+ )
2119
+ strides = strides_from_shape(shape, dtype)
2120
+ elif data_shape[-1] % dtype._length_ == 0:
2121
+ if shape == (*data_shape[:-1], data_shape[-1] // dtype._length_):
2122
+ strides = (*data_strides[:-1], data_strides[-1] * dtype._length_)
2123
+ else:
2124
+ # check if given shape is compatible
2125
+ if math.prod(shape) != math.prod((*data_shape[:-1], data_shape[-1] // dtype._length_)):
2126
+ raise RuntimeError(
2127
+ f"The shape of the input array {data_shape} is not compatible with the requested shape {shape} and data type {warp.context.type_str(dtype)}"
2128
+ )
2129
+ # check if data is contiguous
2130
+ if data_strides != strides_from_shape(data_shape, data_dtype):
2131
+ raise RuntimeError(
2132
+ f"The requested shape {shape} is not possible because the input array is not contiguous"
2133
+ )
2134
+ strides = strides_from_shape(shape, dtype)
2135
+ else:
2136
+ raise RuntimeError(
2137
+ f"The shape of the input array {data_shape} is not compatible with the requested data type {warp.context.type_str(dtype)} and requested shape {shape}"
2138
+ )
1953
2139
 
1954
2140
  self._init_from_ptr(data_ptr, dtype, shape, strides, None, device, False, None)
1955
2141
 
@@ -2466,9 +2652,7 @@ class array(Array[DType]):
2466
2652
 
2467
2653
  if self.ndim != 2 or other.ndim != 2:
2468
2654
  raise RuntimeError(
2469
- "A has dim = {}, B has dim = {}. If multiplying with @, A and B must have dim = 2.".format(
2470
- self.ndim, other.ndim
2471
- )
2655
+ f"A has dim = {self.ndim}, B has dim = {other.ndim}. If multiplying with @, A and B must have dim = 2."
2472
2656
  )
2473
2657
 
2474
2658
  m = self.shape[0]
@@ -3011,7 +3195,7 @@ def _close_cuda_ipc_handle(ptr, size):
3011
3195
 
3012
3196
 
3013
3197
  def from_ipc_handle(
3014
- handle: bytes, dtype, shape: Tuple[int, ...], strides: Optional[Tuple[int, ...]] = None, device=None
3198
+ handle: bytes, dtype, shape: tuple[int, ...], strides: tuple[int, ...] | None = None, device=None
3015
3199
  ) -> array:
3016
3200
  """Create an array from an IPC handle.
3017
3201
 
@@ -3157,10 +3341,10 @@ class indexedarray(noncontiguous_array_base):
3157
3341
 
3158
3342
  def __init__(
3159
3343
  self,
3160
- data: Optional[array] = None,
3161
- indices: Union[array, List[array], None] = None,
3344
+ data: array | None = None,
3345
+ indices: array | list[array] | None = None,
3162
3346
  dtype=None,
3163
- ndim: Optional[int] = None,
3347
+ ndim: int | None = None,
3164
3348
  ):
3165
3349
  super().__init__(ARRAY_TYPE_INDEXED)
3166
3350
 
@@ -3294,14 +3478,32 @@ def array_type_id(a):
3294
3478
  raise ValueError("Invalid array type")
3295
3479
 
3296
3480
 
3297
- # tile object
3298
- class Tile:
3481
+ class tile(Tile[DType, Shape]):
3482
+ """A Warp tile object.
3483
+
3484
+ Attributes:
3485
+ dtype (DType): The data type of the tile
3486
+ shape (Shape): Dimensions of the tile
3487
+ storage (str): 'register' or 'shared' memory storage
3488
+ layout (str): 'rowmajor' or 'colmajor' layout
3489
+ strides (tuple[int]): Number of tile elements between successive tile entries in each dimension
3490
+ size (int): Total number of tile elements
3491
+ owner (bool): Whether this tile aliases or owns its data
3492
+ """
3493
+
3299
3494
  alignment = 16
3300
3495
 
3301
- def __init__(self, dtype, shape, op=None, storage="register", layout="rowmajor", strides=None, owner=True):
3496
+ def __init__(
3497
+ self,
3498
+ dtype: Any,
3499
+ shape: tuple[int, ...] | list[int],
3500
+ storage: str = "register",
3501
+ layout: str = "rowmajor",
3502
+ strides: tuple[int, ...] | None = None,
3503
+ owner: builtins.bool = True,
3504
+ ):
3302
3505
  self.dtype = type_to_warp(dtype)
3303
3506
  self.shape = shape
3304
- self.op = op
3305
3507
  self.storage = storage
3306
3508
  self.layout = layout
3307
3509
  self.strides = strides
@@ -3349,7 +3551,7 @@ class Tile:
3349
3551
  elif self.storage == "shared":
3350
3552
  if self.owner:
3351
3553
  # allocate new shared memory tile
3352
- return f"wp::tile_alloc_empty<{Var.type_to_ctype(self.dtype)},wp::tile_shape_t<{','.join(map(str, self.shape))}>,{'true' if requires_grad else 'false'}>()"
3554
+ return f"wp::tile_alloc_empty<{Var.type_to_ctype(self.dtype)},wp::tile_shape_t<{','.join(map(str, self.shape))}>,wp::tile_stride_t<{','.join(map(str, self.strides))}>,{'true' if requires_grad else 'false'}>()"
3353
3555
  else:
3354
3556
  # tile will be initialized by another call, e.g.: tile_transpose()
3355
3557
  return "nullptr"
@@ -3361,76 +3563,15 @@ class Tile:
3361
3563
 
3362
3564
  @staticmethod
3363
3565
  def round_up(bytes):
3364
- return ((bytes + Tile.alignment - 1) // Tile.alignment) * Tile.alignment
3566
+ return ((bytes + tile.alignment - 1) // tile.alignment) * tile.alignment
3365
3567
 
3366
3568
  # align tile size to natural boundary, default 16-bytes
3367
3569
  def align(self, bytes):
3368
- return Tile.round_up(bytes)
3369
-
3370
-
3371
- class TileZeros(Tile):
3372
- def __init__(self, dtype, shape, storage="register"):
3373
- Tile.__init__(self, dtype, shape, op="zeros", storage=storage)
3374
-
3375
-
3376
- class TileOnes(Tile):
3377
- def __init__(self, dtype, shape, storage="register"):
3378
- Tile.__init__(self, dtype, shape, op="ones", storage=storage)
3379
-
3380
-
3381
- class TileRange(Tile):
3382
- def __init__(self, dtype, start, stop, step, storage="register"):
3383
- self.start = start
3384
- self.stop = stop
3385
- self.step = step
3386
-
3387
- n = int((stop - start) / step)
3388
-
3389
- Tile.__init__(self, dtype, shape=(n,), op="arange", storage=storage)
3390
-
3391
-
3392
- class TileConstant(Tile):
3393
- def __init__(self, dtype, shape):
3394
- Tile.__init__(self, dtype, shape, op="constant", storage="register")
3395
-
3396
-
3397
- class TileLoad(Tile):
3398
- def __init__(self, array, shape, storage="register"):
3399
- Tile.__init__(self, array.dtype, shape, op="load", storage=storage)
3400
-
3401
-
3402
- class TileUnaryMap(Tile):
3403
- def __init__(self, t, dtype=None, storage="register"):
3404
- Tile.__init__(self, dtype, t.shape, op="unary_map", storage=storage)
3405
-
3406
- # if no output dtype specified then assume it's the same as the first arg
3407
- if self.dtype is None:
3408
- self.dtype = t.dtype
3409
-
3410
- self.t = t
3411
-
3412
-
3413
- class TileBinaryMap(Tile):
3414
- def __init__(self, a, b, dtype=None, storage="register"):
3415
- Tile.__init__(self, dtype, a.shape, op="binary_map", storage=storage)
3416
-
3417
- # if no output dtype specified then assume it's the same as the first arg
3418
- if self.dtype is None:
3419
- self.dtype = a.dtype
3420
-
3421
- self.a = a
3422
- self.b = b
3423
-
3424
-
3425
- class TileShared(Tile):
3426
- def __init__(self, t):
3427
- Tile.__init__(self, t.dtype, t.shape, "shared", storage="shared")
3428
-
3429
- self.t = t
3570
+ return tile.round_up(bytes)
3430
3571
 
3431
3572
 
3432
3573
  def is_tile(t):
3433
- return isinstance(t, Tile)
3574
+ return isinstance(t, tile)
3434
3575
 
3435
3576
 
3436
3577
  bvh_constructor_values = {"sah": 0, "median": 1, "lbvh": 2}
@@ -3438,11 +3579,11 @@ bvh_constructor_values = {"sah": 0, "median": 1, "lbvh": 2}
3438
3579
 
3439
3580
  class Bvh:
3440
3581
  def __new__(cls, *args, **kwargs):
3441
- instance = super(Bvh, cls).__new__(cls)
3582
+ instance = super().__new__(cls)
3442
3583
  instance.id = None
3443
3584
  return instance
3444
3585
 
3445
- def __init__(self, lowers: array, uppers: array, constructor: Optional[str] = None):
3586
+ def __init__(self, lowers: array, uppers: array, constructor: str | None = None):
3446
3587
  """Class representing a bounding volume hierarchy.
3447
3588
 
3448
3589
  Depending on which device the input bounds live, it can be either a CPU tree or a GPU tree.
@@ -3523,14 +3664,14 @@ class Bvh:
3523
3664
  constructor = "sah"
3524
3665
 
3525
3666
  self.id = self.runtime.core.bvh_create_host(
3526
- get_data(lowers), get_data(uppers), int(len(lowers)), bvh_constructor_values[constructor]
3667
+ get_data(lowers), get_data(uppers), len(lowers), bvh_constructor_values[constructor]
3527
3668
  )
3528
3669
  else:
3529
3670
  self.id = self.runtime.core.bvh_create_device(
3530
3671
  self.device.context,
3531
3672
  get_data(lowers),
3532
3673
  get_data(uppers),
3533
- int(len(lowers)),
3674
+ len(lowers),
3534
3675
  bvh_constructor_values[constructor],
3535
3676
  )
3536
3677
 
@@ -3561,14 +3702,14 @@ class Bvh:
3561
3702
  class Mesh:
3562
3703
  from warp.codegen import Var
3563
3704
 
3564
- vars = {
3705
+ vars: ClassVar[dict[str, Var]] = {
3565
3706
  "points": Var("points", array(dtype=vec3)),
3566
3707
  "velocities": Var("velocities", array(dtype=vec3)),
3567
3708
  "indices": Var("indices", array(dtype=int32)),
3568
3709
  }
3569
3710
 
3570
3711
  def __new__(cls, *args, **kwargs):
3571
- instance = super(Mesh, cls).__new__(cls)
3712
+ instance = super().__new__(cls)
3572
3713
  instance.id = None
3573
3714
  return instance
3574
3715
 
@@ -3576,9 +3717,9 @@ class Mesh:
3576
3717
  self,
3577
3718
  points: array,
3578
3719
  indices: array,
3579
- velocities: Optional[array] = None,
3580
- support_winding_number: bool = False,
3581
- bvh_constructor: Optional[str] = None,
3720
+ velocities: array | None = None,
3721
+ support_winding_number: builtins.bool = False,
3722
+ bvh_constructor: str | None = None,
3582
3723
  ):
3583
3724
  """Class representing a triangle mesh.
3584
3725
 
@@ -3640,7 +3781,7 @@ class Mesh:
3640
3781
  points.__ctype__(),
3641
3782
  velocities.__ctype__() if velocities else array().__ctype__(),
3642
3783
  indices.__ctype__(),
3643
- int(len(points)),
3784
+ len(points),
3644
3785
  int(indices.size / 3),
3645
3786
  int(support_winding_number),
3646
3787
  bvh_constructor_values[bvh_constructor],
@@ -3651,7 +3792,7 @@ class Mesh:
3651
3792
  points.__ctype__(),
3652
3793
  velocities.__ctype__() if velocities else array().__ctype__(),
3653
3794
  indices.__ctype__(),
3654
- int(len(points)),
3795
+ len(points),
3655
3796
  int(indices.size / 3),
3656
3797
  int(support_winding_number),
3657
3798
  bvh_constructor_values[bvh_constructor],
@@ -3750,7 +3891,7 @@ class Volume:
3750
3891
  LINEAR = constant(1)
3751
3892
 
3752
3893
  def __new__(cls, *args, **kwargs):
3753
- instance = super(Volume, cls).__new__(cls)
3894
+ instance = super().__new__(cls)
3754
3895
  instance.id = None
3755
3896
  return instance
3756
3897
 
@@ -3799,7 +3940,7 @@ class Volume:
3799
3940
  buf = ctypes.c_void_p(0)
3800
3941
  size = ctypes.c_uint64(0)
3801
3942
  self.runtime.core.volume_get_buffer_info(self.id, ctypes.byref(buf), ctypes.byref(size))
3802
- return array(ptr=buf.value, dtype=uint8, shape=size.value, device=self.device, owner=False)
3943
+ return array(ptr=buf.value, dtype=uint8, shape=size.value, device=self.device)
3803
3944
 
3804
3945
  def get_tile_count(self) -> int:
3805
3946
  """Return the number of tiles (NanoVDB leaf nodes) of the volume."""
@@ -3811,7 +3952,7 @@ class Volume:
3811
3952
  self.runtime.core.volume_get_tile_and_voxel_count(self.id, ctypes.byref(tile_count), ctypes.byref(voxel_count))
3812
3953
  return tile_count.value
3813
3954
 
3814
- def get_tiles(self, out: Optional[array] = None) -> array:
3955
+ def get_tiles(self, out: array | None = None) -> array:
3815
3956
  """Return the integer coordinates of all allocated tiles for this volume.
3816
3957
 
3817
3958
  Args:
@@ -3851,7 +3992,7 @@ class Volume:
3851
3992
  self.runtime.core.volume_get_tile_and_voxel_count(self.id, ctypes.byref(tile_count), ctypes.byref(voxel_count))
3852
3993
  return voxel_count.value
3853
3994
 
3854
- def get_voxels(self, out: Optional[array] = None) -> array:
3995
+ def get_voxels(self, out: array | None = None) -> array:
3855
3996
  """Return the integer coordinates of all allocated voxels for this volume.
3856
3997
 
3857
3998
  Args:
@@ -3880,7 +4021,7 @@ class Volume:
3880
4021
 
3881
4022
  return out
3882
4023
 
3883
- def get_voxel_size(self) -> Tuple[float, float, float]:
4024
+ def get_voxel_size(self) -> tuple[float, float, float]:
3884
4025
  """Return the voxel size, i.e, world coordinates of voxel's diagonal vector"""
3885
4026
 
3886
4027
  if self.id == 0:
@@ -3943,7 +4084,7 @@ class Volume:
3943
4084
  mat33f.from_buffer_copy(transform_buffer),
3944
4085
  )
3945
4086
 
3946
- _nvdb_type_to_dtype = {
4087
+ _nvdb_type_to_dtype: ClassVar[dict[str, type]] = {
3947
4088
  "float": float32,
3948
4089
  "double": float64,
3949
4090
  "int16": int16,
@@ -4061,7 +4202,7 @@ class Volume:
4061
4202
  if type_size_in_bytes(dtype) != value_size:
4062
4203
  raise RuntimeError(f"Cannot cast feature data of size {value_size} to array dtype {type_repr(dtype)}")
4063
4204
 
4064
- return array(ptr=info.ptr, dtype=dtype, shape=value_count, device=self.device, owner=False)
4205
+ return array(ptr=info.ptr, dtype=dtype, shape=value_count, device=self.device)
4065
4206
 
4066
4207
  @classmethod
4067
4208
  def load_from_nvdb(cls, file_or_buffer, device=None) -> Volume:
@@ -4141,15 +4282,15 @@ class Volume:
4141
4282
  codec_dict = {"none": 0, "zip": 1, "blosc": 2}
4142
4283
 
4143
4284
  class FileHeader(ctypes.Structure):
4144
- _fields_ = [
4285
+ _fields_ = (
4145
4286
  ("magic", ctypes.c_uint64),
4146
4287
  ("version", ctypes.c_uint32),
4147
4288
  ("gridCount", ctypes.c_uint16),
4148
4289
  ("codec", ctypes.c_uint16),
4149
- ]
4290
+ )
4150
4291
 
4151
4292
  class FileMetaData(ctypes.Structure):
4152
- _fields_ = [
4293
+ _fields_ = (
4153
4294
  ("gridSize", ctypes.c_uint64),
4154
4295
  ("fileSize", ctypes.c_uint64),
4155
4296
  ("nameKey", ctypes.c_uint64),
@@ -4165,10 +4306,10 @@ class Volume:
4165
4306
  ("codec", ctypes.c_uint16),
4166
4307
  ("padding", ctypes.c_uint16),
4167
4308
  ("version", ctypes.c_uint32),
4168
- ]
4309
+ )
4169
4310
 
4170
4311
  class GridData(ctypes.Structure):
4171
- _fields_ = [
4312
+ _fields_ = (
4172
4313
  ("magic", ctypes.c_uint64),
4173
4314
  ("checksum", ctypes.c_uint64),
4174
4315
  ("version", ctypes.c_uint32),
@@ -4187,7 +4328,7 @@ class Volume:
4187
4328
  ("data0", ctypes.c_uint32),
4188
4329
  ("data1", ctypes.c_uint64),
4189
4330
  ("data2", ctypes.c_uint64),
4190
- ]
4331
+ )
4191
4332
 
4192
4333
  NVDB_MAGIC = 0x304244566F6E614E
4193
4334
  NVDB_VERSION = 32 << 21 | 3 << 10 | 3
@@ -4293,7 +4434,7 @@ class Volume:
4293
4434
  "A warp Volume has already been created for this grid, aliasing it more than once is not possible."
4294
4435
  )
4295
4436
 
4296
- data_array = array(ptr=grid_ptr, dtype=uint8, shape=buffer_size, owner=False, device=device)
4437
+ data_array = array(ptr=grid_ptr, dtype=uint8, shape=buffer_size, device=device)
4297
4438
 
4298
4439
  return cls(data_array, copy=False)
4299
4440
 
@@ -4410,8 +4551,8 @@ class Volume:
4410
4551
  @classmethod
4411
4552
  def allocate(
4412
4553
  cls,
4413
- min: List[int],
4414
- max: List[int],
4554
+ min: list[int],
4555
+ max: list[int],
4415
4556
  voxel_size: float,
4416
4557
  bg_value=0.0,
4417
4558
  translation=(0.0, 0.0, 0.0),
@@ -4459,7 +4600,7 @@ class Volume:
4459
4600
 
4460
4601
  @staticmethod
4461
4602
  def _fill_transform_buffers(
4462
- voxel_size: Union[float, List[float]],
4603
+ voxel_size: float | list[float],
4463
4604
  translation,
4464
4605
  transform,
4465
4606
  ):
@@ -4483,18 +4624,13 @@ class Volume:
4483
4624
 
4484
4625
  # nanovdb types for which we instantiate the grid builder
4485
4626
  # Should be in sync with WP_VOLUME_BUILDER_INSTANTIATE_TYPES in volume_builder.h
4486
- _supported_allocation_types = [
4487
- "int32",
4488
- "float",
4489
- "Vec3f",
4490
- "Vec4f",
4491
- ]
4627
+ _supported_allocation_types = ("int32", "float", "Vec3f", "Vec4f")
4492
4628
 
4493
4629
  @classmethod
4494
4630
  def allocate_by_tiles(
4495
4631
  cls,
4496
4632
  tile_points: array,
4497
- voxel_size: Union[float, List[float]] = None,
4633
+ voxel_size: float | list[float] | None = None,
4498
4634
  bg_value=0.0,
4499
4635
  translation=(0.0, 0.0, 0.0),
4500
4636
  device=None,
@@ -4602,7 +4738,7 @@ class Volume:
4602
4738
  def allocate_by_voxels(
4603
4739
  cls,
4604
4740
  voxel_points: array,
4605
- voxel_size: Union[float, List[float]] = None,
4741
+ voxel_size: float | list[float] | None = None,
4606
4742
  translation=(0.0, 0.0, 0.0),
4607
4743
  device=None,
4608
4744
  transform=None,
@@ -4658,20 +4794,20 @@ class Volume:
4658
4794
  return volume
4659
4795
 
4660
4796
 
4661
- def _is_contiguous_vec_like_array(array, vec_length: int, scalar_types: Tuple[type]) -> bool:
4797
+ def _is_contiguous_vec_like_array(array, vec_length: int, scalar_types: tuple[type]) -> builtins.bool:
4662
4798
  if not (is_array(array) and array.is_contiguous):
4663
4799
  return False
4664
4800
  if type_scalar_type(array.dtype) not in scalar_types:
4665
4801
  return False
4666
- return (array.ndim == 1 and type_length(array.dtype) == vec_length) or (
4667
- array.ndim == 2 and array.shape[1] == vec_length and type_length(array.dtype) == 1
4802
+ return (array.ndim == 1 and type_size(array.dtype) == vec_length) or (
4803
+ array.ndim == 2 and array.shape[1] == vec_length and type_size(array.dtype) == 1
4668
4804
  )
4669
4805
 
4670
4806
 
4671
4807
  # definition just for kernel type (cannot be a parameter), see mesh.h
4672
- # NOTE: its layout must match the corresponding struct defined in C.
4808
+ # NOTE: its layout must match the mesh_query_point_t struct defined in C.
4673
4809
  # NOTE: it needs to be defined after `indexedarray` to workaround a circular import issue.
4674
- class mesh_query_point_t:
4810
+ class MeshQueryPoint:
4675
4811
  """Output for the mesh query point functions.
4676
4812
 
4677
4813
  Attributes:
@@ -4691,7 +4827,9 @@ class mesh_query_point_t:
4691
4827
 
4692
4828
  from warp.codegen import Var
4693
4829
 
4694
- vars = {
4830
+ _wp_native_name_ = "mesh_query_point_t"
4831
+
4832
+ vars: ClassVar[dict[str, Var]] = {
4695
4833
  "result": Var("result", bool),
4696
4834
  "sign": Var("sign", float32),
4697
4835
  "face": Var("face", int32),
@@ -4700,12 +4838,9 @@ class mesh_query_point_t:
4700
4838
  }
4701
4839
 
4702
4840
 
4703
- MeshQueryPoint = mesh_query_point_t
4704
-
4705
-
4706
4841
  # definition just for kernel type (cannot be a parameter), see mesh.h
4707
- # NOTE: its layout must match the corresponding struct defined in C.
4708
- class mesh_query_ray_t:
4842
+ # NOTE: its layout must match the mesh_query_ray_t struct defined in C.
4843
+ class MeshQueryRay:
4709
4844
  """Output for the mesh query ray functions.
4710
4845
 
4711
4846
  Attributes:
@@ -4723,7 +4858,9 @@ class mesh_query_ray_t:
4723
4858
 
4724
4859
  from warp.codegen import Var
4725
4860
 
4726
- vars = {
4861
+ _wp_native_name_ = "mesh_query_ray_t"
4862
+
4863
+ vars: ClassVar[dict[str, Var]] = {
4727
4864
  "result": Var("result", bool),
4728
4865
  "sign": Var("sign", float32),
4729
4866
  "face": Var("face", int32),
@@ -4734,9 +4871,6 @@ class mesh_query_ray_t:
4734
4871
  }
4735
4872
 
4736
4873
 
4737
- MeshQueryRay = mesh_query_ray_t
4738
-
4739
-
4740
4874
  def matmul(
4741
4875
  a: array2d,
4742
4876
  b: array2d,
@@ -4863,7 +4997,7 @@ def adj_batched_matmul(
4863
4997
 
4864
4998
  class HashGrid:
4865
4999
  def __new__(cls, *args, **kwargs):
4866
- instance = super(HashGrid, cls).__new__(cls)
5000
+ instance = super().__new__(cls)
4867
5001
  instance.id = None
4868
5002
  return instance
4869
5003
 
@@ -4938,7 +5072,7 @@ class HashGrid:
4938
5072
 
4939
5073
  class MarchingCubes:
4940
5074
  def __new__(cls, *args, **kwargs):
4941
- instance = super(MarchingCubes, cls).__new__(cls)
5075
+ instance = super().__new__(cls)
4942
5076
  instance.id = None
4943
5077
  return instance
4944
5078
 
@@ -5101,6 +5235,9 @@ def type_is_generic(t):
5101
5235
  if is_array(t):
5102
5236
  return type_is_generic(t.dtype)
5103
5237
 
5238
+ if get_origin(t) is tuple:
5239
+ return True
5240
+
5104
5241
  if hasattr(t, "_wp_scalar_type_"):
5105
5242
  # vector/matrix type, check if dtype is generic
5106
5243
  if type_is_generic(t._wp_scalar_type_):
@@ -5170,8 +5307,18 @@ def type_matches_template(arg_type, template_type):
5170
5307
  return True
5171
5308
 
5172
5309
 
5173
- def infer_argument_types(args, template_types, arg_names=None):
5174
- """Resolve argument types with the given list of template types."""
5310
+ def infer_argument_types(args: list[Any], template_types, arg_names: list[str] | None = None) -> list[type]:
5311
+ """Resolve argument types with the given list of template types.
5312
+
5313
+ Args:
5314
+ args: List of arguments to infer types for.
5315
+ template_types: List of template types to match against.
5316
+ arg_names: List of argument names to use for error messages.
5317
+
5318
+ Raises:
5319
+ RuntimeError: Number of arguments must match number of template types.
5320
+ TypeError: Unable to infer the type of an argument.
5321
+ """
5175
5322
 
5176
5323
  if len(args) != len(template_types):
5177
5324
  raise RuntimeError("Number of arguments must match number of template types.")
@@ -5195,10 +5342,6 @@ def infer_argument_types(args, template_types, arg_names=None):
5195
5342
  elif issubclass(arg_type, warp.codegen.StructInstance):
5196
5343
  # a struct
5197
5344
  arg_types.append(arg._cls)
5198
- # elif arg_type in [warp.types.launch_bounds_t, warp.types.shape_t, warp.types.range_t]:
5199
- # arg_types.append(arg_type)
5200
- # elif arg_type in [warp.hash_grid_query_t, warp.mesh_query_aabb_t, warp.mesh_query_point_t, warp.mesh_query_ray_t, warp.bvh_query_t]:
5201
- # arg_types.append(arg_type)
5202
5345
  elif arg is None:
5203
5346
  # allow passing None for arrays
5204
5347
  t = template_types[i]
@@ -5233,11 +5376,11 @@ simple_type_codes = {
5233
5376
  shape_t: "sh",
5234
5377
  range_t: "rg",
5235
5378
  launch_bounds_t: "lb",
5236
- hash_grid_query_t: "hgq",
5237
- mesh_query_aabb_t: "mqa",
5238
- mesh_query_point_t: "mqp",
5239
- mesh_query_ray_t: "mqr",
5240
- bvh_query_t: "bvhq",
5379
+ HashGridQuery: "hgq",
5380
+ MeshQueryAABB: "mqa",
5381
+ MeshQueryPoint: "mqp",
5382
+ MeshQueryRay: "mqr",
5383
+ BvhQuery: "bvhq",
5241
5384
  }
5242
5385
 
5243
5386
 
@@ -5287,8 +5430,17 @@ def get_type_code(arg_type: type) -> str:
5287
5430
  return f"fa{arg_type.ndim}{get_type_code(arg_type.dtype)}"
5288
5431
  elif isinstance(arg_type, indexedfabricarray):
5289
5432
  return f"ifa{arg_type.ndim}{get_type_code(arg_type.dtype)}"
5433
+ elif get_origin(arg_type) is tuple:
5434
+ arg_types = get_args(arg_type)
5435
+ return f"tpl{len(arg_types)}{''.join(get_type_code(x) for x in arg_types)}"
5436
+ elif isinstance(arg_type, tuple_t):
5437
+ return f"tplt{len(arg_type.types)}{''.join(get_type_code(x) for x in arg_type.types)}"
5290
5438
  elif isinstance(arg_type, warp.codegen.Struct):
5291
5439
  return arg_type.native_name
5440
+ elif isinstance(arg_type, tile):
5441
+ shape_string = "".join(str(num) for num in arg_type.shape)
5442
+ storage = "s" if arg_type.storage == "shared" else "r"
5443
+ return f"t{storage}{shape_string}{get_type_code(arg_type.dtype)}"
5292
5444
  elif arg_type == Scalar:
5293
5445
  # generic scalar type
5294
5446
  return "s?"
@@ -5301,12 +5453,14 @@ def get_type_code(arg_type: type) -> str:
5301
5453
  elif isinstance(arg_type, Callable):
5302
5454
  # TODO: elaborate on Callable type?
5303
5455
  return "c"
5456
+ elif arg_type is Ellipsis:
5457
+ return "?"
5304
5458
  else:
5305
5459
  raise TypeError(f"Unrecognized type '{arg_type}'")
5306
5460
 
5307
5461
 
5308
- def get_signature(arg_types: List[type], func_name: Optional[str] = None, arg_names: Optional[List[str]] = None) -> str:
5309
- type_codes: List[str] = []
5462
+ def get_signature(arg_types: list[type], func_name: str | None = None, arg_names: list[str] | None = None) -> str:
5463
+ type_codes: list[str] = []
5310
5464
  for i, arg_type in enumerate(arg_types):
5311
5465
  try:
5312
5466
  type_codes.append(get_type_code(arg_type))