warp-lang 1.5.1-py3-none-manylinux2014_aarch64.whl → 1.6.0-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (123)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1076 -480
  8. warp/codegen.py +240 -119
  9. warp/config.py +1 -1
  10. warp/context.py +298 -84
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth_self_contact.py +260 -0
  27. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  28. warp/examples/sim/example_jacobian_ik.py +0 -2
  29. warp/examples/sim/example_quadruped.py +5 -2
  30. warp/examples/tile/example_tile_cholesky.py +79 -0
  31. warp/examples/tile/example_tile_convolution.py +2 -2
  32. warp/examples/tile/example_tile_fft.py +2 -2
  33. warp/examples/tile/example_tile_filtering.py +3 -3
  34. warp/examples/tile/example_tile_matmul.py +4 -4
  35. warp/examples/tile/example_tile_mlp.py +12 -12
  36. warp/examples/tile/example_tile_nbody.py +180 -0
  37. warp/examples/tile/example_tile_walker.py +319 -0
  38. warp/math.py +147 -0
  39. warp/native/array.h +12 -0
  40. warp/native/builtin.h +0 -1
  41. warp/native/bvh.cpp +149 -70
  42. warp/native/bvh.cu +287 -68
  43. warp/native/bvh.h +195 -85
  44. warp/native/clang/clang.cpp +5 -1
  45. warp/native/cuda_util.cpp +35 -0
  46. warp/native/cuda_util.h +5 -0
  47. warp/native/exports.h +40 -40
  48. warp/native/intersect.h +17 -0
  49. warp/native/mat.h +41 -0
  50. warp/native/mathdx.cpp +19 -0
  51. warp/native/mesh.cpp +25 -8
  52. warp/native/mesh.cu +153 -101
  53. warp/native/mesh.h +482 -403
  54. warp/native/quat.h +40 -0
  55. warp/native/solid_angle.h +7 -0
  56. warp/native/sort.cpp +85 -0
  57. warp/native/sort.cu +34 -0
  58. warp/native/sort.h +3 -1
  59. warp/native/spatial.h +11 -0
  60. warp/native/tile.h +1185 -664
  61. warp/native/tile_reduce.h +8 -6
  62. warp/native/vec.h +41 -0
  63. warp/native/warp.cpp +8 -1
  64. warp/native/warp.cu +263 -40
  65. warp/native/warp.h +19 -5
  66. warp/optim/linear.py +22 -4
  67. warp/render/render_opengl.py +124 -59
  68. warp/sim/__init__.py +6 -1
  69. warp/sim/collide.py +270 -26
  70. warp/sim/integrator_euler.py +25 -7
  71. warp/sim/integrator_featherstone.py +154 -35
  72. warp/sim/integrator_vbd.py +842 -40
  73. warp/sim/model.py +111 -53
  74. warp/stubs.py +248 -115
  75. warp/tape.py +28 -30
  76. warp/tests/aux_test_module_unload.py +15 -0
  77. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  78. warp/tests/test_array.py +74 -0
  79. warp/tests/test_assert.py +242 -0
  80. warp/tests/test_codegen.py +14 -61
  81. warp/tests/test_collision.py +2 -2
  82. warp/tests/test_examples.py +9 -0
  83. warp/tests/test_grad_debug.py +87 -2
  84. warp/tests/test_hash_grid.py +1 -1
  85. warp/tests/test_ipc.py +116 -0
  86. warp/tests/test_mat.py +138 -167
  87. warp/tests/test_math.py +47 -1
  88. warp/tests/test_matmul.py +11 -7
  89. warp/tests/test_matmul_lite.py +4 -4
  90. warp/tests/test_mesh.py +84 -60
  91. warp/tests/test_mesh_query_aabb.py +165 -0
  92. warp/tests/test_mesh_query_point.py +328 -286
  93. warp/tests/test_mesh_query_ray.py +134 -121
  94. warp/tests/test_mlp.py +2 -2
  95. warp/tests/test_operators.py +43 -0
  96. warp/tests/test_overwrite.py +2 -2
  97. warp/tests/test_quat.py +77 -0
  98. warp/tests/test_reload.py +29 -0
  99. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  100. warp/tests/test_static.py +16 -0
  101. warp/tests/test_tape.py +25 -0
  102. warp/tests/test_tile.py +134 -191
  103. warp/tests/test_tile_load.py +356 -0
  104. warp/tests/test_tile_mathdx.py +61 -8
  105. warp/tests/test_tile_mlp.py +17 -17
  106. warp/tests/test_tile_reduce.py +24 -18
  107. warp/tests/test_tile_shared_memory.py +66 -17
  108. warp/tests/test_tile_view.py +165 -0
  109. warp/tests/test_torch.py +35 -0
  110. warp/tests/test_utils.py +36 -24
  111. warp/tests/test_vec.py +110 -0
  112. warp/tests/unittest_suites.py +29 -4
  113. warp/tests/unittest_utils.py +30 -11
  114. warp/thirdparty/unittest_parallel.py +2 -2
  115. warp/types.py +409 -99
  116. warp/utils.py +9 -5
  117. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
  118. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
  119. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  120. warp/examples/benchmarks/benchmark_tile.py +0 -179
  121. warp/native/tile_gemm.h +0 -341
  122. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  123. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/types.py CHANGED
@@ -171,8 +171,7 @@ def vector(length, dtype):
  iter(value)
  except TypeError:
  raise TypeError(
- f"Expected to assign a slice from a sequence of values "
- f"but got `{type(value).__name__}` instead"
+ f"Expected to assign a slice from a sequence of values but got `{type(value).__name__}` instead"
  ) from None

  if self._wp_scalar_type_ == float16:
@@ -350,6 +349,9 @@ def matrix(shape, dtype):
  f"Invalid number of arguments in matrix constructor, expected {self._length_} elements, got {num_args}"
  )

+ def __len__(self):
+ return self._shape_[0]
+
  def __add__(self, y):
  return warp.add(self, y)

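The new __len__ means matrix values now report their leading dimension from plain Python, consistent with the vector types. A minimal sketch (values illustrative):

    import warp as wp

    m = wp.mat33(1.0, 2.0, 3.0,
                 4.0, 5.0, 6.0,
                 7.0, 8.0, 9.0)

    # __len__ returns self._shape_[0], i.e. the number of rows
    assert len(m) == 3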
@@ -419,7 +421,7 @@ def matrix(shape, dtype):
  iter(v)
  except TypeError:
  raise TypeError(
- f"Expected to assign a slice from a sequence of values " f"but got `{type(v).__name__}` instead"
+ f"Expected to assign a slice from a sequence of values but got `{type(v).__name__}` instead"
  ) from None

  row_start = r * self._shape_[1]
@@ -676,6 +678,10 @@ def transformation(dtype=Any):

  def __init__(self, *args, **kwargs):
  if len(args) == 1 and len(kwargs) == 0:
+ if is_float(args[0]):
+ # Initialize from a single scalar.
+ super().__init__(args[0])
+ return
  if args[0]._wp_generic_type_str_ == self._wp_generic_type_str_:
  # Copy constructor.
  super().__init__(*args[0])
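The scalar branch above defers to the vector-type initializer, which broadcasts a single scalar to every component. A hedged sketch (assuming that broadcast behavior):

    import warp as wp

    # One scalar initializes all seven components (px, py, pz, qx, qy, qz, qw);
    # wp.transformf is the float32 transformation alias.
    t = wp.transformf(0.0)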
@@ -1314,7 +1320,7 @@ def type_repr(t):
  if is_array(t):
  return str(f"array(ndim={t.ndim}, dtype={t.dtype})")
  if is_tile(t):
- return str(f"tile(dtype={t.dtype}, m={t.M}, n={t.N})")
+ return str(f"tile(dtype={t.dtype}, shape={t.shape}")
  if type_is_vector(t):
  return str(f"vector(length={t._shape_[0]}, dtype={t._wp_scalar_type_})")
  if type_is_matrix(t):
@@ -1357,6 +1363,11 @@ def type_is_matrix(t):
  return getattr(t, "_wp_generic_type_hint_", None) is Matrix


+ # returns True if the passed *type* is a transformation
+ def type_is_transformation(t):
+ return getattr(t, "_wp_generic_type_hint_", None) is Transformation
+
+
  value_types = (int, float, builtins.bool) + scalar_types


@@ -1514,7 +1525,7 @@ def strides_from_shape(shape: Tuple, dtype):


  def check_array_shape(shape: Tuple):
- """Checks that the size in each dimension is positive and less than 2^32."""
+ """Checks that the size in each dimension is positive and less than 2^31."""

  for dim_index, dim_size in enumerate(shape):
  if dim_size < 0:
@@ -1701,8 +1712,22 @@ class array(Array):
  )
  elif length is not None:
  # backward compatibility
+ warp.utils.warn(
+ "The 'length' keyword is deprecated and will be removed in a future version. Use 'shape' instead.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
  shape = (length,)

+ if owner:
+ warp.utils.warn(
+ "The 'owner' keyword in the array initializer is\n"
+ "deprecated and will be removed in a future version. It currently has no effect.\n"
+ "Pass a function to the 'deleter' keyword instead.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
+
  # determine the construction path from the given arguments
  if data is not None:
  # data or ptr, not both
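For callers, the two deprecations above are mechanical renames. A sketch of the migration (values illustrative):

    import warp as wp

    # Before 1.6 (now emits DeprecationWarning):
    #   a = wp.array(data=[1.0, 2.0, 3.0], length=3, dtype=wp.float32, owner=True)

    # 1.6 onward: 'shape' replaces 'length'; a 'deleter' callback replaces 'owner'
    a = wp.array(data=[1.0, 2.0, 3.0], shape=(3,), dtype=wp.float32)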
@@ -1734,6 +1759,15 @@ class array(Array):
  if not hasattr(data, "__len__"):
  raise RuntimeError(f"Data must be a sequence or array, got scalar {data}")

+ if hasattr(dtype, "_wp_scalar_type_"):
+ dtype_shape = dtype._shape_
+ dtype_ndim = len(dtype_shape)
+ scalar_dtype = dtype._wp_scalar_type_
+ else:
+ dtype_shape = ()
+ dtype_ndim = 0
+ scalar_dtype = dtype
+
  if hasattr(data, "__cuda_array_interface__"):
  try:
  # Performance note: try first, ask questions later
@@ -1745,12 +1779,58 @@ class array(Array):

  if device.is_cuda:
  desc = data.__cuda_array_interface__
- shape = desc.get("shape")
- strides = desc.get("strides")
- dtype = np_dtype_to_warp_type[np.dtype(desc.get("typestr"))]
- ptr = desc.get("data")[0]
+ data_shape = desc.get("shape")
+ data_strides = desc.get("strides")
+ data_dtype = np.dtype(desc.get("typestr"))
+ data_ptr = desc.get("data")[0]
+
+ if dtype == Any:
+ dtype = np_dtype_to_warp_type[data_dtype]
+
+ data_ndim = len(data_shape)
+
+ # determine whether the input needs reshaping
+ target_npshape = None
+ if shape is not None:
+ target_npshape = (*shape, *dtype_shape)
+ elif dtype_ndim > 0:
+ # prune inner dimensions of length 1
+ while data_ndim > 1 and data_shape[-1] == 1:
+ data_shape = data_shape[:-1]
+ # if the inner dims don't match exactly, check if the innermost dim is a multiple of type length
+ if data_ndim < dtype_ndim or data_shape[-dtype_ndim:] != dtype_shape:
+ if data_shape[-1] == dtype._length_:
+ target_npshape = (*data_shape[:-1], *dtype_shape)
+ elif data_shape[-1] % dtype._length_ == 0:
+ target_npshape = (*data_shape[:-1], data_shape[-1] // dtype._length_, *dtype_shape)
+ else:
+ if dtype_ndim == 1:
+ raise RuntimeError(
+ f"The inner dimensions of the input data are not compatible with the requested vector type {warp.context.type_str(dtype)}: expected an inner dimension that is a multiple of {dtype._length_}"
+ )
+ else:
+ raise RuntimeError(
+ f"The inner dimensions of the input data are not compatible with the requested matrix type {warp.context.type_str(dtype)}: expected inner dimensions {dtype._shape_} or a multiple of {dtype._length_}"
+ )
+
+ if target_npshape is None:
+ target_npshape = data_shape if shape is None else shape
+
+ # determine final shape and strides
+ if dtype_ndim > 0:
+ # make sure the inner dims are contiguous for vector/matrix types
+ scalar_size = type_size_in_bytes(dtype._wp_scalar_type_)
+ inner_contiguous = data_strides[-1] == scalar_size
+ if inner_contiguous and dtype_ndim > 1:
+ inner_contiguous = data_strides[-2] == scalar_size * dtype_shape[-1]
+
+ shape = target_npshape[:-dtype_ndim] or (1,)
+ strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
+ else:
+ shape = target_npshape or (1,)
+ strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)

- self._init_from_ptr(ptr, dtype, shape, strides, None, device, False, None)
+ self._init_from_ptr(data_ptr, dtype, shape, strides, None, device, False, None)

  # keep a ref to the source data to keep allocation alive
  self._ref = data
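The reworked __cuda_array_interface__ path means a structured dtype can be requested directly when aliasing an external GPU tensor; the innermost dimension is folded into the vector or matrix type when it matches (or is a multiple of) the type's length. A hedged sketch using PyTorch as the producer (assumes CUDA builds of both libraries):

    import torch
    import warp as wp

    # a (1024, 3) float32 CUDA tensor exposes __cuda_array_interface__
    t = torch.zeros((1024, 3), dtype=torch.float32, device="cuda:0")

    # the inner dimension of length 3 is absorbed into the vec3 dtype, giving
    # a 1D Warp array of 1024 vec3 elements that aliases t's memory
    a = wp.array(t, dtype=wp.vec3, device="cuda:0")
    assert a.shape == (1024,)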
@@ -1760,15 +1840,6 @@ class array(Array):
  f"Trying to construct a Warp array from data argument's __cuda_array_interface__ but {device} is not CUDA-capable"
  )

- if hasattr(dtype, "_wp_scalar_type_"):
- dtype_shape = dtype._shape_
- dtype_ndim = len(dtype_shape)
- scalar_dtype = dtype._wp_scalar_type_
- else:
- dtype_shape = ()
- dtype_ndim = 0
- scalar_dtype = dtype
-
  # convert input data to ndarray (handles lists, tuples, etc.) and determine dtype
  if dtype == Any:
  # infer dtype from data
@@ -1971,7 +2042,21 @@ class array(Array):
  else:
  strides = tuple(strides)
  is_contiguous = strides == contiguous_strides
- capacity = shape[0] * strides[0]
+
+ # To calculate the required capacity, find the dimension with largest stride.
+ # Normally it is the first one, but it could be different (e.g., transposed arrays).
+ max_stride = strides[0]
+ max_dim = 0
+ for i in range(1, ndim):
+ if strides[i] > max_stride:
+ max_stride = strides[i]
+ max_dim = i
+
+ if max_stride > 0:
+ capacity = shape[max_dim] * strides[max_dim]
+ else:
+ # single element storage with zero strides
+ capacity = dtype_size

  allocator = device.get_allocator(pinned=pinned)
  if capacity > 0:
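A quick worked example of why shape[0] * strides[0] under-allocated: for a transposed view, the largest stride is no longer on the first axis. The new rule in isolation (hypothetical helper, byte units):

    def required_capacity(shape, strides, dtype_size):
        # find the dimension with the largest stride, as above
        max_dim = max(range(len(shape)), key=lambda i: strides[i])
        if strides[max_dim] > 0:
            return shape[max_dim] * strides[max_dim]
        return dtype_size  # zero strides: single-element storage

    # a float32 (4, 8) array transposed to shape (8, 4) has strides (4, 32):
    # the old rule gave 8 * 4 = 32 bytes; the full buffer is 4 * 32 = 128
    assert required_capacity((8, 4), (4, 32), 4) == 128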
@@ -1990,6 +2075,7 @@ class array(Array):
  self.pinned = pinned if device.is_cpu else False
  self.is_contiguous = is_contiguous
  self.deleter = allocator.deleter
+ self._allocator = allocator

  def _init_annotation(self, dtype, ndim):
  self.dtype = dtype
@@ -2706,6 +2792,52 @@ class array(Array):
  a._ref = self
  return a

+ def ipc_handle(self) -> bytes:
+ """Return an IPC handle of the array as a 64-byte ``bytes`` object
+
+ :func:`from_ipc_handle` can be used with this handle in another process
+ to obtain a :class:`array` that shares the same underlying memory
+ allocation.
+
+ IPC is currently only supported on Linux.
+ Additionally, IPC is only supported for arrays allocated using
+ the default memory allocator.
+
+ :class:`Event` objects created with the ``interprocess=True`` argument
+ may similarly be shared between processes to synchronize GPU work.
+
+ Example:
+ Temporarily using the default memory allocator to allocate an array
+ and get its IPC handle::
+
+ with wp.ScopedMempool("cuda:0", False):
+ test_array = wp.full(1024, value=42.0, dtype=wp.float32, device="cuda:0")
+ ipc_handle = test_array.ipc_handle()
+
+ Raises:
+ RuntimeError: The array is not associated with a CUDA device.
+ RuntimeError: The CUDA device does not appear to support IPC.
+ RuntimeError: The array was allocated using the :ref:`mempool memory allocator <mempool_allocators>`.
+ """
+
+ if self.device is None or not self.device.is_cuda:
+ raise RuntimeError("IPC requires a CUDA device")
+ elif self.device.is_ipc_supported is False:
+ raise RuntimeError("IPC does not appear to be supported on this CUDA device")
+ elif isinstance(self._allocator, warp.context.CudaMempoolAllocator):
+ raise RuntimeError(
+ "Currently, IPC is only supported for arrays using the default memory allocator.\n"
+ "See https://nvidia.github.io/warp/modules/allocators.html for instructions on how to disable\n"
+ f"the mempool allocator on device {self.device}."
+ )
+
+ # Allocate a buffer for the data (64-element char array)
+ ipc_handle_buffer = (ctypes.c_char * 64)()
+
+ warp.context.runtime.core.cuda_ipc_get_mem_handle(self.ptr, ipc_handle_buffer)
+
+ return ipc_handle_buffer.raw
+

  # aliases for arrays with small dimensions
  def array1d(*args, **kwargs):
@@ -2733,7 +2865,13 @@ def array4d(*args, **kwargs):

  def from_ptr(ptr, length, dtype=None, shape=None, device=None):
  warp.utils.warn(
- "This version of wp.from_ptr() is deprecated. OmniGraph applications should use from_omni_graph_ptr() instead. In the future, wp.from_ptr() will work only with regular pointers.",
+ """This version of wp.from_ptr() is deprecated. OmniGraph
+ applications should use from_omni_graph_ptr() instead. To create an array
+ from a C pointer, use the array constructor and pass the ptr argument as a
+ uint64 value representing the start address in memory where the existing
+ array resides. For example, if using ctypes, pass
+ ptr=ctypes.cast(pointer, ctypes.POINTER(ctypes.c_size_t)).contents.value.
+ Be sure to also specify the dtype and shape parameters.""",
  category=DeprecationWarning,
  stacklevel=2,
  )
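The replacement flow described by the new warning text, written out; buffer is a hypothetical ctypes allocation standing in for an existing native array:

    import ctypes
    import warp as wp

    buffer = (ctypes.c_float * 16)()  # pre-existing host allocation

    # instead of wp.from_ptr(): pass the start address as 'ptr', and
    # always specify dtype and shape
    a = wp.array(ptr=ctypes.addressof(buffer), dtype=wp.float32,
                 shape=(16,), device="cpu")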
@@ -2748,6 +2886,51 @@ def from_ptr(ptr, length, dtype=None, shape=None, device=None):
  )


+ def _close_cuda_ipc_handle(ptr, size):
+ warp.context.runtime.core.cuda_ipc_close_mem_handle(ptr)
+
+
+ def from_ipc_handle(
+ handle: bytes, dtype, shape: Tuple[int, ...], strides: Optional[Tuple[int, ...]] = None, device=None
+ ) -> array:
+ """Create an array from an IPC handle.
+
+ The ``dtype``, ``shape``, and optional ``strides`` arguments should
+ match the values from the :class:`array` from which ``handle`` was created.
+
+ Args:
+ handle: The interprocess memory handle for an existing device memory allocation.
+ dtype: One of the available `data types <#data-types>`_, such as :class:`warp.float32`, :class:`warp.mat33`, or a custom `struct <#structs>`_.
+ shape: Dimensions of the array.
+ strides: Number of bytes in each dimension between successive elements of the array.
+ device (Devicelike): Device to associate with the array.
+
+ Returns:
+ An array created from the existing memory allocation described by the interprocess memory handle ``handle``.
+
+ A copy of the underlying data is not made. Modifications to the array's data will be reflected in the
+ original process from which ``handle`` was exported.
+
+ Raises:
+ RuntimeError: IPC is not supported on ``device``.
+ """
+
+ try:
+ # Performance note: try first, ask questions later
+ device = warp.context.runtime.get_device(device)
+ except Exception:
+ # Fallback to using the public API for retrieving the device,
+ # which takes care of initializing Warp if needed.
+ device = warp.context.get_device(device)
+
+ if device.is_ipc_supported is False:
+ raise RuntimeError(f"IPC is not supported on device {device}.")
+
+ ptr = warp.context.runtime.core.cuda_ipc_open_mem_handle(device.context, handle)
+
+ return array(ptr=ptr, dtype=dtype, shape=shape, strides=strides, device=device, deleter=_close_cuda_ipc_handle)
+
+
  # A base class for non-contiguous arrays, providing the implementation of common methods like
  # contiguous(), to(), numpy(), list(), assign(), zero_(), and fill_().
  class noncontiguous_array_base(Generic[T]):
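Putting ipc_handle() and from_ipc_handle() together, a hedged sketch of the intended two-process flow (Linux only; in practice the two halves run in separate processes and the 64-byte handle travels over a pipe or socket):

    import warp as wp

    # producer: allocate with the default allocator and export a handle
    with wp.ScopedMempool("cuda:0", False):
        src = wp.full(1024, value=42.0, dtype=wp.float32, device="cuda:0")
    handle = src.ipc_handle()  # 64-byte bytes object, send to the peer

    # consumer: dtype and shape must match the exporting array
    dst = wp.from_ipc_handle(handle, wp.float32, shape=(1024,), device="cuda:0")
    dst.fill_(7.0)  # no copy was made, so the producer sees the write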
@@ -2985,25 +3168,38 @@ def array_type_id(a):
  raise ValueError("Invalid array type")


- # tile expression objects
+ # tile object
  class Tile:
  alignment = 16

- def __init__(self, dtype, M, N, op=None, storage="register", layout="rowmajor", strides=None, owner=True):
+ def __init__(self, dtype, shape, op=None, storage="register", layout="rowmajor", strides=None, owner=True):
  self.dtype = type_to_warp(dtype)
- self.M = M
- self.N = N
+ self.shape = shape
  self.op = op
  self.storage = storage
  self.layout = layout
+ self.strides = strides

- if strides is None:
- if layout == "rowmajor":
- self.strides = (N, 1)
- elif layout == "colmajor":
- self.strides = (1, M)
- else:
- self.strides = strides
+ # handle case where shape is concrete (rather than just Any)
+ if isinstance(self.shape, (list, tuple)):
+ if len(shape) == 0:
+ raise RuntimeError("Empty shape specified, must have at least 1 dimension")
+
+ # compute total size
+ self.size = 1
+ for s in self.shape:
+ self.size *= s
+
+ # if strides are not provided compute default strides
+ if self.strides is None:
+ self.strides = [1] * len(self.shape)
+
+ if layout == "rowmajor":
+ for i in range(len(self.shape) - 2, -1, -1):
+ self.strides[i] = self.strides[i + 1] * self.shape[i + 1]
+ else:
+ for i in range(1, len(shape)):
+ self.strides[i] = self.strides[i - 1] * self.shape[i - 1]

  self.owner = owner

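Tiles are N-dimensional now, so the fixed (N, 1)/(1, M) stride pairs become the usual row-major and column-major recurrences. The same default-stride logic in isolation (hypothetical helper, element units):

    def default_strides(shape, layout="rowmajor"):
        strides = [1] * len(shape)
        if layout == "rowmajor":
            for i in range(len(shape) - 2, -1, -1):   # innermost dim contiguous
                strides[i] = strides[i + 1] * shape[i + 1]
        else:                                         # "colmajor"
            for i in range(1, len(shape)):            # outermost dim contiguous
                strides[i] = strides[i - 1] * shape[i - 1]
        return strides

    assert default_strides((4, 8)) == [8, 1]               # old (N, 1)
    assert default_strides((4, 8), "colmajor") == [1, 4]   # old (1, M)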
@@ -3012,9 +3208,9 @@ class Tile:
  from warp.codegen import Var

  if self.storage == "register":
- return f"wp::tile_register_t<{Var.type_to_ctype(self.dtype)},{self.M},{self.N}>"
+ return f"wp::tile_register_t<{Var.type_to_ctype(self.dtype)},wp::tile_layout_register_t<wp::tile_shape_t<{','.join(map(str, self.shape))}>>>"
  elif self.storage == "shared":
- return f"wp::tile_shared_t<{Var.type_to_ctype(self.dtype)},{self.M},{self.N},{self.strides[0]}, {self.strides[1]}, {'true' if self.owner else 'false'}>"
+ return f"wp::tile_shared_t<{Var.type_to_ctype(self.dtype)},wp::tile_layout_strided_t<wp::tile_shape_t<{','.join(map(str, self.shape))}>, wp::tile_stride_t<{','.join(map(str, self.strides))}>>, {'true' if self.owner else 'false'}>"
  else:
  raise RuntimeError(f"Unrecognized tile storage type {self.storage}")

@@ -3027,24 +3223,33 @@ class Tile:
  elif self.storage == "shared":
  if self.owner:
  # allocate new shared memory tile
- return f"wp::tile_alloc_empty<{Var.type_to_ctype(self.dtype)},{self.M},{self.N},{'true' if requires_grad else 'false'}>()"
+ return f"wp::tile_alloc_empty<{Var.type_to_ctype(self.dtype)},wp::tile_shape_t<{','.join(map(str, self.shape))}>,{'true' if requires_grad else 'false'}>()"
  else:
  # tile will be initialized by another call, e.g.: tile_transpose()
  return "NULL"

  # return total tile size in bytes
  def size_in_bytes(self):
- num_bytes = self.align(type_size_in_bytes(self.dtype) * self.M * self.N)
+ num_bytes = self.align(type_size_in_bytes(self.dtype) * self.size)
  return num_bytes

+ @staticmethod
+ def round_up(bytes):
+ return ((bytes + Tile.alignment - 1) // Tile.alignment) * Tile.alignment
+
  # align tile size to natural boundary, default 16-bytes
  def align(self, bytes):
- return ((bytes + self.alignment - 1) // self.alignment) * self.alignment
+ return Tile.round_up(bytes)


  class TileZeros(Tile):
- def __init__(self, dtype, M, N, storage="register"):
- Tile.__init__(self, dtype, M, N, op="zeros", storage=storage)
+ def __init__(self, dtype, shape, storage="register"):
+ Tile.__init__(self, dtype, shape, op="zeros", storage=storage)
+
+
+ class TileOnes(Tile):
+ def __init__(self, dtype, shape, storage="register"):
+ Tile.__init__(self, dtype, shape, op="ones", storage=storage)


  class TileRange(Tile):
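round_up is plain round-to-multiple arithmetic on the 16-byte Tile.alignment; pulled out as a static method for reuse, it behaves like this:

    def round_up(num_bytes, alignment=16):
        return ((num_bytes + alignment - 1) // alignment) * alignment

    assert round_up(16) == 16  # aligned sizes are unchanged
    assert round_up(20) == 32  # others are padded to the next 16-byte boundary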
@@ -3053,32 +3258,39 @@ class TileRange:
  self.stop = stop
  self.step = step

- M = 1
- N = int((stop - start) / step)
+ n = int((stop - start) / step)

- Tile.__init__(self, dtype, M, N, op="arange", storage=storage)
+ Tile.__init__(self, dtype, shape=(n,), op="arange", storage=storage)


  class TileConstant(Tile):
- def __init__(self, dtype, M, N):
- Tile.__init__(self, dtype, M, N, op="constant", storage="register")
+ def __init__(self, dtype, shape):
+ Tile.__init__(self, dtype, shape, op="constant", storage="register")


  class TileLoad(Tile):
- def __init__(self, array, M, N, storage="register"):
- Tile.__init__(self, array.dtype, M, N, op="load", storage=storage)
+ def __init__(self, array, shape, storage="register"):
+ Tile.__init__(self, array.dtype, shape, op="load", storage=storage)


  class TileUnaryMap(Tile):
- def __init__(self, t, storage="register"):
- Tile.__init__(self, t.dtype, t.M, t.N, op="unary_map", storage=storage)
+ def __init__(self, t, dtype=None, storage="register"):
+ Tile.__init__(self, dtype, t.shape, op="unary_map", storage=storage)
+
+ # if no output dtype specified then assume it's the same as the first arg
+ if self.dtype is None:
+ self.dtype = t.dtype

  self.t = t


  class TileBinaryMap(Tile):
- def __init__(self, a, b, storage="register"):
- Tile.__init__(self, a.dtype, a.M, a.N, op="binary_map", storage=storage)
+ def __init__(self, a, b, dtype=None, storage="register"):
+ Tile.__init__(self, dtype, a.shape, op="binary_map", storage=storage)
+
+ # if no output dtype specified then assume it's the same as the first arg
+ if self.dtype is None:
+ self.dtype = a.dtype

  self.a = a
  self.b = b
@@ -3086,7 +3298,7 @@ class TileBinaryMap:

  class TileShared(Tile):
  def __init__(self, t):
- Tile.__init__(self, t.dtype, t.M, t.N, "shared", storage="shared")
+ Tile.__init__(self, t.dtype, t.shape, "shared", storage="shared")

  self.t = t

@@ -3095,35 +3307,66 @@ def is_tile(t):
  return isinstance(t, Tile)


+ bvh_constructor_values = {"sah": 0, "median": 1, "lbvh": 2}
+
+
  class Bvh:
  def __new__(cls, *args, **kwargs):
  instance = super(Bvh, cls).__new__(cls)
  instance.id = None
  return instance

- def __init__(self, lowers, uppers):
+ def __init__(self, lowers: array, uppers: array, constructor: Optional[str] = None):
  """Class representing a bounding volume hierarchy.

+ Depending on which device the input bounds live on, it can be either a CPU tree or a GPU tree.
+
  Attributes:
- id: Unique identifier for this bvh object, can be passed to kernels.
+ id: Unique identifier for this BVH object, can be passed to kernels.
  device: Device this object lives on, all buffers must live on the same device.

  Args:
- lowers (:class:`warp.array`): Array of lower bounds :class:`warp.vec3`
- uppers (:class:`warp.array`): Array of upper bounds :class:`warp.vec3`
+ lowers: Array of lower bounds of data type :class:`warp.vec3`.
+ uppers: Array of upper bounds of data type :class:`warp.vec3`.
+ ``lowers`` and ``uppers`` must live on the same device.
+ constructor: The construction algorithm used to build the tree.
+ Valid choices are ``"sah"``, ``"median"``, ``"lbvh"``, or ``None``.
+ When ``None``, the default constructor will be used (see the note).
+
+ Note:
+ Explanation of BVH constructors:
+
+ - ``"sah"``: A CPU-based top-down constructor where the AABBs are split based on Surface Area
+ Heuristics (SAH). Construction takes slightly longer than others but has the best query
+ performance.
+ - ``"median"``: A CPU-based top-down constructor where the AABBs are split based on the median
+ of centroids of primitives in an AABB. This constructor is faster than SAH but offers
+ inferior query performance.
+ - ``"lbvh"``: A GPU-based bottom-up constructor which maximizes parallelism. Construction is very
+ fast, especially for large models. Query performance is slightly slower than ``"sah"``.
+ - ``None``: The constructor will be automatically chosen based on the device where the tree
+ lives. For a GPU tree, the ``"lbvh"`` constructor will be selected; for a CPU tree, the ``"sah"``
+ constructor will be selected.
+
+ All three constructors are supported for GPU trees. When a CPU-based constructor is selected
+ for a GPU tree, bounds will be copied back to the CPU to run the CPU-based constructor. After
+ construction, the CPU tree will be copied to the GPU.
+
+ Only ``"sah"`` and ``"median"`` are supported for CPU trees. If ``"lbvh"`` is selected for a CPU tree, a
+ warning message will be issued, and the constructor will automatically fall back to ``"sah"``.
  """

  if len(lowers) != len(uppers):
- raise RuntimeError("Bvh the same number of lower and upper bounds must be provided")
+ raise RuntimeError("The same number of lower and upper bounds must be provided")

  if lowers.device != uppers.device:
- raise RuntimeError("Bvh lower and upper bounds must live on the same device")
+ raise RuntimeError("Lower and upper bounds must live on the same device")

  if lowers.dtype != vec3 or not lowers.is_contiguous:
- raise RuntimeError("Bvh lowers should be a contiguous array of type wp.vec3")
+ raise RuntimeError("lowers should be a contiguous array of type wp.vec3")

  if uppers.dtype != vec3 or not uppers.is_contiguous:
- raise RuntimeError("Bvh uppers should be a contiguous array of type wp.vec3")
+ raise RuntimeError("uppers should be a contiguous array of type wp.vec3")

  self.device = lowers.device
  self.lowers = lowers
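A hedged usage sketch of the new constructor argument (bounds are illustrative; leaving it as None would pick "lbvh" for this GPU tree):

    import numpy as np
    import warp as wp

    lo = np.random.rand(1024, 3).astype(np.float32)
    lowers = wp.array(lo, dtype=wp.vec3, device="cuda:0")
    uppers = wp.array(lo + 0.1, dtype=wp.vec3, device="cuda:0")

    # force the SAH builder for best query performance despite slower builds
    bvh = wp.Bvh(lowers, uppers, constructor="sah")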
@@ -3137,11 +3380,32 @@ class Bvh:

  self.runtime = warp.context.runtime

+ if constructor is None:
+ if self.device.is_cpu:
+ constructor = "sah"
+ else:
+ constructor = "lbvh"
+
+ if constructor not in bvh_constructor_values:
+ raise ValueError(f"Unrecognized BVH constructor type: {constructor}")
+
  if self.device.is_cpu:
- self.id = self.runtime.core.bvh_create_host(get_data(lowers), get_data(uppers), int(len(lowers)))
+ if constructor == "lbvh":
+ warp.utils.warn(
+ "LBVH constructor is not available for a CPU tree. Falling back to SAH constructor.", stacklevel=2
+ )
+ constructor = "sah"
+
+ self.id = self.runtime.core.bvh_create_host(
+ get_data(lowers), get_data(uppers), int(len(lowers)), bvh_constructor_values[constructor]
+ )
  else:
  self.id = self.runtime.core.bvh_create_device(
- self.device.context, get_data(lowers), get_data(uppers), int(len(lowers))
+ self.device.context,
+ get_data(lowers),
+ get_data(uppers),
+ int(len(lowers)),
+ bvh_constructor_values[constructor],
  )

  def __del__(self):
@@ -3156,7 +3420,10 @@
  self.runtime.core.bvh_destroy_device(self.id)

  def refit(self):
- """Refit the BVH. This should be called after users modify the `lowers` and `uppers` arrays."""
+ """Refit the BVH.
+
+ This should be called after users modify the ``lowers`` or ``uppers`` arrays.
+ """

  if self.device.is_cpu:
  self.runtime.core.bvh_refit_host(self.id)
@@ -3179,7 +3446,14 @@ class Mesh:
  instance.id = None
  return instance

- def __init__(self, points=None, indices=None, velocities=None, support_winding_number=False):
+ def __init__(
+ self,
+ points: array,
+ indices: array,
+ velocities: Optional[array] = None,
+ support_winding_number: bool = False,
+ bvh_constructor: Optional[str] = None,
+ ):
  """Class representing a triangle mesh.

  Attributes:
@@ -3187,10 +3461,15 @@
  device: Device this object lives on, all buffers must live on the same device.

  Args:
- points (:class:`warp.array`): Array of vertex positions of type :class:`warp.vec3`
- indices (:class:`warp.array`): Array of triangle indices of type :class:`warp.int32`, should be a 1d array with shape (num_tris * 3)
- velocities (:class:`warp.array`): Array of vertex velocities of type :class:`warp.vec3` (optional)
- support_winding_number (bool): If true the mesh will build additional datastructures to support `wp.mesh_query_point_sign_winding_number()` queries
+ points: Array of vertex positions of data type :class:`warp.vec3`.
+ indices: Array of triangle indices of data type :class:`warp.int32`.
+ Should be a 1D array with shape ``(num_tris * 3)``.
+ velocities: Optional array of vertex velocities of data type :class:`warp.vec3`.
+ support_winding_number: If ``True``, the mesh will build additional
+ data structures to support ``wp.mesh_query_point_sign_winding_number()`` queries.
+ bvh_constructor: The construction algorithm for the underlying BVH
+ (see the docstring of :class:`Bvh` for explanation).
+ Valid choices are ``"sah"``, ``"median"``, ``"lbvh"``, or ``None``.
  """

  if points.device != indices.device:
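The same builder choice reaches wp.Mesh through bvh_constructor. A minimal sketch with a single triangle (leaving the argument as None keeps the per-device default):

    import numpy as np
    import warp as wp

    points = wp.array(np.array([[0.0, 0.0, 0.0],
                                [1.0, 0.0, 0.0],
                                [0.0, 1.0, 0.0]], dtype=np.float32),
                      dtype=wp.vec3, device="cpu")
    indices = wp.array(np.array([0, 1, 2], dtype=np.int32), device="cpu")

    mesh = wp.Mesh(points, indices, bvh_constructor="median")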
@@ -3215,7 +3494,22 @@

  self.runtime = warp.context.runtime

+ if bvh_constructor is None:
+ if self.device.is_cpu:
+ bvh_constructor = "sah"
+ else:
+ bvh_constructor = "lbvh"
+
+ if bvh_constructor not in bvh_constructor_values:
+ raise ValueError(f"Unrecognized BVH constructor type: {bvh_constructor}")
+
  if self.device.is_cpu:
+ if bvh_constructor == "lbvh":
+ warp.utils.warn(
+ "LBVH constructor is not available for a CPU tree. Falling back to SAH constructor.", stacklevel=2
+ )
+ bvh_constructor = "sah"
+
  self.id = self.runtime.core.mesh_create_host(
  points.__ctype__(),
  velocities.__ctype__() if velocities else array().__ctype__(),
@@ -3223,6 +3517,7 @@
  int(len(points)),
  int(indices.size / 3),
  int(support_winding_number),
+ bvh_constructor_values[bvh_constructor],
  )
  else:
  self.id = self.runtime.core.mesh_create_device(
@@ -3233,6 +3528,7 @@
  int(len(points)),
  int(indices.size / 3),
  int(support_winding_number),
+ bvh_constructor_values[bvh_constructor],
  )

  def __del__(self):
@@ -3247,7 +3543,10 @@
  self.runtime.core.mesh_destroy_device(self.id)

  def refit(self):
- """Refit the BVH to points. This should be called after users modify the `points` data."""
+ """Refit the BVH to points.
+
+ This should be called after users modify the ``points`` data.
+ """

  if self.device.is_cpu:
  self.runtime.core.mesh_refit_host(self.id)
@@ -3260,9 +3559,9 @@
  """The array of mesh's vertex positions of type :class:`warp.vec3`.

  The `Mesh.points` property has a custom setter method. Users can modify the vertex positions in-place,
- but the `refit()` method must be called manually after such modifications. Alternatively, assigning a new array
+ but :meth:`refit` must be called manually after such modifications. Alternatively, assigning a new array
  to this property is also supported. The new array must have the same shape as the original, and once assigned,
- the `Mesh` class will automatically perform a refit operation based on the new vertex positions.
+ the :class:`Mesh` will automatically perform a refit operation based on the new vertex positions.
  """
  return self._points

@@ -3270,16 +3569,14 @@
  def points(self, points_new):
  if points_new.device != self._points.device:
  raise RuntimeError(
- "The new points and the original points must live on the same device, currently "
- "the new points lives on {} while the old points lives on {}.".format(
- points_new.device, self._points.device
- )
+ "The new points and the original points must live on the same device, the "
+ f"new points are on {points_new.device} while the old points are on {self._points.device}."
  )

  if points_new.ndim != 1 or points_new.shape[0] != self._points.shape[0]:
  raise RuntimeError(
- "the new points and the original points must have the same shape, currently new points shape is: {},"
- " while the old points' shape is: {}".format(points_new.shape, self._points.shape)
+ "The new points and the original points must have the same shape, the "
+ f"new points' shape is {points_new.shape}, while the old points' shape is {self._points.shape}."
  )

  self._points = points_new
@@ -3294,7 +3591,7 @@
  """The array of mesh's velocities of type :class:`warp.vec3`.

  This is a property with a custom setter method. Users can modify the velocities in-place,
- or assigning a new array to this property. No refitting is needed for changing velocities.
+ or assign a new array to this property. No refitting is needed for changing velocities.
  """
  return self._velocities

@@ -3302,16 +3599,14 @@
  def velocities(self, velocities_new):
  if velocities_new.device != self._velocities.device:
  raise RuntimeError(
- "The new points and the original points must live on the same device, currently "
- "the new points lives on {} while the old points lives on {}.".format(
- velocities_new.device, self._velocities.device
- )
+ "The new points and the original points must live on the same device, the "
+ f"new points are on {velocities_new.device} while the old points are on {self._velocities.device}."
  )

  if velocities_new.ndim != 1 or velocities_new.shape[0] != self._velocities.shape[0]:
  raise RuntimeError(
- "the new points and the original points must have the same shape, currently new points shape is: {},"
- " while the old points' shape is: {}".format(velocities_new.shape, self._velocities.shape)
+ "The new points and the original points must have the same shape, the "
+ f"new points' shape is {velocities_new.shape}, while the old points' shape is {self._velocities.shape}."
  )

  self._velocities = velocities_new
@@ -3337,8 +3632,8 @@ class Volume:
  """Class representing a sparse grid.

  Args:
- data (:class:`warp.array`): Array of bytes representing the volume in NanoVDB format
- copy (bool): Whether the incoming data will be copied or aliased
+ data: Array of bytes representing the volume in NanoVDB format.
+ copy: Whether the incoming data will be copied or aliased.
  """

  # keep a runtime reference for orderly destruction
@@ -3373,14 +3668,15 @@
  self.runtime.core.volume_destroy_device(self.id)

  def array(self) -> array:
- """Returns the raw memory buffer of the Volume as an array"""
+ """Return the raw memory buffer of the :class:`Volume` as an array."""
+
  buf = ctypes.c_void_p(0)
  size = ctypes.c_uint64(0)
  self.runtime.core.volume_get_buffer_info(self.id, ctypes.byref(buf), ctypes.byref(size))
  return array(ptr=buf.value, dtype=uint8, shape=size.value, device=self.device, owner=False)

  def get_tile_count(self) -> int:
- """Returns the number of tiles (NanoVDB leaf nodes) of the volume"""
+ """Return the number of tiles (NanoVDB leaf nodes) of the volume."""

  voxel_count, tile_count = (
  ctypes.c_uint64(0),
@@ -3390,11 +3686,12 @@
  return tile_count.value

  def get_tiles(self, out: Optional[array] = None) -> array:
- """Returns the integer coordinates of all allocated tiles for this volume.
+ """Return the integer coordinates of all allocated tiles for this volume.

  Args:
- out (:class:`warp.array`, optional): If provided, use the `out` array to store the tile coordinates, otherwise
- a new array will be allocated. `out` must be a contiguous array of ``tile_count`` ``vec3i`` or ``tile_count x 3`` ``int32``
+ out: If provided, use the `out` array to store the tile coordinates, otherwise
+ a new array will be allocated. ``out`` must be a contiguous array
+ of ``tile_count`` ``vec3i`` or ``tile_count x 3`` ``int32``
  on the same device as this volume.
  """

@@ -3419,7 +3716,7 @@
  return out

  def get_voxel_count(self) -> int:
- """Returns the total number of allocated voxels for this volume"""
+ """Return the total number of allocated voxels for this volume"""

  voxel_count, tile_count = (
  ctypes.c_uint64(0),
@@ -3429,10 +3726,10 @@
  return voxel_count.value

  def get_voxels(self, out: Optional[array] = None) -> array:
- """Returns the integer coordinates of all allocated voxels for this volume.
+ """Return the integer coordinates of all allocated voxels for this volume.

  Args:
- out (:class:`warp.array`, optional): If provided, use the `out` array to store the voxel coordinates, otherwise
+ out: If provided, use the `out` array to store the voxel coordinates, otherwise
  a new array will be allocated. `out` must be a contiguous array of ``voxel_count`` ``vec3i`` or ``voxel_count x 3`` ``int32``
  on the same device as this volume.
  """
@@ -3458,7 +3755,7 @@
  return out

  def get_voxel_size(self) -> Tuple[float, float, float]:
- """Voxel size, i.e, world coordinates of voxel's diagonal vector"""
+ """Return the voxel size, i.e., world coordinates of voxel's diagonal vector"""

  if self.id == 0:
  raise RuntimeError("Invalid Volume")
@@ -3558,7 +3855,7 @@
  return self.get_grid_info().type_str in Volume._nvdb_index_types

  def get_feature_array_count(self) -> int:
- """Returns the number of supplemental data arrays stored alongside the grid"""
+ """Return the number of supplemental data arrays stored alongside the grid"""

  return self.runtime.core.volume_get_blind_data_count(self.id)

@@ -3578,7 +3875,7 @@
  """String describing the type of the array values"""

  def get_feature_array_info(self, feature_index: int) -> Volume.FeatureArrayInfo:
- """Returns the metadata associated to the feature array at `feature_index`"""
+ """Return the metadata associated to the feature array at ``feature_index``."""

  buf = ctypes.c_void_p(0)
  value_count = ctypes.c_uint64(0)
@@ -3606,11 +3903,12 @@
  )

  def feature_array(self, feature_index: int, dtype=None) -> array:
- """Returns one the grid's feature data arrays as a Warp array
+ """Return one of the grid's feature data arrays as a Warp array.

  Args:
  feature_index: Index of the supplemental data array in the grid
- dtype: Type for the returned Warp array. If not provided, will be deduced from the array metadata.
+ dtype: Data type for the returned Warp array.
+ If not provided, will be deduced from the array metadata.
  """

  info = self.get_feature_array_info(feature_index)
@@ -3641,7 +3939,7 @@

  @classmethod
  def load_from_nvdb(cls, file_or_buffer, device=None) -> Volume:
- """Creates a Volume object from a serialized NanoVDB file or in-memory buffer.
+ """Create a :class:`Volume` object from a serialized NanoVDB file or in-memory buffer.

  Returns:


@@ -4302,6 +4600,9 @@ def matmul(
  ):
  """Computes a generic matrix-matrix multiplication (GEMM) of the form: `d = alpha * (a @ b) + beta * c`.

+ .. deprecated:: 1.6
+ Use :doc:`tile primitives </modules/tiles>` instead.
+
  Args:
  a (array2d): two-dimensional array containing matrix A
  b (array2d): two-dimensional array containing matrix B
@@ -4314,6 +4615,12 @@
  """
  from warp.context import runtime

+ warp.utils.warn(
+ "wp.matmul() is deprecated and will be removed in a\nfuture version. Use tile primitives instead.",
+ category=DeprecationWarning,
+ stacklevel=2,
+ )
+
  device = a.device

  if b.device != device or c.device != device or d.device != device:
@@ -4589,6 +4896,9 @@ def batched_matmul(
  ):
  """Computes a batched generic matrix-matrix multiplication (GEMM) of the form: `d = alpha * (a @ b) + beta * c`.

+ .. deprecated:: 1.6
+ Use :doc:`tile primitives </modules/tiles>` instead.
+
  Args:
  a (array3d): three-dimensional array containing A matrices. Overall array dimension is {batch_count, M, K}
  b (array3d): three-dimensional array containing B matrices. Overall array dimension is {batch_count, K, N}
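For reference, a sketch of the tile-based replacement that the deprecation notices point to, modeled on warp/examples/tile/example_tile_matmul.py from this release (treat the exact tile signatures as assumptions; alpha/beta scaling would be applied to the accumulator before the store):

    import numpy as np
    import warp as wp

    TILE_M, TILE_N, TILE_K = 16, 16, 8

    @wp.kernel
    def tile_gemm(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float), C: wp.array2d(dtype=float)):
        i, j = wp.tid()  # one block per output tile
        acc = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)
        for k in range(A.shape[1] // TILE_K):
            a = wp.tile_load(A, shape=(TILE_M, TILE_K), offset=(i * TILE_M, k * TILE_K))
            b = wp.tile_load(B, shape=(TILE_K, TILE_N), offset=(k * TILE_K, j * TILE_N))
            wp.tile_matmul(a, b, acc)  # acc += a @ b
        wp.tile_store(C, acc, offset=(i * TILE_M, j * TILE_N))

    M, N, K = 64, 64, 32
    A = wp.array(np.ones((M, K), dtype=np.float32), device="cuda:0")
    B = wp.array(np.ones((K, N), dtype=np.float32), device="cuda:0")
    C = wp.zeros((M, N), dtype=float, device="cuda:0")
    wp.launch_tiled(tile_gemm, dim=(M // TILE_M, N // TILE_N), inputs=[A, B, C], block_dim=64)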