warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/libwarp-clang.dylib +0 -0
  3. warp/bin/libwarp.dylib +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/dlpack.py CHANGED
@@ -48,10 +48,6 @@ Py_DecRef.restype = None
48
48
 
49
49
  PyCapsule_Destructor = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
50
50
 
51
- PyCapsule_New = ctypes.pythonapi.PyCapsule_New
52
- PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, PyCapsule_Destructor]
53
- PyCapsule_New.restype = ctypes.py_object
54
-
55
51
  PyCapsule_IsValid = ctypes.pythonapi.PyCapsule_IsValid
56
52
  PyCapsule_IsValid.argtypes = [ctypes.py_object, ctypes.c_char_p]
57
53
  PyCapsule_IsValid.restype = ctypes.c_int
@@ -105,8 +101,8 @@ def _dlpack_capsule_deleter(ptr) -> None:
105
101
 
106
102
  capsule = ctypes.cast(ptr, ctypes.py_object)
107
103
 
108
- if ctypes.pythonapi.PyCapsule_IsValid(capsule, _c_str_dltensor):
109
- managed_ptr = ctypes.pythonapi.PyCapsule_GetPointer(capsule, _c_str_dltensor)
104
+ if PyCapsule_IsValid(capsule, _c_str_dltensor):
105
+ managed_ptr = PyCapsule_GetPointer(capsule, _c_str_dltensor)
110
106
  managed_tensor = DLManagedTensor.from_address(managed_ptr)
111
107
  if managed_tensor.deleter:
112
108
  managed_tensor.deleter(managed_ptr)
@@ -302,6 +298,12 @@ def to_dlpack(wp_array: warp.array):
302
298
 
303
299
  managed_tensor.deleter = _dlpack_tensor_deleter
304
300
 
301
+ # NOTE: jax.ffi.pycapsule() defines the PyCapsule_New() argtypes incorrectly, which causes problems.
302
+ # Here we make sure that the PyCapsule_Destructor callback is correctly defined.
303
+ PyCapsule_New = ctypes.pythonapi.PyCapsule_New
304
+ PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, PyCapsule_Destructor]
305
+ PyCapsule_New.restype = ctypes.py_object
306
+
305
307
  capsule = PyCapsule_New(
306
308
  ctypes.byref(managed_tensor),
307
309
  _c_str_dltensor,
Binary file (warp/examples/assets/nonuniform.usd)
Binary file (warp/examples/assets/nvidia_logo.png)
@@ -0,0 +1,103 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import numpy as np
17
+
18
+ import warp as wp
19
+
20
+ BLOCK_DIM = 128
21
+
22
+ TILE = 32
23
+
24
+
25
+ def create_test_kernel(storage_type: str):
26
+ @wp.kernel
27
+ def load_store(a: wp.array2d(dtype=wp.float32), b: wp.array2d(dtype=wp.float32)):
28
+ i, j = wp.tid()
29
+
30
+ if wp.static(storage_type == "shared"):
31
+ a_tile = wp.tile_load(a, shape=(TILE, TILE), offset=(i * TILE, j * TILE), storage="shared")
32
+ else:
33
+ a_tile = wp.tile_load(a, shape=(TILE, TILE), offset=(i * TILE, j * TILE), storage="register")
34
+
35
+ wp.tile_store(b, a_tile, offset=(i * TILE, j * TILE))
36
+
37
+ return load_store
38
+
39
+
40
+ if __name__ == "__main__":
41
+ wp.config.quiet = True
42
+ wp.init()
43
+ wp.clear_kernel_cache()
44
+ wp.set_module_options({"fast_math": True, "enable_backward": False})
45
+
46
+ iterations = 100
47
+ rng = np.random.default_rng(42)
48
+
49
+ shared_benchmark_data = {}
50
+ register_benchmark_data = {}
51
+ memcpy_benchmark_data = {}
52
+
53
+ sizes = list(range(128, 4097, 128))
54
+
55
+ print(f"{'Transfer Size (Bytes)':<23s} {'Shared (GiB/s)':<16s} {'Register (GiB/s)':<18s} {'memcpy (GiB/s)':<16s}")
56
+ print("-" * 79)
57
+
58
+ for size in sizes:
59
+ a = wp.array(rng.random((size, size), dtype=np.float32), dtype=wp.float32)
60
+ b = wp.empty_like(a)
61
+
62
+ for storage_type in ("shared", "register"):
63
+ load_store = create_test_kernel(storage_type)
64
+
65
+ cmd = wp.launch_tiled(
66
+ load_store,
67
+ dim=(a.shape[0] // TILE, a.shape[1] // TILE),
68
+ inputs=[a],
69
+ outputs=[b],
70
+ block_dim=BLOCK_DIM,
71
+ record_cmd=True,
72
+ )
73
+ # Warmup
74
+ for _ in range(5):
75
+ cmd.launch()
76
+
77
+ with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_KERNEL, print=False, synchronize=True) as timer:
78
+ for _ in range(iterations):
79
+ cmd.launch()
80
+
81
+ np.testing.assert_equal(a.numpy(), b.numpy())
82
+
83
+ timing_results = [result.elapsed for result in timer.timing_results]
84
+ avg_bw = 2.0 * (a.capacity / (1024 * 1024 * 1024)) / (1e-3 * np.mean(timing_results))
85
+
86
+ if storage_type == "shared":
87
+ shared_benchmark_data[a.capacity] = avg_bw
88
+ else:
89
+ register_benchmark_data[a.capacity] = avg_bw
90
+
91
+ # Compare with memcpy
92
+ with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_MEMCPY, print=False, synchronize=True) as timer:
93
+ for _ in range(iterations):
94
+ wp.copy(b, a)
95
+
96
+ timing_results = [result.elapsed for result in timer.timing_results]
97
+ avg_bw = 2.0 * (a.capacity / (1024 * 1024 * 1024)) / (1e-3 * np.mean(timing_results))
98
+ memcpy_benchmark_data[a.capacity] = avg_bw
99
+
100
+ # Print results
101
+ print(
102
+ f"{a.capacity:<23d} {shared_benchmark_data[a.capacity]:<#16.4g} {register_benchmark_data[a.capacity]:<#18.4g} {memcpy_benchmark_data[a.capacity]:<#16.4g}"
103
+ )
@@ -0,0 +1,300 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ ###########################################################################
17
+ # Example Sample Mesh
18
+ #
19
+ # Shows how to sample points on a mesh's surface using
20
+ # a Cumulative Distribution Function (CDF).
21
+ #
22
+ # The CDF enables uniform sampling of points across the mesh's surface,
23
+ # even when the density of triangles varies. It represents the cumulative
24
+ # probability of selecting a triangle from the mesh, with each triangle
25
+ # weighted by its area relative to the total surface area of the mesh.
26
+ #
27
+ ###########################################################################
28
+
29
+ import numpy as np
30
+
31
+ import warp as wp
32
+ import warp.render
33
+
34
+ # fmt: off
35
+ POINTS = np.array(
36
+ (
37
+ (-0.986598, -0.400638, -0.175759), (-0.81036 , -0.482105, -0.541125),
38
+ (-1.079616, 0.022652, -0.023381), (-0.894468, -0.080795, -0.618379),
39
+ (-0.607365, -0.702012, -0.556551), (-0.366107, -0.800096, -0.620734),
40
+ (-0.801777, -0.690991, -0.239593), (-0.553576, -0.871746, -0.335518),
41
+ (-0.309133, -0.370805, -0.965784), (-0.288299, -0.956987, -0.402091),
42
+ (-0.051878, -0.894342, -0.597583), (-0.386774, -1.003107, -0.145116),
43
+ (-0.19062 , -1.061165, 0.012418), (-0.176053, -1.044838, -0.217194),
44
+ ( 0.001479, -1.020045, -0.356905), (-0.105375, -0.655117, -0.861365),
45
+ (-0.542102, -0.517255, -0.795259), (-0.476599, -0.105709, -0.981171),
46
+ (-1.047915, -0.121584, 0.322098), (-0.527852, 0.137252, 0.501813),
47
+ (-0.721762, -0.803275, 0.117162), (-0.904992, -0.573281, 0.168408),
48
+ (-0.796762, -0.473428, 0.569649), (-0.606446, -0.753374, 0.492938),
49
+ (-0.466481, -0.576566, 0.802562), (-0.50476 , -0.908596, 0.300064),
50
+ (-0.337425, -1.008902, 0.170911), (-0.048676, -1.055594, 0.246732),
51
+ (-0.212871, -0.760442, 0.738447), (-0.281356, -0.9322 , 0.474965),
52
+ (-0.560476, 0.062512, -0.561019), (-0.003252, 0.083237, -1.049784),
53
+ (-0.009392, 0.593703, -0.522479), (-0.530465, 0.577231, 0.007172),
54
+ (-0.02106 , 0.064189, 1.066722), (-0.003512, 0.59714 , 0.516904),
55
+ ( 0.000194, 1.093899, 0.001113), ( 0.256861, -0.955856, -0.445325),
56
+ ( 0.251205, -1.038759, -0.174212), ( 0.170201, -0.800019, -0.712158),
57
+ ( 0.364385, -0.560298, -0.866843), ( 0.092809, -0.269437, -1.058467),
58
+ ( 0.628127, -0.12359 , -0.9012 ), ( 0.507433, -0.930658, -0.215908),
59
+ ( 0.496448, -0.800205, -0.545904), ( 0.757415, -0.527449, -0.565395),
60
+ ( 0.908704, -0.596257, 0.028995), ( 0.754069, -0.731365, -0.256687),
61
+ ( 0.921362, -0.09028 , -0.546421), ( 1.017846, -0.335787, -0.263017),
62
+ ( 0.016768, -1.080014, -0.058473), ( 0.204245, -1.056388, 0.078346),
63
+ ( 0.260892, -1.001704, 0.322104), ( 0.16608 , -0.739172, 0.788097),
64
+ ( 0.021091, -0.931327, 0.557789), (-0.046158, -0.408417, 1.011046),
65
+ ( 0.429623, -0.987237, 0.088537), ( 0.704993, -0.739396, 0.386838),
66
+ ( 0.37277 , -0.825639, 0.591102), ( 0.493947, -0.896091, 0.339163),
67
+ ( 0.321112, -0.540547, 0.890161), ( 0.654753, -0.520495, 0.690104),
68
+ ( 0.922472, -0.124429, 0.530498), ( 0.662544, -0.85601 , 0.054375),
69
+ ( 0.950976, -0.422783, 0.327726), ( 0.536849, 0.109943, -0.52279 ),
70
+ ( 0.517242, 0.120634, 0.535708), ( 0.532707, 0.598943, -0.000767),
71
+ ( 1.086691, 0.048722, 0.032517), ( 0.528734, -0.109809, 0.96863 ),
72
+ (-0.581832, -0.916941, -0.027829), (-0.625071, -0.14445 , 0.906538),
73
+ ),
74
+ dtype=np.float32,
75
+ )
76
+
77
+ FACE_VERTEX_INDICES = np.array(
78
+ (
79
+ 6, 0, 1, 6, 21, 0, 2, 0, 18, 0, 3, 1, 2, 3, 0, 5,
80
+ 7, 4, 70, 7, 11, 4, 6, 1, 16, 1, 3, 7, 6, 4, 4, 1,
81
+ 16, 9, 7, 5, 3, 17, 16, 16, 17, 8, 41, 8, 17, 30, 17, 3,
82
+ 10, 14, 9, 5, 10, 9, 10, 37, 14, 15, 10, 5, 7, 9, 11, 11,
83
+ 9, 13, 11, 13, 12, 50, 12, 13, 9, 14, 13, 15, 16, 8, 15, 8,
84
+ 41, 16, 5, 4, 16, 15, 5, 17, 31, 41, 21, 22, 18, 20, 21, 6,
85
+ 18, 0, 21, 20, 25, 23, 20, 70, 25, 70, 11, 26, 26, 25, 70, 25,
86
+ 29, 23, 21, 20, 23, 21, 23, 22, 23, 24, 22, 24, 71, 22, 26, 29,
87
+ 25, 26, 11, 12, 12, 27, 26, 26, 27, 29, 27, 54, 29, 27, 12, 50,
88
+ 28, 29, 54, 54, 53, 28, 23, 28, 24, 29, 28, 23, 28, 55, 24, 28,
89
+ 53, 55, 53, 60, 55, 24, 55, 71, 55, 34, 71, 30, 3, 2, 2, 33,
90
+ 30, 17, 30, 31, 32, 31, 30, 33, 36, 32, 19, 33, 2, 19, 35, 33,
91
+ 19, 71, 34, 35, 19, 34, 34, 66, 35, 35, 36, 33, 35, 67, 36, 15,
92
+ 39, 10, 10, 39, 37, 44, 37, 39, 14, 50, 13, 14, 38, 50, 14, 37,
93
+ 38, 37, 43, 38, 40, 15, 41, 40, 39, 15, 41, 42, 40, 44, 39, 40,
94
+ 31, 42, 41, 38, 43, 56, 44, 43, 37, 44, 47, 43, 47, 63, 43, 44,
95
+ 40, 45, 42, 45, 40, 46, 63, 47, 45, 47, 44, 65, 48, 42, 46, 47,
96
+ 49, 49, 47, 45, 48, 45, 42, 45, 48, 49, 68, 49, 48, 27, 52, 54,
97
+ 50, 51, 27, 27, 51, 52, 50, 38, 51, 38, 56, 51, 51, 56, 52, 54,
98
+ 52, 58, 52, 59, 58, 53, 54, 58, 60, 69, 55, 55, 69, 34, 43, 63,
99
+ 56, 59, 52, 56, 63, 59, 56, 63, 57, 59, 58, 60, 53, 57, 58, 59,
100
+ 58, 57, 61, 60, 58, 61, 57, 64, 61, 62, 61, 64, 60, 61, 69, 62,
101
+ 69, 61, 46, 57, 63, 64, 57, 46, 46, 49, 64, 68, 64, 49, 62, 64,
102
+ 68, 32, 65, 31, 65, 32, 67, 32, 36, 67, 65, 42, 31, 67, 68, 65,
103
+ 48, 65, 68, 34, 69, 66, 67, 35, 66, 68, 66, 62, 66, 69, 62, 67,
104
+ 66, 68, 33, 32, 30, 19, 2, 18, 20, 6, 70, 7, 70, 6, 18, 71,
105
+ 19, 22, 71, 18,
106
+ ),
107
+ dtype=np.int32,
108
+ )
109
+ # fmt: on
110
+
111
+
112
+ @wp.kernel(enable_backward=False)
113
+ def compute_tri_areas(
114
+ points: wp.array(dtype=wp.vec3),
115
+ face_vertex_indices: wp.array(dtype=wp.int32),
116
+ out_tri_areas: wp.array(dtype=wp.float32),
117
+ out_total_area: wp.array(dtype=wp.float32),
118
+ ):
119
+ tri = wp.tid()
120
+
121
+ # Retrieve the indices of the three vertices that form the current triangle.
122
+ vtx_0 = face_vertex_indices[tri * 3]
123
+ vtx_1 = face_vertex_indices[tri * 3 + 1]
124
+ vtx_2 = face_vertex_indices[tri * 3 + 2]
125
+
126
+ # Retrieve their 3D position.
127
+ pt_0 = points[vtx_0]
128
+ pt_1 = points[vtx_1]
129
+ pt_2 = points[vtx_2]
130
+
131
+ # Calculate the cross product of two edges of the triangle,
132
+ # which gives a vector whose magnitude is twice the area of the triangle.
133
+ cross = wp.cross((pt_1 - pt_0), (pt_2 - pt_0))
134
+ area = wp.length(cross) * 0.5
135
+
136
+ # Store the result.
137
+ out_tri_areas[tri] = area
138
+ wp.atomic_add(out_total_area, 0, area)
139
+
140
+
141
+ @wp.kernel(enable_backward=False)
142
+ def compute_probability_distribution(
143
+ tri_areas: wp.array(dtype=wp.float32),
144
+ total_area: wp.array(dtype=wp.float32),
145
+ out_probabilities: wp.array(dtype=wp.float32),
146
+ ):
147
+ tri = wp.tid()
148
+
149
+ # Calculate the probability of selecting this triangle,
150
+ # which is proportional to the triangle's area relative to total mesh area.
151
+ out_probabilities[tri] = tri_areas[tri] / total_area[0]
152
+
153
+
154
+ @wp.kernel(enable_backward=False)
155
+ def accumulate_cdf(
156
+ tri_count: wp.int32,
157
+ out_cdf: wp.array(dtype=wp.float32),
158
+ ):
159
+ # Transform probability values into a Cumulative Distribution Function (CDF).
160
+ for tri in range(1, tri_count):
161
+ out_cdf[tri] += out_cdf[tri - 1]
162
+
163
+
164
+ @wp.kernel(enable_backward=False)
165
+ def sample_mesh(
166
+ mesh: wp.uint64,
167
+ cdf: wp.array(dtype=wp.float32),
168
+ seed: wp.int32,
169
+ out_points: wp.array(dtype=wp.vec3),
170
+ ):
171
+ tid = wp.tid()
172
+
173
+ rng = wp.rand_init(seed, tid)
174
+
175
+ # Sample the triangle index using the CDF.
176
+ sample = wp.randf(rng)
177
+ tri = wp.lower_bound(cdf, sample)
178
+
179
+ # Sample the location in that triangle using random barycentric cordinates.
180
+ ru = wp.randf(rng)
181
+ rv = wp.randf(rng)
182
+ tri_u = 1.0 - wp.sqrt(ru)
183
+ tri_v = wp.sqrt(ru) * (1.0 - rv)
184
+ pos = wp.mesh_eval_position(mesh, tri, tri_u, tri_v)
185
+
186
+ # Store the result.
187
+ out_points[tid] = pos
188
+
189
+
190
+ class Example:
191
+ def __init__(self, stage_path="example_sample_mesh.usd"):
192
+ self.mesh = wp.Mesh(
193
+ points=wp.array(POINTS, dtype=wp.vec3),
194
+ indices=wp.array(FACE_VERTEX_INDICES, dtype=wp.int32),
195
+ )
196
+ self.tri_count = len(FACE_VERTEX_INDICES) // 3
197
+
198
+ # Compute the area of each triangle and the total area of the mesh.
199
+ tri_areas = wp.empty(shape=(self.tri_count,), dtype=wp.float32)
200
+ total_area = wp.zeros(shape=(1,), dtype=wp.float32)
201
+ wp.launch(
202
+ compute_tri_areas,
203
+ dim=tri_areas.shape,
204
+ inputs=(
205
+ self.mesh.points,
206
+ self.mesh.indices,
207
+ ),
208
+ outputs=(
209
+ tri_areas,
210
+ total_area,
211
+ ),
212
+ )
213
+
214
+ # Build a Cumulative Distribution Function (CDF) where the probability
215
+ # of sampling a given triangle is proportional to its area.
216
+ self.cdf = wp.empty(shape=(self.tri_count,), dtype=wp.float32)
217
+ wp.launch(
218
+ compute_probability_distribution,
219
+ dim=self.cdf.shape,
220
+ inputs=(
221
+ tri_areas,
222
+ total_area,
223
+ ),
224
+ outputs=(self.cdf,),
225
+ )
226
+ wp.launch(
227
+ accumulate_cdf,
228
+ dim=(1,),
229
+ inputs=(self.tri_count,),
230
+ outputs=(self.cdf,),
231
+ )
232
+
233
+ # Array to store the sampled points.
234
+ self.points = wp.empty(shape=(100,), dtype=wp.vec3)
235
+
236
+ self.fps = 4
237
+ self.frame = 0
238
+
239
+ if stage_path:
240
+ self.renderer = wp.render.UsdRenderer(stage_path, fps=self.fps)
241
+ else:
242
+ self.renderer = None
243
+
244
+ def step(self):
245
+ with wp.ScopedTimer("step"):
246
+ # Sample new points on the mesh using the CDF and the current frame
247
+ # number as seed to ensure different samples each frame.
248
+ wp.launch(
249
+ sample_mesh,
250
+ dim=self.points.shape,
251
+ inputs=(
252
+ self.mesh.id,
253
+ self.cdf,
254
+ self.frame,
255
+ ),
256
+ outputs=(self.points,),
257
+ )
258
+
259
+ self.frame += 1
260
+
261
+ def render(self):
262
+ if self.renderer is None:
263
+ return
264
+
265
+ with wp.ScopedTimer("render"):
266
+ self.renderer.begin_frame(self.frame / self.fps)
267
+ self.renderer.render_mesh(
268
+ name="mesh",
269
+ points=self.mesh.points.numpy(),
270
+ indices=self.mesh.indices.numpy(),
271
+ colors=(0.35, 0.55, 0.9),
272
+ )
273
+ self.renderer.render_points(name="points", points=self.points.numpy(), radius=0.05, colors=(0.8, 0.3, 0.2))
274
+ self.renderer.end_frame()
275
+
276
+
277
+ if __name__ == "__main__":
278
+ import argparse
279
+
280
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
281
+ parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
282
+ parser.add_argument(
283
+ "--stage_path",
284
+ type=lambda x: None if x == "None" else str(x),
285
+ default="example_sample_mesh.usd",
286
+ help="Path to the output USD file.",
287
+ )
288
+ parser.add_argument("--num_frames", type=int, default=16, help="Total number of frames.")
289
+
290
+ args = parser.parse_known_args()[0]
291
+
292
+ with wp.ScopedDevice(args.device):
293
+ example = Example(stage_path=args.stage_path)
294
+
295
+ for _ in range(args.num_frames):
296
+ example.step()
297
+ example.render()
298
+
299
+ if example.renderer:
300
+ example.renderer.save()
@@ -117,7 +117,7 @@ def divergence_form(s: Sample, domain: Domain, u: Field, psi: Field):
117
117
  def invert_volume_kernel(values: wp.array(dtype=float)):
118
118
  i = wp.tid()
119
119
  m = values[i]
120
- values[i] = wp.select(m == 0.0, 1.0 / m, 0.0)
120
+ values[i] = wp.where(m == 0.0, 0.0, 1.0 / m)
121
121
 
122
122
 
123
123
  @wp.kernel
@@ -75,7 +75,7 @@ def cell_transport_form(s: fem.Sample, domain: fem.Domain, u: fem.Field, v: fem.
75
75
  def initial_condition(s: fem.Sample, domain: fem.Domain):
76
76
  x = domain(s)[0] * 2.0
77
77
  wave = wp.sin(x * wp.pi)
78
- return wp.vec2(wp.select(x <= 1.0, 0.0, wave), 0.0)
78
+ return wp.vec2(wp.where(x <= 1.0, wave, 0.0), 0.0)
79
79
 
80
80
 
81
81
  @fem.integrand
@@ -87,7 +87,7 @@ def velocity_norm(s: fem.Sample, u: fem.Field):
87
87
  def minmod(a: float, b: float):
88
88
  sa = wp.sign(a)
89
89
  sb = wp.sign(b)
90
- return wp.select(sa == sb, 0.0, sa * wp.min(wp.abs(a), wp.abs(b)))
90
+ return wp.where(sa == sb, sa * wp.min(wp.abs(a), wp.abs(b)), 0.0)
91
91
 
92
92
 
93
93
  @fem.integrand
@@ -57,7 +57,7 @@ def boundary_projector_form(
57
57
  Bilinear boundary condition projector form, non-zero on radial edges
58
58
  """
59
59
  nor = fem.normal(domain, s)
60
- active = wp.select(nor[0] < -0.9999 or nor[1] < -0.9999, 0.0, 1.0)
60
+ active = wp.where(nor[0] < -0.9999 or nor[1] < -0.9999, 1.0, 0.0)
61
61
  return active * u(s) * v(s)
62
62
 
63
63
 
@@ -82,7 +82,7 @@ def boundary_projector_form(
82
82
  ):
83
83
  # Fix a single point
84
84
  # (underconstrained, solution up to a rotation in UV space)
85
- w = wp.select(s.qp_index == 0, 0.0, 1.0)
85
+ w = wp.where(s.qp_index == 0, 1.0, 0.0)
86
86
  return w * wp.dot(u(s), v(s))
87
87
 
88
88
 
@@ -60,8 +60,8 @@ def cube_to_cylinder_grad(x: wp.vec3):
60
60
  dir_grad = (wp.identity(n=3, dtype=float) - wp.outer(dir_xz, dir_xz)) / wp.length(pos_xz)
61
61
 
62
62
  abs_xz = wp.abs(pos_xz)
63
- xinf_grad = wp.select(
64
- abs_xz[0] > abs_xz[2], wp.vec3(0.0, 0.0, wp.sign(pos_xz[2])), wp.vec(wp.sign(pos_xz[0]), 0.0, 0.0)
63
+ xinf_grad = wp.where(
64
+ abs_xz[0] > abs_xz[2], wp.vec(wp.sign(pos_xz[0]), 0.0, 0.0), wp.vec3(0.0, 0.0, wp.sign(pos_xz[2]))
65
65
  )
66
66
  grad = dir_grad * wp.max(abs_xz) + wp.outer(dir_xz, xinf_grad)
67
67
 
@@ -85,10 +85,10 @@ def permeability_field(
85
85
  r = wp.sqrt(x * x + z * z)
86
86
 
87
87
  if r <= core_radius:
88
- return wp.select(y < core_height, MU_0, MU_i)
88
+ return wp.where(y < core_height, MU_i, MU_0)
89
89
 
90
90
  if r >= coil_internal_radius and r <= coil_external_radius:
91
- return wp.select(y < coil_height, MU_0, MU_c)
91
+ return wp.where(y < coil_height, MU_c, MU_0)
92
92
 
93
93
  return MU_0
94
94
 
@@ -107,10 +107,10 @@ def current_field(
107
107
 
108
108
  r = wp.sqrt(x * x + z * z)
109
109
 
110
- return wp.select(
110
+ return wp.where(
111
111
  y < coil_height and r >= coil_internal_radius and r <= coil_external_radius,
112
- wp.vec3(0.0),
113
112
  wp.vec3(z, 0.0, -x) * current / r,
113
+ wp.vec3(0.0),
114
114
  )
115
115
 
116
116
 
@@ -34,6 +34,9 @@ __all__ = [
34
34
  "Plot",
35
35
  ]
36
36
 
37
+ # matrix inversion routines contain nested loops,
38
+ # default unrolling leads to code explosion
39
+ wp.set_module_options({"max_unroll": 6})
37
40
 
38
41
  #
39
42
  # Mesh utilities
@@ -225,6 +228,7 @@ def bsr_cg(
225
228
  mv_routine=None,
226
229
  quiet=False,
227
230
  method: str = "cg",
231
+ M: BsrMatrix = None,
228
232
  ) -> Tuple[float, int]:
229
233
  """Solves the linear system A x = b using an iterative solver, optionally with diagonal preconditioning
230
234
 
@@ -245,7 +249,9 @@ def bsr_cg(
245
249
 
246
250
  """
247
251
 
248
- if mv_routine is None:
252
+ if M is not None:
253
+ M = aslinearoperator(M)
254
+ elif mv_routine is None:
249
255
  M = preconditioner(A, "diag") if use_diag_precond else None
250
256
  else:
251
257
  A = LinearOperator(A.shape, A.dtype, A.device, matvec=mv_routine)
@@ -458,7 +464,7 @@ def bsr_solve_saddle(
458
464
  return err, end_iter
459
465
 
460
466
 
461
- @wp.kernel
467
+ @wp.kernel(enable_backward=False)
462
468
  def _compute_schur_inverse_diagonal(
463
469
  B_offsets: wp.array(dtype=int),
464
470
  B_indices: wp.array(dtype=int),
@@ -500,7 +506,7 @@ def invert_diagonal_bsr_matrix(A: BsrMatrix):
500
506
  )
501
507
 
502
508
 
503
- @wp.kernel
509
+ @wp.kernel(enable_backward=False)
504
510
  def _block_diagonal_invert(values: wp.array(dtype=Any)):
505
511
  i = wp.tid()
506
512
  values[i] = fem.utils.inverse_qr(values[i])
@@ -0,0 +1,116 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ ###########################################################################
17
+ # Example jax_callable()
18
+ #
19
+ # Examples of calling annotated Python functions from JAX.
20
+ ###########################################################################
21
+
22
+ from functools import partial
23
+
24
+ import jax
25
+ import jax.numpy as jnp
26
+
27
+ import warp as wp
28
+ from warp.jax_experimental.ffi import jax_callable
29
+
30
+
31
+ @wp.kernel
32
+ def scale_kernel(a: wp.array(dtype=float), s: float, output: wp.array(dtype=float)):
33
+ tid = wp.tid()
34
+ output[tid] = a[tid] * s
35
+
36
+
37
+ @wp.kernel
38
+ def scale_vec_kernel(a: wp.array(dtype=wp.vec2), s: float, output: wp.array(dtype=wp.vec2)):
39
+ tid = wp.tid()
40
+ output[tid] = a[tid] * s
41
+
42
+
43
+ # The Python function to call.
44
+ # Note the argument annotations, just like Warp kernels.
45
+ def example_func(
46
+ # inputs
47
+ a: wp.array(dtype=float),
48
+ b: wp.array(dtype=wp.vec2),
49
+ s: float,
50
+ # outputs
51
+ c: wp.array(dtype=float),
52
+ d: wp.array(dtype=wp.vec2),
53
+ ):
54
+ wp.launch(scale_kernel, dim=a.shape, inputs=[a, s], outputs=[c])
55
+ wp.launch(scale_vec_kernel, dim=b.shape, inputs=[b, s], outputs=[d])
56
+
57
+
58
+ def example1():
59
+ jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
60
+
61
+ @jax.jit
62
+ def f():
63
+ # inputs
64
+ a = jnp.arange(10, dtype=jnp.float32)
65
+ b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2)) # wp.vec2
66
+ s = 2.0
67
+
68
+ # output shapes
69
+ output_dims = {"c": a.shape, "d": b.shape}
70
+
71
+ c, d = jax_func(a, b, s, output_dims=output_dims)
72
+
73
+ return c, d
74
+
75
+ r1, r2 = f()
76
+ print(r1)
77
+ print(r2)
78
+
79
+
80
+ def example2():
81
+ jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
82
+
83
+ # NOTE: scalar arguments must be static compile-time constants
84
+ @partial(jax.jit, static_argnames=["s"])
85
+ def f(a, b, s):
86
+ # output shapes
87
+ output_dims = {"c": a.shape, "d": b.shape}
88
+
89
+ c, d = jax_func(a, b, s, output_dims=output_dims)
90
+
91
+ return c, d
92
+
93
+ # inputs
94
+ a = jnp.arange(10, dtype=jnp.float32)
95
+ b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2)) # wp.vec2
96
+ s = 3.0
97
+
98
+ r1, r2 = f(a, b, s)
99
+ print(r1)
100
+ print(r2)
101
+
102
+
103
+ def main():
104
+ wp.init()
105
+ wp.load_module(device=wp.get_device())
106
+
107
+ examples = [example1, example2]
108
+
109
+ for example in examples:
110
+ print("\n===========================================================================")
111
+ print(f"{example.__name__}:")
112
+ example()
113
+
114
+
115
+ if __name__ == "__main__":
116
+ main()