warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (181) hide show
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/warp-clang.dll +0 -0
  5. warp/bin/warp.dll +0 -0
  6. warp/build.py +241 -252
  7. warp/build_dll.py +125 -26
  8. warp/builtins.py +1907 -384
  9. warp/codegen.py +257 -101
  10. warp/config.py +12 -1
  11. warp/constants.py +1 -1
  12. warp/context.py +657 -223
  13. warp/dlpack.py +1 -1
  14. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  15. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  16. warp/examples/core/example_sample_mesh.py +1 -1
  17. warp/examples/core/example_spin_lock.py +93 -0
  18. warp/examples/core/example_work_queue.py +118 -0
  19. warp/examples/fem/example_adaptive_grid.py +5 -5
  20. warp/examples/fem/example_apic_fluid.py +1 -1
  21. warp/examples/fem/example_burgers.py +1 -1
  22. warp/examples/fem/example_convection_diffusion.py +9 -6
  23. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  24. warp/examples/fem/example_deformed_geometry.py +1 -1
  25. warp/examples/fem/example_diffusion.py +2 -2
  26. warp/examples/fem/example_diffusion_3d.py +1 -1
  27. warp/examples/fem/example_distortion_energy.py +1 -1
  28. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  29. warp/examples/fem/example_magnetostatics.py +5 -3
  30. warp/examples/fem/example_mixed_elasticity.py +5 -3
  31. warp/examples/fem/example_navier_stokes.py +11 -9
  32. warp/examples/fem/example_nonconforming_contact.py +5 -3
  33. warp/examples/fem/example_streamlines.py +8 -3
  34. warp/examples/fem/utils.py +9 -8
  35. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  36. warp/examples/optim/example_drone.py +1 -1
  37. warp/examples/sim/example_cloth.py +1 -1
  38. warp/examples/sim/example_cloth_self_contact.py +48 -54
  39. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  40. warp/examples/tile/example_tile_cholesky.py +2 -1
  41. warp/examples/tile/example_tile_convolution.py +1 -1
  42. warp/examples/tile/example_tile_filtering.py +1 -1
  43. warp/examples/tile/example_tile_matmul.py +1 -1
  44. warp/examples/tile/example_tile_mlp.py +2 -0
  45. warp/fabric.py +7 -7
  46. warp/fem/__init__.py +5 -0
  47. warp/fem/adaptivity.py +1 -1
  48. warp/fem/cache.py +152 -63
  49. warp/fem/dirichlet.py +2 -2
  50. warp/fem/domain.py +136 -6
  51. warp/fem/field/field.py +141 -99
  52. warp/fem/field/nodal_field.py +85 -39
  53. warp/fem/field/virtual.py +97 -52
  54. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  55. warp/fem/geometry/closest_point.py +13 -0
  56. warp/fem/geometry/deformed_geometry.py +102 -40
  57. warp/fem/geometry/element.py +56 -2
  58. warp/fem/geometry/geometry.py +323 -22
  59. warp/fem/geometry/grid_2d.py +157 -62
  60. warp/fem/geometry/grid_3d.py +116 -20
  61. warp/fem/geometry/hexmesh.py +86 -20
  62. warp/fem/geometry/nanogrid.py +166 -86
  63. warp/fem/geometry/partition.py +59 -25
  64. warp/fem/geometry/quadmesh.py +86 -135
  65. warp/fem/geometry/tetmesh.py +47 -119
  66. warp/fem/geometry/trimesh.py +77 -270
  67. warp/fem/integrate.py +107 -52
  68. warp/fem/linalg.py +25 -58
  69. warp/fem/operator.py +124 -27
  70. warp/fem/quadrature/pic_quadrature.py +36 -14
  71. warp/fem/quadrature/quadrature.py +40 -16
  72. warp/fem/space/__init__.py +1 -1
  73. warp/fem/space/basis_function_space.py +66 -46
  74. warp/fem/space/basis_space.py +17 -4
  75. warp/fem/space/dof_mapper.py +1 -1
  76. warp/fem/space/function_space.py +2 -2
  77. warp/fem/space/grid_2d_function_space.py +4 -1
  78. warp/fem/space/hexmesh_function_space.py +4 -2
  79. warp/fem/space/nanogrid_function_space.py +3 -1
  80. warp/fem/space/partition.py +11 -2
  81. warp/fem/space/quadmesh_function_space.py +4 -1
  82. warp/fem/space/restriction.py +5 -2
  83. warp/fem/space/shape/__init__.py +10 -8
  84. warp/fem/space/tetmesh_function_space.py +4 -1
  85. warp/fem/space/topology.py +52 -21
  86. warp/fem/space/trimesh_function_space.py +4 -1
  87. warp/fem/utils.py +53 -8
  88. warp/jax.py +1 -2
  89. warp/jax_experimental/ffi.py +12 -17
  90. warp/jax_experimental/xla_ffi.py +37 -24
  91. warp/math.py +171 -1
  92. warp/native/array.h +99 -0
  93. warp/native/builtin.h +174 -31
  94. warp/native/coloring.cpp +1 -1
  95. warp/native/exports.h +118 -63
  96. warp/native/intersect.h +3 -3
  97. warp/native/mat.h +5 -10
  98. warp/native/mathdx.cpp +11 -5
  99. warp/native/matnn.h +1 -123
  100. warp/native/quat.h +28 -4
  101. warp/native/sparse.cpp +121 -258
  102. warp/native/sparse.cu +181 -274
  103. warp/native/spatial.h +305 -17
  104. warp/native/tile.h +583 -72
  105. warp/native/tile_radix_sort.h +1108 -0
  106. warp/native/tile_reduce.h +237 -2
  107. warp/native/tile_scan.h +240 -0
  108. warp/native/tuple.h +189 -0
  109. warp/native/vec.h +6 -16
  110. warp/native/warp.cpp +36 -4
  111. warp/native/warp.cu +574 -51
  112. warp/native/warp.h +47 -74
  113. warp/optim/linear.py +5 -1
  114. warp/paddle.py +7 -8
  115. warp/py.typed +0 -0
  116. warp/render/render_opengl.py +58 -29
  117. warp/render/render_usd.py +124 -61
  118. warp/sim/__init__.py +9 -0
  119. warp/sim/collide.py +252 -78
  120. warp/sim/graph_coloring.py +8 -1
  121. warp/sim/import_mjcf.py +4 -3
  122. warp/sim/import_usd.py +11 -7
  123. warp/sim/integrator.py +5 -2
  124. warp/sim/integrator_euler.py +1 -1
  125. warp/sim/integrator_featherstone.py +1 -1
  126. warp/sim/integrator_vbd.py +751 -320
  127. warp/sim/integrator_xpbd.py +1 -1
  128. warp/sim/model.py +265 -260
  129. warp/sim/utils.py +10 -7
  130. warp/sparse.py +303 -166
  131. warp/tape.py +52 -51
  132. warp/tests/cuda/test_conditional_captures.py +1046 -0
  133. warp/tests/cuda/test_streams.py +1 -1
  134. warp/tests/geometry/test_volume.py +2 -2
  135. warp/tests/interop/test_dlpack.py +9 -9
  136. warp/tests/interop/test_jax.py +0 -1
  137. warp/tests/run_coverage_serial.py +1 -1
  138. warp/tests/sim/disabled_kinematics.py +2 -2
  139. warp/tests/sim/{test_vbd.py → test_cloth.py} +296 -113
  140. warp/tests/sim/test_collision.py +159 -51
  141. warp/tests/sim/test_coloring.py +15 -1
  142. warp/tests/test_array.py +254 -2
  143. warp/tests/test_array_reduce.py +2 -2
  144. warp/tests/test_atomic_cas.py +299 -0
  145. warp/tests/test_codegen.py +142 -19
  146. warp/tests/test_conditional.py +47 -1
  147. warp/tests/test_ctypes.py +0 -20
  148. warp/tests/test_devices.py +8 -0
  149. warp/tests/test_fabricarray.py +4 -2
  150. warp/tests/test_fem.py +58 -25
  151. warp/tests/test_func.py +42 -1
  152. warp/tests/test_grad.py +1 -1
  153. warp/tests/test_lerp.py +1 -3
  154. warp/tests/test_map.py +481 -0
  155. warp/tests/test_mat.py +1 -24
  156. warp/tests/test_quat.py +6 -15
  157. warp/tests/test_rounding.py +10 -38
  158. warp/tests/test_runlength_encode.py +7 -7
  159. warp/tests/test_smoothstep.py +1 -1
  160. warp/tests/test_sparse.py +51 -2
  161. warp/tests/test_spatial.py +507 -1
  162. warp/tests/test_struct.py +2 -2
  163. warp/tests/test_tuple.py +265 -0
  164. warp/tests/test_types.py +2 -2
  165. warp/tests/test_utils.py +24 -18
  166. warp/tests/tile/test_tile.py +420 -1
  167. warp/tests/tile/test_tile_mathdx.py +518 -14
  168. warp/tests/tile/test_tile_reduce.py +213 -0
  169. warp/tests/tile/test_tile_shared_memory.py +130 -1
  170. warp/tests/tile/test_tile_sort.py +117 -0
  171. warp/tests/unittest_suites.py +4 -6
  172. warp/types.py +462 -308
  173. warp/utils.py +647 -86
  174. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/METADATA +20 -6
  175. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/RECORD +178 -166
  176. warp/stubs.py +0 -3381
  177. warp/tests/sim/test_xpbd.py +0 -399
  178. warp/tests/test_mlp.py +0 -282
  179. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/WHEEL +0 -0
  180. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/licenses/LICENSE.md +0 -0
  181. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/top_level.txt +0 -0
warp/dlpack.py CHANGED
@@ -65,7 +65,7 @@ class _DLPackTensorHolder:
65
65
  """Class responsible for deleting DLManagedTensor memory after ownership is transferred from a capsule."""
66
66
 
67
67
  def __new__(cls, *args, **kwargs):
68
- instance = super(_DLPackTensorHolder, cls).__new__(cls)
68
+ instance = super().__new__(cls)
69
69
  instance.mem_ptr = None
70
70
  return instance
71
71
 
@@ -243,7 +243,7 @@ def run_benchmark(mode, dim, timers, render=False):
243
243
  # run one warm-up iteration to accurately measure initialization time (some engines do lazy init)
244
244
  positions = integrator.simulate(sim_dt, sim_substeps)
245
245
 
246
- label = "Dim ({}^2)".format(dim)
246
+ label = f"Dim ({dim}^2)"
247
247
 
248
248
  # run simulation
249
249
  for _i in range(sim_frames):
@@ -275,7 +275,7 @@ run_benchmark(mode, 128, timers, render=False)
275
275
  # write results
276
276
 
277
277
  for k, v in timers.items():
278
- print("{:16} min: {:8.2f} max: {:8.2f} avg: {:8.2f}".format(k, np.min(v), np.max(v), np.mean(v)))
278
+ print(f"{k:16} min: {np.min(v):8.2f} max: {np.max(v):8.2f} avg: {np.mean(v):8.2f}")
279
279
 
280
280
  report = open(os.path.join("benchmark.csv"), "a")
281
281
  writer = csv.writer(report, delimiter=",")
@@ -0,0 +1,155 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import time
17
+
18
+ import numpy as np
19
+
20
+ import warp as wp
21
+
22
+ BLOCK_DIM = 128
23
+
24
+
25
def create_test_kernel(KEY_TYPE, MAX_SORT_LENGTH):
    """Build a tiled key/value sort kernel specialized for one key type and row length.

    Args:
        KEY_TYPE: Element type of the key arrays.
        MAX_SORT_LENGTH: Tile shape used when loading a row, i.e. the number of
            key/value pairs sorted per row.

    Returns:
        The generated Warp kernel. It takes 2D input key/value arrays and 2D
        output key/value arrays and sorts each row independently.
    """

    @wp.kernel
    def tile_sort_kernel(
        input_keys: wp.array(dtype=KEY_TYPE, ndim=2),
        input_values: wp.array(dtype=wp.int32, ndim=2),
        output_keys: wp.array(dtype=KEY_TYPE, ndim=2),
        output_values: wp.array(dtype=wp.int32, ndim=2),
    ):
        # Only the first thread-index component (the row to sort) is used.
        row, lane = wp.tid()

        # Stage the row's keys and values as tiles in shared storage.
        row_keys = wp.tile_load(input_keys[row], shape=MAX_SORT_LENGTH, storage="shared")
        row_values = wp.tile_load(input_values[row], shape=MAX_SORT_LENGTH, storage="shared")

        # Sort the key tile in place; the value tile is passed alongside it.
        wp.tile_sort(row_keys, row_values)

        # Write the sorted tiles to the matching output row.
        wp.tile_store(output_keys[row], row_keys)
        wp.tile_store(output_values[row], row_values)

    return tile_sort_kernel
47
+
48
+
49
if __name__ == "__main__":
    # Benchmark driver: for each key type, batch size and row length, sort random
    # rows with the tile-sort kernel and with wp.utils.segmented_sort_pairs,
    # validate the tile-sort output against NumPy, and print one timing row.
    wp.config.quiet = True
    wp.init()
    wp.clear_kernel_cache()
    wp.set_module_options({"fast_math": True, "enable_backward": False})

    iterations = 100
    rng = np.random.default_rng(42)

    # Latest measured time per row length (milliseconds), overwritten for each
    # dtype/batch-size combination and consumed by the print at the end of the loop.
    shared_benchmark_data = {}
    cub_segmented_sort_benchmark_data = {}

    array_length = list(range(16, 257, 16))

    print(
        f"{'Type':<12s} {'Batch Size':<12s} {'Length':<12s} {'Tile Sort (ms)':<16s} {'Cub Segmented Sort (ms)':<24s} {'CubTime/TileTime':<16s}"
    )
    print("-" * 100)

    for dtype in [int, float]:
        for batch_size_exponent in range(5, 11):
            batch_size = 2**batch_size_exponent
            for length in array_length:
                # Draw unique keys (replace=False) so the sorted order is unambiguous.
                if dtype is int:
                    np_keys = rng.choice(1000000000, size=(batch_size, length), replace=False)
                else:  # dtype is float
                    np_keys = rng.choice(1000000, size=(batch_size, length), replace=False).astype(np.float32)

                # Every row carries the values 0..length-1 (an index payload).
                np_values = np.tile(np.arange(length), (batch_size, 1))

                # Sort using NumPy for validation
                np_sorted_keys = np.zeros_like(np_keys)
                np_sorted_values = np.zeros_like(np_values)
                for b in range(batch_size):
                    sorted_indices = np.argsort(np_keys[b])
                    np_sorted_keys[b] = np_keys[b][sorted_indices]
                    np_sorted_values[b] = np_values[b][sorted_indices]

                # Upload the keys/values and allocate the outputs on the GPU
                input_keys = wp.array(np_keys, dtype=dtype, ndim=2, device="cuda")
                input_values = wp.array(np_values, dtype=int, ndim=2, device="cuda")
                output_keys = wp.zeros_like(input_keys, device="cuda")
                output_values = wp.zeros_like(input_values, device="cuda")

                kernel = create_test_kernel(dtype, length)

                # Record the launch once so it can be replayed cheaply below.
                cmd = wp.launch_tiled(
                    kernel,
                    dim=batch_size,
                    inputs=[input_keys, input_values, output_keys, output_values],
                    block_dim=BLOCK_DIM,
                    record_cmd=True,
                )
                # Warmup
                for _ in range(5):
                    cmd.launch()

                with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_KERNEL, print=False, synchronize=True) as timer:
                    for _ in range(iterations):
                        cmd.launch()
                    wp.synchronize()

                if dtype is int:
                    keys_match = np.array_equal(output_keys.numpy(), np_sorted_keys)
                else:  # dtype is float
                    keys_match = np.allclose(output_keys.numpy(), np_sorted_keys, atol=1e-6)  # Use tolerance for floats

                values_match = np.array_equal(output_values.numpy(), np_sorted_values)

                # Validate results
                assert keys_match, f"Key sorting mismatch for dtype={dtype}!"
                assert values_match, f"Value sorting mismatch for dtype={dtype}!"

                timing_results = [result.elapsed for result in timer.timing_results]
                mean_timing = np.mean(timing_results)

                shared_benchmark_data[length] = mean_timing

                # Allocate memory with twice as many rows as the data occupies.
                # NOTE(review): presumably the extra half is scratch space required by
                # segmented_sort_pairs - confirm against the Warp documentation.
                input_keys = wp.zeros(shape=(batch_size * 2, length), dtype=dtype, device="cuda")
                input_values = wp.zeros(shape=(batch_size * 2, length), dtype=int, device="cuda")

                # Copy data
                input_keys.assign(np_keys)
                input_values.assign(np_values)

                input_keys = input_keys.reshape(-1)
                input_values = input_values.reshape(-1)

                # Segment boundaries: row b spans [b * length, (b + 1) * length).
                segments = wp.array(np.arange(0, batch_size + 1) * length, dtype=int, device="cuda")

                # Compare with cub segmented radix sort
                # Warmup
                for _ in range(5):
                    wp.utils.segmented_sort_pairs(input_keys, input_values, batch_size * length, segments)

                # Drain the warm-up work before starting the host timer so that it is
                # not attributed to the measured iterations.
                wp.synchronize()

                # perf_counter_ns is monotonic, so the interval cannot be skewed by
                # system clock adjustments.
                t1 = time.perf_counter_ns()
                for _ in range(iterations):
                    wp.utils.segmented_sort_pairs(input_keys, input_values, batch_size * length, segments)
                wp.synchronize()
                t2 = time.perf_counter_ns()
                # Nanoseconds over all iterations -> milliseconds per iteration.
                cub_segmented_sort_benchmark_data[length] = (t2 - t1) / (1_000_000 * iterations)

                # Print results
                print(
                    f"{dtype!s:<12s} {batch_size:<12d} {length:<12d} {shared_benchmark_data[length]:<16.4g} {cub_segmented_sort_benchmark_data[length]:<24.4g} {cub_segmented_sort_benchmark_data[length] / shared_benchmark_data[length]:<16.4g}"
                )
@@ -176,7 +176,7 @@ def sample_mesh(
176
176
  sample = wp.randf(rng)
177
177
  tri = wp.lower_bound(cdf, sample)
178
178
 
179
- # Sample the location in that triangle using random barycentric cordinates.
179
+ # Sample the location in that triangle using random barycentric coordinates.
180
180
  ru = wp.randf(rng)
181
181
  rv = wp.randf(rng)
182
182
  tri_u = 1.0 - wp.sqrt(ru)
@@ -0,0 +1,93 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ ###########################################################################
17
+ # Example Spin Lock
18
+ #
19
+ # Shows how to use a spin lock to synchronize access to a shared resource.
20
+ #
21
+ ###########################################################################
22
+
23
+ import warp as wp
24
+ from warp.tests.unittest_utils import *
25
+
26
+
27
@wp.func
def spinlock_acquire(lock: wp.array(dtype=wp.int32)):
    # Busy-wait until this thread holds the lock kept in lock[0].
    # Each attempt asks atomic_cas to change lock[0] from 0 to 1; a result of 1
    # is treated as "already held by another thread", so the attempt is repeated.
    while wp.atomic_cas(lock, 0, 0, 1) == 1:
        pass
32
+
33
+
34
@wp.func
def spinlock_release(lock: wp.array(dtype=wp.int32)):
    # Hand the lock back by atomically storing 0 into lock[0], the value that
    # spinlock_acquire waits for.
    wp.atomic_exch(lock, 0, 0)
38
+
39
+
40
@wp.func
def volatile_read(ptr: wp.array(dtype=wp.int32), index: int):
    # Read ptr[index] through an atomic operation instead of a plain load.
    # Adding zero returns the stored value and leaves it untouched, so the read
    # is a single atomic and the slot never holds a temporary placeholder value
    # (the previous exchange-to-zero / exchange-back pair briefly zeroed the slot).
    return wp.atomic_add(ptr, index, 0)
45
+
46
+
47
@wp.kernel
def test_spinlock_counter(
    counter: wp.array(dtype=wp.int32), atomic_counter: wp.array(dtype=wp.int32), lock: wp.array(dtype=wp.int32)
):
    # Each thread increments counter[0] once while holding the spin lock and
    # atomic_counter[0] once with an atomic add, so the two totals can be compared.

    # Enter the critical section.
    spinlock_acquire(lock)

    # A plain `counter[0] = counter[0] + 1` gives wrong results here because the
    # array cannot be marked volatile; go through volatile_read to fetch the
    # current value instead.
    current = volatile_read(counter, 0)
    counter[0] = current + 1

    # Leave the critical section.
    spinlock_release(lock)

    # Reference count maintained purely with an atomic add.
    wp.atomic_add(atomic_counter, 0, 1)
66
+
67
+
68
def test_spinlock(device):
    """Run the spin-lock counter kernel on ``device`` and check its results.

    Launches ``test_spinlock_counter`` with 1024 threads and verifies that both
    the lock-protected counter and the atomic counter equal the thread count and
    that the lock ends up released (0).

    Args:
        device: Device on which the arrays are allocated and the kernel is launched.

    Raises:
        AssertionError: If either counter differs from the thread count or the
            lock is still held after the launch.
    """
    # Create a lock array initialized to 0 (unlocked)
    lock = wp.array([0], dtype=wp.int32, device=device)

    # Create counter arrays initialized to 0
    counter = wp.array([0], dtype=wp.int32, device=device)
    atomic_counter = wp.array([0], dtype=wp.int32, device=device)

    # Number of threads to test with
    n = 1024

    # Launch the test kernel
    wp.launch(test_spinlock_counter, dim=n, inputs=[counter, atomic_counter, lock], device=device)

    # Copy each result to the host once and reuse it for the checks and the report.
    atomic_total = atomic_counter.numpy()[0]
    locked_total = counter.numpy()[0]
    lock_state = lock.numpy()[0]

    # Verify results
    assert atomic_total == n, f"Atomic counter should be {n}, got {atomic_total}"
    assert locked_total == n, f"Counter should be {n}, got {locked_total}"
    assert lock_state == 0, "Lock was not properly released"

    print(f"Final counter value: {locked_total}")
    print(f"Final atomic counter value: {atomic_total}")
89
+
90
+
91
if __name__ == "__main__":
    # Script entry point: clear Warp's kernel cache, then run the example on
    # the CUDA device.
    wp.clear_kernel_cache()
    test_spinlock(device="cuda")
@@ -0,0 +1,118 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ ###########################################################################
17
+ # Example Work Queue
18
+ #
19
+ # Shows how to build a work queue from atomic operations so that GPU threads can dequeue items and enqueue follow-up work concurrently.
20
+ #
21
+ ###########################################################################
22
+
23
+ import warp as wp
24
+ from warp.tests.unittest_utils import *
25
+
26
+
27
@wp.func
def volatile_read(ptr: wp.array(dtype=wp.int32), index: int):
    # Read ptr[index] through an atomic add of zero: the stored value is left
    # unchanged and the value reported by the atomic is returned.
    return wp.atomic_add(ptr, index, 0)
31
+
32
+
33
@wp.struct
class WorkQueue:
    # Ring-buffer work queue shared by all threads of a launch.

    # Item storage; slots are addressed as (position % capacity).
    buffer: wp.array(dtype=wp.int32)
    # Number of slots used for the wrap-around and the "queue full" check.
    capacity: int
    # head[0]: position of the next item to dequeue (advanced by dequeue).
    head: wp.array(dtype=wp.int32)
    # tail[0]: position of the next free slot (advanced by enqueue).
    tail: wp.array(dtype=wp.int32)
39
+
40
+
41
@wp.func
def enqueue(queue: WorkQueue, item: int) -> bool:
    # Append `item` to the queue.
    # Returns True once a slot was claimed and written, or False if the queue
    # was observed to be full.
    while True:
        # Snapshot tail first, then head, each through an atomic read.
        tail_now = volatile_read(queue.tail, 0)
        head_now = volatile_read(queue.head, 0)

        # Full: the number of outstanding items has reached the capacity.
        if (tail_now - head_now) >= queue.capacity:
            return False

        # Claim position `tail_now` by advancing tail with a compare-and-swap.
        slot = tail_now % queue.capacity
        if wp.atomic_cas(queue.tail, 0, tail_now, tail_now + 1) == tail_now:
            # NOTE(review): the item is written after tail has been advanced, so a
            # concurrent dequeue may observe the new tail before this store lands.
            queue.buffer[slot] = item
            return True

        # Another thread moved tail in the meantime; take a fresh snapshot and retry.
59
+
60
+
61
@wp.func
def dequeue(queue: WorkQueue) -> tuple[bool, int]:
    # Remove the oldest item from the queue.
    # Returns (True, item) on success, or (False, 0) if the queue was observed
    # to be empty.
    while True:
        # Snapshot head first, then tail, each through an atomic read.
        head_now = volatile_read(queue.head, 0)
        tail_now = volatile_read(queue.tail, 0)

        # Empty: head has caught up with tail.
        if head_now >= tail_now:
            return False, 0

        # Read the candidate item before trying to claim it.
        slot = head_now % queue.capacity
        candidate = queue.buffer[slot]

        # The item belongs to this thread only if it wins the race to advance head.
        if wp.atomic_cas(queue.head, 0, head_now, head_now + 1) == head_now:
            return True, candidate

        # Another thread moved head in the meantime; take a fresh snapshot and retry.
81
+
82
+
83
@wp.kernel
def process_queue(queue: WorkQueue):
    # Each thread keeps pulling items until it sees the queue empty. Items below
    # 1000000 spawn one follow-up item (item + 1000000), which is not expanded
    # again because it is no longer below the threshold.

    # Per-thread tally of handled items; it is updated but not read anywhere in
    # this kernel.
    processed = int(0)
    while True:
        ok, item = dequeue(queue)
        if not ok:
            break
        wp.printf("Processed item: %d\n", item)
        if item < 1000000:
            follow_up = item + 1000000
            if not enqueue(queue, follow_up):
                wp.printf("Failed to enqueue item: %d\n", follow_up)
        processed = processed + 1
95
+
96
+
97
def test_work_queue(device):
    """Seed a work queue with initial items and process it on ``device``.

    The queue has 8192 slots, the first 4096 of which are pre-filled with the
    items 0..4095. ``process_queue`` is then launched with 1024 threads.

    Args:
        device: Device on which the queue arrays are allocated and the kernel
            is launched.
    """
    # Create a work queue with capacity 8192, pre-filled with 4096 items
    capacity = 8192
    num_initial_items = 4096

    # The buffer must provide `capacity` slots: enqueue writes to
    # buffer[tail % capacity], and tail starts at num_initial_items, so a buffer
    # holding only the initial items would be written out of bounds.
    initial_items = np.zeros(capacity, dtype=np.int32)
    initial_items[:num_initial_items] = np.arange(num_initial_items, dtype=np.int32)

    head = wp.array([0], dtype=wp.int32, device=device)
    tail = wp.array([num_initial_items], dtype=wp.int32, device=device)
    buffer = wp.array(initial_items, dtype=wp.int32, device=device)

    queue = WorkQueue()
    queue.capacity = capacity
    queue.head = head
    queue.tail = tail
    queue.buffer = buffer

    # Launch processing kernel
    wp.launch(process_queue, dim=1024, inputs=[queue], device=device)

    wp.synchronize()
114
+
115
+
116
if __name__ == "__main__":
    # Script entry point: clear Warp's kernel cache, then run the example on
    # the CUDA device.
    wp.clear_kernel_cache()
    test_work_queue(device="cuda")
@@ -70,7 +70,7 @@ def mass_form(
70
70
  @fem.integrand
71
71
  def side_divergence_form(s: fem.Sample, domain: fem.Domain, u: fem.Field, psi: fem.Field):
72
72
  # normal velocity jump (non-zero at resolution boundaries)
73
- return -wp.dot(fem.jump(u, s), fem.normal(domain, s)) * psi(s)
73
+ return -wp.dot(fem.jump(u, s), fem.normal(domain, s)) * fem.average(psi, s)
74
74
 
75
75
 
76
76
  @wp.func
@@ -173,7 +173,7 @@ class Example:
173
173
  bd_test = fem.make_test(u_space, domain=boundary)
174
174
  bd_trial = fem.make_trial(u_space, domain=boundary)
175
175
  dirichlet_projector = fem.integrate(
176
- noslip_projector_form, fields={"u": bd_test, "v": bd_trial}, nodal=True, output_dtype=float
176
+ noslip_projector_form, fields={"u": bd_test, "v": bd_trial}, assembly="nodal", output_dtype=float
177
177
  )
178
178
  fem.normalize_dirichlet_projector(dirichlet_projector)
179
179
 
@@ -187,7 +187,7 @@ class Example:
187
187
  rho_trial = fem.make_trial(rho_space)
188
188
 
189
189
  inv_mass_matrix = fem.integrate(
190
- mass_form, fields={"u": rho_trial, "v": rho_test}, nodal=True, output_dtype=float
190
+ mass_form, fields={"u": rho_trial, "v": rho_test}, assembly="nodal", output_dtype=float
191
191
  )
192
192
  fem_example_utils.invert_diagonal_bsr_matrix(inv_mass_matrix)
193
193
 
@@ -269,8 +269,8 @@ if __name__ == "__main__":
269
269
 
270
270
  stage = Usd.Stage.Open(os.path.join(warp.examples.get_asset_directory(), "rocks.usd"))
271
271
  mesh = UsdGeom.Mesh(stage.GetPrimAtPath("/root/rocks"))
272
- points = np.array((mesh.GetPointsAttr().Get()))
273
- counts = np.array((mesh.GetFaceVertexCountsAttr().Get()))
272
+ points = np.array(mesh.GetPointsAttr().Get())
273
+ counts = np.array(mesh.GetFaceVertexCountsAttr().Get())
274
274
  indices = np.array(mesh.GetFaceVertexIndicesAttr().Get())
275
275
  ref_geom = (points, counts, indices)
276
276
  except Exception:
@@ -290,7 +290,7 @@ class Example:
290
290
  vel_projector = fem.integrate(
291
291
  velocity_boundary_projector_form,
292
292
  fields={"u": velocity_trial, "v": velocity_test},
293
- nodal=True,
293
+ assembly="nodal",
294
294
  output_dtype=float,
295
295
  )
296
296
  fem.normalize_dirichlet_projector(vel_projector)
@@ -146,7 +146,7 @@ class Example:
146
146
  # For simplicity, use nodal integration so that inertia matrix is diagonal
147
147
  trial = fem.make_trial(space=vector_space, domain=domain)
148
148
  matrix_inertia = fem.integrate(
149
- vel_mass_form, fields={"u": trial, "v": self._test}, output_dtype=wp.float32, nodal=True
149
+ vel_mass_form, fields={"u": trial, "v": self._test}, output_dtype=wp.float32, assembly="nodal"
150
150
  )
151
151
  self._inv_mass_matrix = wp.sparse.bsr_copy(matrix_inertia)
152
152
  fem_example_utils.invert_diagonal_bsr_matrix(self._inv_mass_matrix)
@@ -82,7 +82,7 @@ def diffusion_and_inertia_form(s: fem.Sample, phi: fem.Field, psi: fem.Field, dt
82
82
 
83
83
 
84
84
  class Example:
85
- def __init__(self, quiet=False, degree=2, resolution=50, tri_mesh=False, viscosity=0.001, ang_vel=1.0):
85
+ def __init__(self, quiet=False, degree=2, resolution=50, mesh: str = "grid", viscosity=0.001, ang_vel=1.0):
86
86
  self._quiet = quiet
87
87
 
88
88
  self._ang_vel = ang_vel
@@ -91,11 +91,14 @@ class Example:
91
91
  self.sim_dt = 1.0 / (ang_vel * res)
92
92
  self.current_frame = 0
93
93
 
94
- if tri_mesh:
95
- positions, tri_vidx = fem_example_utils.gen_trimesh(res=wp.vec2i(res))
94
+ if mesh == "tri":
95
+ positions, tri_vidx = fem_example_utils.gen_trimesh(res=wp.vec2i(resolution))
96
96
  geo = fem.Trimesh2D(tri_vertex_indices=tri_vidx, positions=positions, build_bvh=True)
97
+ elif mesh == "quad":
98
+ positions, quad_vidx = fem_example_utils.gen_quadmesh(res=wp.vec2i(resolution))
99
+ geo = fem.Quadmesh2D(quad_vertex_indices=quad_vidx, positions=positions, build_bvh=True)
97
100
  else:
98
- geo = fem.Grid2D(res=wp.vec2i(res))
101
+ geo = fem.Grid2D(res=wp.vec2i(resolution))
99
102
 
100
103
  domain = fem.Cells(geometry=geo)
101
104
  scalar_space = fem.make_polynomial_space(geo, degree=degree)
@@ -149,7 +152,7 @@ if __name__ == "__main__":
149
152
  parser.add_argument("--num_frames", type=int, default=250, help="Total number of frames.")
150
153
  parser.add_argument("--viscosity", type=float, default=0.001, help="Fluid viscosity parameter.")
151
154
  parser.add_argument("--ang_vel", type=float, default=1.0, help="Angular velocity.")
152
- parser.add_argument("--tri_mesh", action="store_true", help="Use a triangular mesh.")
155
+ parser.add_argument("--mesh", choices=("grid", "tri", "quad"), default="grid", help="Mesh type.")
153
156
  parser.add_argument(
154
157
  "--headless",
155
158
  action="store_true",
@@ -164,7 +167,7 @@ if __name__ == "__main__":
164
167
  quiet=args.quiet,
165
168
  degree=args.degree,
166
169
  resolution=args.resolution,
167
- tri_mesh=args.tri_mesh,
170
+ mesh=args.mesh,
168
171
  viscosity=args.viscosity,
169
172
  ang_vel=args.ang_vel,
170
173
  )