warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (134):
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +47 -67
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +312 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1249 -784
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/fabric.py +1 -1
  18. warp/fem/cache.py +27 -19
  19. warp/fem/domain.py +2 -2
  20. warp/fem/field/nodal_field.py +2 -2
  21. warp/fem/field/virtual.py +264 -166
  22. warp/fem/geometry/geometry.py +5 -5
  23. warp/fem/integrate.py +129 -51
  24. warp/fem/space/restriction.py +4 -0
  25. warp/fem/space/shape/tet_shape_function.py +3 -10
  26. warp/jax_experimental/custom_call.py +1 -1
  27. warp/jax_experimental/ffi.py +2 -1
  28. warp/marching_cubes.py +708 -0
  29. warp/native/array.h +99 -4
  30. warp/native/builtin.h +82 -5
  31. warp/native/bvh.cpp +64 -28
  32. warp/native/bvh.cu +58 -58
  33. warp/native/bvh.h +2 -2
  34. warp/native/clang/clang.cpp +7 -7
  35. warp/native/coloring.cpp +8 -2
  36. warp/native/crt.cpp +2 -2
  37. warp/native/crt.h +3 -5
  38. warp/native/cuda_util.cpp +41 -10
  39. warp/native/cuda_util.h +10 -4
  40. warp/native/exports.h +1842 -1908
  41. warp/native/fabric.h +2 -1
  42. warp/native/hashgrid.cpp +37 -37
  43. warp/native/hashgrid.cu +2 -2
  44. warp/native/initializer_array.h +1 -1
  45. warp/native/intersect.h +2 -2
  46. warp/native/mat.h +1910 -116
  47. warp/native/mathdx.cpp +43 -43
  48. warp/native/mesh.cpp +24 -24
  49. warp/native/mesh.cu +26 -26
  50. warp/native/mesh.h +4 -2
  51. warp/native/nanovdb/GridHandle.h +179 -12
  52. warp/native/nanovdb/HostBuffer.h +8 -7
  53. warp/native/nanovdb/NanoVDB.h +517 -895
  54. warp/native/nanovdb/NodeManager.h +323 -0
  55. warp/native/nanovdb/PNanoVDB.h +2 -2
  56. warp/native/quat.h +331 -14
  57. warp/native/range.h +7 -1
  58. warp/native/reduce.cpp +10 -10
  59. warp/native/reduce.cu +13 -14
  60. warp/native/runlength_encode.cpp +2 -2
  61. warp/native/runlength_encode.cu +5 -5
  62. warp/native/scan.cpp +3 -3
  63. warp/native/scan.cu +4 -4
  64. warp/native/sort.cpp +10 -10
  65. warp/native/sort.cu +22 -22
  66. warp/native/sparse.cpp +8 -8
  67. warp/native/sparse.cu +13 -13
  68. warp/native/spatial.h +366 -17
  69. warp/native/temp_buffer.h +2 -2
  70. warp/native/tile.h +283 -69
  71. warp/native/vec.h +381 -14
  72. warp/native/volume.cpp +54 -54
  73. warp/native/volume.cu +1 -1
  74. warp/native/volume.h +2 -1
  75. warp/native/volume_builder.cu +30 -37
  76. warp/native/warp.cpp +150 -149
  77. warp/native/warp.cu +323 -192
  78. warp/native/warp.h +227 -226
  79. warp/optim/linear.py +736 -271
  80. warp/render/imgui_manager.py +289 -0
  81. warp/render/render_opengl.py +85 -6
  82. warp/sim/graph_coloring.py +2 -2
  83. warp/sparse.py +558 -175
  84. warp/tests/aux_test_module_aot.py +7 -0
  85. warp/tests/cuda/test_async.py +3 -3
  86. warp/tests/cuda/test_conditional_captures.py +101 -0
  87. warp/tests/geometry/test_marching_cubes.py +233 -12
  88. warp/tests/sim/test_coloring.py +6 -6
  89. warp/tests/test_array.py +56 -5
  90. warp/tests/test_codegen.py +3 -2
  91. warp/tests/test_context.py +8 -15
  92. warp/tests/test_enum.py +136 -0
  93. warp/tests/test_examples.py +2 -2
  94. warp/tests/test_fem.py +45 -2
  95. warp/tests/test_fixedarray.py +229 -0
  96. warp/tests/test_func.py +18 -15
  97. warp/tests/test_future_annotations.py +7 -5
  98. warp/tests/test_linear_solvers.py +30 -0
  99. warp/tests/test_map.py +1 -1
  100. warp/tests/test_mat.py +1518 -378
  101. warp/tests/test_mat_assign_copy.py +178 -0
  102. warp/tests/test_mat_constructors.py +574 -0
  103. warp/tests/test_module_aot.py +287 -0
  104. warp/tests/test_print.py +69 -0
  105. warp/tests/test_quat.py +140 -34
  106. warp/tests/test_quat_assign_copy.py +145 -0
  107. warp/tests/test_reload.py +2 -1
  108. warp/tests/test_sparse.py +71 -0
  109. warp/tests/test_spatial.py +140 -34
  110. warp/tests/test_spatial_assign_copy.py +160 -0
  111. warp/tests/test_struct.py +43 -3
  112. warp/tests/test_types.py +0 -20
  113. warp/tests/test_vec.py +179 -34
  114. warp/tests/test_vec_assign_copy.py +143 -0
  115. warp/tests/tile/test_tile.py +184 -18
  116. warp/tests/tile/test_tile_cholesky.py +605 -0
  117. warp/tests/tile/test_tile_load.py +169 -0
  118. warp/tests/tile/test_tile_mathdx.py +2 -558
  119. warp/tests/tile/test_tile_matmul.py +1 -1
  120. warp/tests/tile/test_tile_mlp.py +1 -1
  121. warp/tests/tile/test_tile_shared_memory.py +5 -5
  122. warp/tests/unittest_suites.py +6 -0
  123. warp/tests/walkthrough_debug.py +1 -1
  124. warp/thirdparty/unittest_parallel.py +108 -9
  125. warp/types.py +554 -264
  126. warp/utils.py +68 -86
  127. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  128. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
  129. warp/native/marching.cpp +0 -19
  130. warp/native/marching.cu +0 -514
  131. warp/native/marching.h +0 -19
  132. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  133. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,11 @@ TILE_N = wp.constant(8)
27
27
  TILE_O = wp.constant(8)
28
28
  TILE_P = wp.constant(6)
29
29
 
30
+ HALF_M = wp.constant(TILE_M // 2)
31
+ HALF_N = wp.constant(TILE_N // 2)
32
+ TWO_M = wp.constant(TILE_M * 2)
33
+ TWO_N = wp.constant(TILE_N * 2)
34
+
30
35
  TILE_OFFSET = 5
31
36
 
32
37
 
@@ -141,6 +146,140 @@ def test_tile_load(kernel, ndim):
141
146
  return test
142
147
 
143
148
 
149
# Gathers every other row, then every other column, of this block's
# (TILE_M, TILE_N) window of `x`, and stores the compacted tiles to `y` and `z`.
@wp.kernel
def tile_load_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float), z: wp.array2d(dtype=float)):
    i, j = wp.tid()

    # Origin of this block's window in the source array.
    src_row = i * TILE_M
    src_col = j * TILE_N

    # Row gather: indices 0, 2, 4, ... pick HALF_M rows along axis 0.
    row_ids = wp.tile_arange(HALF_M, dtype=int, storage="shared") * 2
    row_tile = wp.tile_load_indexed(
        x,
        indices=row_ids,
        shape=(HALF_M, TILE_N),
        offset=(src_row, src_col),
        axis=0,
        storage="register",
    )
    wp.tile_store(y, row_tile, offset=(i * HALF_M, src_col))

    # Column gather: indices 0, 2, 4, ... pick HALF_N columns along axis 1.
    col_ids = wp.tile_arange(HALF_N, dtype=int, storage="shared") * 2
    col_tile = wp.tile_load_indexed(
        x,
        indices=col_ids,
        shape=(TILE_M, HALF_N),
        offset=(src_row, src_col),
        axis=1,
        storage="shared",
    )
    wp.tile_store(z, col_tile, offset=(src_row, j * HALF_N))
164
+
165
+
166
def test_tile_load_indexed(test, device):
    """Check the forward values and input gradient of the ``tile_load_indexed`` kernel.

    Launches a 2x2 grid of blocks over a (2 * TILE_M, 2 * TILE_N) input, seeds
    both output gradients with ones, and compares against NumPy references.
    """
    rows, cols = TILE_M * 2, TILE_N * 2
    arr = np.arange(rows * cols, dtype=float).reshape(rows, cols)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((rows // 2, cols), dtype=float, requires_grad=True, device=device)
    z = wp.zeros((rows, cols // 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(tile_load_indexed, dim=[2, 2], inputs=[x], outputs=[y, z], block_dim=32, device=device)

    y.grad = wp.ones_like(y)
    z.grad = wp.ones_like(z)
    tape.backward()

    # Each element of x gets one unit of gradient per output that read it:
    # even rows feed y, even columns feed z.
    expected_x_grad = np.zeros(arr.shape, dtype=float)
    expected_x_grad[0::2, :] += 1
    expected_x_grad[:, 0::2] += 1

    assert_np_equal(y.numpy(), arr[0::2])
    assert_np_equal(z.numpy(), arr[:, 0::2])
    assert_np_equal(x.grad.numpy(), expected_x_grad)
191
+
192
+
193
# Integer increment; the tile_store_indexed kernel maps it over even
# indices to obtain odd ones.
@wp.func
def add_one(x: int):
    incremented = x + 1
    return incremented
196
+
197
+
198
# Loads this block's (TILE_M, TILE_N) tile of `x` and scatters it to the odd
# rows of `y` (axis 0) and to the odd columns of `z` (axis 1).
@wp.kernel
def tile_store_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float), z: wp.array2d(dtype=float)):
    i, j = wp.tid()

    # Origin of this block's tile in the source array.
    src_row = i * TILE_M
    src_col = j * TILE_N

    src_tile = wp.tile_load(x, shape=(TILE_M, TILE_N), offset=(src_row, src_col), storage="register")

    # Destination rows 1, 3, 5, ...: even indices shifted by one.
    even_rows = wp.tile_arange(TILE_M, dtype=int, storage="shared") * 2
    odd_rows = wp.tile_map(add_one, even_rows)

    wp.tile_store_indexed(y, indices=odd_rows, t=src_tile, offset=(i * TWO_M, src_col), axis=0)

    # Destination columns 1, 3, 5, ...: same construction along axis 1.
    even_cols = wp.tile_arange(TILE_N, dtype=int, storage="shared") * 2
    odd_cols = wp.tile_map(add_one, even_cols)

    wp.tile_store_indexed(z, indices=odd_cols, t=src_tile, offset=(src_row, j * TWO_N), axis=1)
213
+
214
+
215
def test_tile_store_indexed(test, device):
    """Check the forward values and input gradient of the ``tile_store_indexed`` kernel.

    The input is expected to land on the odd rows of ``y`` and the odd columns
    of ``z``, with every other output entry left at zero.
    """
    rows, cols = TILE_M * 2, TILE_N * 2
    arr = np.arange(rows * cols, dtype=float).reshape(rows, cols)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((rows * 2, cols), dtype=float, requires_grad=True, device=device)
    z = wp.zeros((rows, cols * 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(tile_store_indexed, dim=[2, 2], inputs=[x], outputs=[y, z], block_dim=32, device=device)

    y.grad = wp.ones_like(y)
    z.grad = wp.ones_like(z)
    tape.backward()

    expected_y = np.zeros((rows * 2, cols))
    expected_y[1::2] = arr

    expected_z = np.zeros((rows, cols * 2))
    expected_z[:, 1::2] = arr

    # Every input element is written once to y and once to z.
    expected_x_grad = np.full((rows, cols), 2.0)

    assert_np_equal(y.numpy(), expected_y)
    assert_np_equal(z.numpy(), expected_z)
    assert_np_equal(x.grad.numpy(), expected_x_grad)
244
+
245
+
246
# Loads this block's (TILE_M, TILE_N) tile of `x` and atomically adds every
# one of its rows into `y` using an index tile filled with ones (axis 0).
@wp.kernel
def tile_atomic_add_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
    i, j = wp.tid()

    # The same window origin is used for the source load and the destination add.
    row0 = i * TILE_M
    col0 = j * TILE_N

    src_tile = wp.tile_load(x, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")

    # All TILE_M indices equal one, so every source row targets the same destination row.
    row_ids = wp.tile_ones(TILE_M, dtype=int, storage="shared")

    wp.tile_atomic_add_indexed(y, indices=row_ids, t=src_tile, offset=(row0, col0), axis=0)
255
+
256
+
257
def test_tile_atomic_add_indexed(test, device):
    """Check the forward values and input gradient of the ``tile_atomic_add_indexed`` kernel.

    Each block is expected to accumulate all rows of its tile into row 1 of
    its window in ``y``; all other rows of ``y`` stay zero.
    """
    rows, cols = TILE_M * 2, TILE_N * 2
    arr = np.arange(rows * cols, dtype=float).reshape(rows, cols)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((rows, cols), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(tile_atomic_add_indexed, dim=[2, 2], inputs=[x], outputs=[y], block_dim=32, device=device)

    y.grad = wp.ones_like(y)
    tape.backward()

    # Column sums of the upper and lower halves of the input land on
    # row 1 and row TILE_M + 1 respectively.
    expected_y = np.zeros((rows, cols), dtype=float)
    expected_y[1] = arr[:TILE_M].sum(axis=0)
    expected_y[TILE_M + 1] = arr[TILE_M:].sum(axis=0)

    expected_x_grad = np.ones((rows, cols))

    assert_np_equal(y.numpy(), expected_y)
    assert_np_equal(x.grad.numpy(), expected_x_grad)
281
+
282
+
144
283
  @wp.kernel
145
284
  def tile_load_unaligned_kernel(
146
285
  input: wp.array2d(dtype=float),
@@ -492,6 +631,31 @@ def test_tile_load_fortran(test, device):
492
631
  assert_array_equal(B_wp.grad, A_wp.grad)
493
632
 
494
633
 
634
+ # ----------------------------------------------------------------------------------------
635
+
636
+
637
# Helper for the scoped-tile regression test: loads a (TILE_DIM, TILE_DIM)
# shared tile into a local that is deliberately never used or returned.
@wp.func
def test_tile_load_scoped_func(A: wp.array2d(dtype=float)):
    scoped_tile = wp.tile_load(A, shape=(TILE_DIM, TILE_DIM), offset=(0, 0), storage="shared")
640
+
641
+
642
# Kernel for the scoped-tile regression test: calls the helper that loads a
# shared tile from `A`, then loads a second shared tile from `B` itself.
@wp.kernel
def test_tile_load_scoped_kernel(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float)):
    # The helper's tile is local to the call.
    test_tile_load_scoped_func(A)

    # The second load is deliberately left unused; only successful execution matters.
    second_tile = wp.tile_load(B, shape=(TILE_DIM, TILE_DIM), offset=(0, 0), storage="shared")
646
+
647
+
648
def test_tile_load_scoped(test, device):
    """Regression test for 2D shared tiles allocated inside a function and deallocated when they go out of scope.

    There are no assertions; the test passes if the launch runs without errors.
    """
    tile_shape = (TILE_DIM, TILE_DIM)
    src_a = wp.ones(tile_shape, dtype=float, device=device)
    src_b = wp.ones(tile_shape, dtype=float, device=device)

    wp.launch_tiled(test_tile_load_scoped_kernel, dim=1, inputs=[src_a, src_b], block_dim=TILE_DIM, device=device)
657
+
658
+
495
659
  devices = get_test_devices()
496
660
 
497
661
 
@@ -503,6 +667,9 @@ add_function_test(TestTileLoad, "test_tile_load_1d", test_tile_load(tile_load_1d
503
667
  add_function_test(TestTileLoad, "test_tile_load_2d", test_tile_load(tile_load_2d_kernel, 2), devices=devices)
504
668
  add_function_test(TestTileLoad, "test_tile_load_3d", test_tile_load(tile_load_3d_kernel, 3), devices=devices)
505
669
  add_function_test(TestTileLoad, "test_tile_load_4d", test_tile_load(tile_load_4d_kernel, 4), devices=devices)
670
+ add_function_test(TestTileLoad, "test_tile_load_indexed", test_tile_load_indexed, devices=devices)
671
+ add_function_test(TestTileLoad, "test_tile_store_indexed", test_tile_store_indexed, devices=devices)
672
+ add_function_test(TestTileLoad, "test_tile_atomic_add_indexed", test_tile_atomic_add_indexed, devices=devices)
506
673
  add_function_test(TestTileLoad, "test_tile_load_unaligned", test_tile_load_unaligned, devices=devices)
507
674
  add_function_test(TestTileLoad, "test_tile_load_aligned_small", test_tile_load_aligned_small, devices=devices)
508
675
  add_function_test(
@@ -525,6 +692,8 @@ add_function_test(TestTileLoad, "test_tile_assign_4d", test_tile_assign(tile_ass
525
692
 
526
693
  add_function_test(TestTileLoad, "test_tile_load_fortran", test_tile_load_fortran, devices=devices)
527
694
 
695
+ add_function_test(TestTileLoad, "test_tile_load_scoped", test_tile_load_scoped, devices=devices)
696
+
528
697
 
529
698
  if __name__ == "__main__":
530
699
  wp.clear_kernel_cache()