warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.1__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
|
@@ -27,6 +27,11 @@ TILE_N = wp.constant(8)
|
|
|
27
27
|
TILE_O = wp.constant(8)
|
|
28
28
|
TILE_P = wp.constant(6)
|
|
29
29
|
|
|
30
|
+
HALF_M = wp.constant(TILE_M // 2)
|
|
31
|
+
HALF_N = wp.constant(TILE_N // 2)
|
|
32
|
+
TWO_M = wp.constant(TILE_M * 2)
|
|
33
|
+
TWO_N = wp.constant(TILE_N * 2)
|
|
34
|
+
|
|
30
35
|
TILE_OFFSET = 5
|
|
31
36
|
|
|
32
37
|
|
|
@@ -141,6 +146,140 @@ def test_tile_load(kernel, ndim):
|
|
|
141
146
|
return test
|
|
142
147
|
|
|
143
148
|
|
|
149
|
+
# Kernel exercising wp.tile_load_indexed() along both axes of a 2D array.
# For tile (i, j) it gathers every second row of the x tile into y (axis 0)
# and every second column of the x tile into z (axis 1).
@wp.kernel
def tile_load_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float), z: wp.array2d(dtype=float)):
    i, j = wp.tid()

    # Origin of this tile inside x; shared by both gathers below.
    row0 = i * TILE_M
    col0 = j * TILE_N

    # Axis 0: row indices 0, 2, 4, ... relative to the tile origin, loaded into a register tile.
    row_indices = wp.tile_arange(HALF_M, dtype=int, storage="shared") * 2
    gathered_rows = wp.tile_load_indexed(
        x,
        indices=row_indices,
        shape=(HALF_M, TILE_N),
        offset=(row0, col0),
        axis=0,
        storage="register",
    )
    wp.tile_store(y, gathered_rows, offset=(i * HALF_M, col0))

    # Axis 1: column indices 0, 2, 4, ... relative to the tile origin, loaded into a shared tile.
    col_indices = wp.tile_arange(HALF_N, dtype=int, storage="shared") * 2
    gathered_cols = wp.tile_load_indexed(
        x,
        indices=col_indices,
        shape=(TILE_M, HALF_N),
        offset=(row0, col0),
        axis=1,
        storage="shared",
    )
    wp.tile_store(z, gathered_cols, offset=(row0, j * HALF_N))
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def test_tile_load_indexed(test, device):
    """Check forward results and gradients of the ``tile_load_indexed`` kernel.

    A ``(2 * TILE_M, 2 * TILE_N)`` array is processed by a 2x2 grid of tiles.
    ``y`` must receive the even rows of the input, ``z`` the even columns, and
    after back-propagating all-ones gradients from both outputs each input
    element must have accumulated one unit per output that read it.

    Args:
        test: The test-case instance supplied by the test registration helper (unused here).
        device: Device on which the arrays are allocated and the kernel is launched.
    """
    M, N = TILE_M * 2, TILE_N * 2
    arr = np.arange(M * N, dtype=float).reshape(M, N)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((M // 2, N), dtype=float, requires_grad=True, device=device)
    z = wp.zeros((M, N // 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_load_indexed,
            dim=[2, 2],
            inputs=[x],
            outputs=[y, z],
            block_dim=32,
            device=device,
        )

    # Seed both outputs with unit gradients before running the backward pass.
    y.grad = wp.ones_like(y)
    z.grad = wp.ones_like(z)
    tape.backward()

    # An element gets +1 if its row is even (read into y) and +1 if its
    # column is even (read into z): 2 on even/even, 0 on odd/odd, 1 elsewhere.
    even_row = (np.arange(M) % 2 == 0).astype(float)
    even_col = (np.arange(N) % 2 == 0).astype(float)
    expected_x_grad = even_row[:, None] + even_col[None, :]

    expected_y = arr[0::2]
    expected_z = arr[:, 0::2]

    assert_np_equal(y.numpy(), expected_y)
    assert_np_equal(z.numpy(), expected_z)
    assert_np_equal(x.grad.numpy(), expected_x_grad)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
# Warp helper that returns its integer argument incremented by one; it is
# passed to wp.tile_map() to turn a tile of even indices into odd indices.
@wp.func
def add_one(x: int):
    successor = x + 1
    return successor
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# Kernel exercising wp.tile_store_indexed() along both axes of a 2D array.
# Each (TILE_M, TILE_N) tile of x is scattered to the odd rows of y (axis 0)
# and to the odd columns of z (axis 1).
@wp.kernel
def tile_store_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float), z: wp.array2d(dtype=float)):
    i, j = wp.tid()

    src = wp.tile_load(x, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N), storage="register")

    # Axis 0: destination rows 1, 3, 5, ... relative to an offset spaced TWO_M apart per tile.
    even_rows = wp.tile_arange(TILE_M, dtype=int, storage="shared") * 2
    odd_rows = wp.tile_map(add_one, even_rows)

    wp.tile_store_indexed(y, indices=odd_rows, t=src, offset=(i * TWO_M, j * TILE_N), axis=0)

    # Axis 1: destination columns 1, 3, 5, ... relative to an offset spaced TWO_N apart per tile.
    even_cols = wp.tile_arange(TILE_N, dtype=int, storage="shared") * 2
    odd_cols = wp.tile_map(add_one, even_cols)

    wp.tile_store_indexed(z, indices=odd_cols, t=src, offset=(i * TILE_M, j * TWO_N), axis=1)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def test_tile_store_indexed(test, device):
    """Check forward results and gradients of the ``tile_store_indexed`` kernel.

    A ``(2 * TILE_M, 2 * TILE_N)`` input is processed by a 2x2 grid of tiles.
    ``y`` (twice as many rows) must hold the input in its odd rows and zeros
    elsewhere; ``z`` (twice as many columns) must hold the input in its odd
    columns and zeros elsewhere. With all-ones gradients seeded on both
    outputs, the expected input gradient is 2 everywhere.

    Args:
        test: The test-case instance supplied by the test registration helper (unused here).
        device: Device on which the arrays are allocated and the kernel is launched.
    """
    M, N = TILE_M * 2, TILE_N * 2
    arr = np.arange(M * N, dtype=float).reshape(M, N)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((M * 2, N), dtype=float, requires_grad=True, device=device)
    z = wp.zeros((M, N * 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_store_indexed,
            dim=[2, 2],
            inputs=[x],
            outputs=[y, z],
            block_dim=32,
            device=device,
        )

    # Seed both outputs with unit gradients before running the backward pass.
    y.grad = wp.ones_like(y)
    z.grad = wp.ones_like(z)
    tape.backward()

    # Reference outputs: the input interleaved with zeros along one axis each.
    expected_y = np.zeros((M * 2, N))
    expected_y[1::2, :] = arr

    expected_z = np.zeros((M, N * 2))
    expected_z[:, 1::2] = arr

    # Every input element is written once to y and once to z.
    expected_x_grad = np.full((M, N), 2.0)

    assert_np_equal(y.numpy(), expected_y)
    assert_np_equal(z.numpy(), expected_z)
    assert_np_equal(x.grad.numpy(), expected_x_grad)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
# Kernel exercising wp.tile_atomic_add_indexed() along axis 0.
# Every row of the loaded x tile is given the same index (1), so all rows of
# the tile are accumulated into a single row of y relative to the tile offset.
@wp.kernel
def tile_atomic_add_indexed(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
    i, j = wp.tid()

    row0 = i * TILE_M
    col0 = j * TILE_N

    src = wp.tile_load(x, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")

    # All-ones index tile: each source row targets row index 1.
    row_indices = wp.tile_ones(TILE_M, dtype=int, storage="shared")

    wp.tile_atomic_add_indexed(y, indices=row_indices, t=src, offset=(row0, col0), axis=0)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def test_tile_atomic_add_indexed(test, device):
    """Check forward results and gradients of the ``tile_atomic_add_indexed`` kernel.

    A ``(2 * TILE_M, 2 * TILE_N)`` input is processed by a 2x2 grid of tiles.
    Row 1 of ``y`` must equal the column-wise sum of the first ``TILE_M`` input
    rows and row ``TILE_M + 1`` the column-wise sum of the remaining rows; all
    other rows stay zero. With an all-ones gradient seeded on ``y``, the
    expected input gradient is 1 everywhere.

    Args:
        test: The test-case instance supplied by the test registration helper (unused here).
        device: Device on which the arrays are allocated and the kernel is launched.
    """
    M, N = TILE_M * 2, TILE_N * 2
    arr = np.arange(M * N, dtype=float).reshape(M, N)

    x = wp.array(arr, dtype=float, requires_grad=True, device=device)
    y = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_atomic_add_indexed,
            dim=[2, 2],
            inputs=[x],
            outputs=[y],
            block_dim=32,
            device=device,
        )

    # Seed the output with unit gradients before running the backward pass.
    y.grad = wp.ones_like(y)
    tape.backward()

    # Each band of TILE_M input rows collapses into one output row.
    expected_y = np.zeros((M, N), dtype=float)
    expected_y[1] = arr[:TILE_M].sum(axis=0)
    expected_y[TILE_M + 1] = arr[TILE_M:].sum(axis=0)

    expected_x_grad = np.ones((M, N))

    assert_np_equal(y.numpy(), expected_y)
    assert_np_equal(x.grad.numpy(), expected_x_grad)
|
|
281
|
+
|
|
282
|
+
|
|
144
283
|
@wp.kernel
|
|
145
284
|
def tile_load_unaligned_kernel(
|
|
146
285
|
input: wp.array2d(dtype=float),
|
|
@@ -492,6 +631,31 @@ def test_tile_load_fortran(test, device):
|
|
|
492
631
|
assert_array_equal(B_wp.grad, A_wp.grad)
|
|
493
632
|
|
|
494
633
|
|
|
634
|
+
# ----------------------------------------------------------------------------------------
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
# Warp function that loads a (TILE_DIM, TILE_DIM) shared-storage tile from A
# and returns nothing; the tile is intentionally left unused so that it only
# exists for the duration of this function.
@wp.func
def test_tile_load_scoped_func(A: wp.array2d(dtype=float)):
    scoped_tile = wp.tile_load(A, shape=(TILE_DIM, TILE_DIM), offset=(0, 0), storage="shared")
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
# Kernel that first calls test_tile_load_scoped_func() (which loads a shared
# tile from A inside the callee) and then loads a second shared tile from B
# in the kernel body itself. Neither tile is used further.
@wp.kernel
def test_tile_load_scoped_kernel(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float)):
    test_tile_load_scoped_func(A)
    kernel_tile = wp.tile_load(B, shape=(TILE_DIM, TILE_DIM), offset=(0, 0), storage="shared")
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def test_tile_load_scoped(test, device):
    """Regression test for 2D shared tiles allocated inside a function.

    The tile is allocated in a called function and deallocated when it goes
    out of scope. There are no assertions: the test passes if the launch
    completes without errors.

    Args:
        test: The test-case instance supplied by the test registration helper (unused here).
        device: Device on which the arrays are allocated and the kernel is launched.
    """
    tile_shape = (TILE_DIM, TILE_DIM)
    A = wp.ones(tile_shape, dtype=float, device=device)
    B = wp.ones(tile_shape, dtype=float, device=device)

    wp.launch_tiled(
        test_tile_load_scoped_kernel,
        dim=1,
        inputs=[A, B],
        block_dim=TILE_DIM,
        device=device,
    )
|
|
657
|
+
|
|
658
|
+
|
|
495
659
|
devices = get_test_devices()
|
|
496
660
|
|
|
497
661
|
|
|
@@ -503,6 +667,9 @@ add_function_test(TestTileLoad, "test_tile_load_1d", test_tile_load(tile_load_1d
|
|
|
503
667
|
add_function_test(TestTileLoad, "test_tile_load_2d", test_tile_load(tile_load_2d_kernel, 2), devices=devices)
|
|
504
668
|
add_function_test(TestTileLoad, "test_tile_load_3d", test_tile_load(tile_load_3d_kernel, 3), devices=devices)
|
|
505
669
|
add_function_test(TestTileLoad, "test_tile_load_4d", test_tile_load(tile_load_4d_kernel, 4), devices=devices)
|
|
670
|
+
add_function_test(TestTileLoad, "test_tile_load_indexed", test_tile_load_indexed, devices=devices)
|
|
671
|
+
add_function_test(TestTileLoad, "test_tile_store_indexed", test_tile_store_indexed, devices=devices)
|
|
672
|
+
add_function_test(TestTileLoad, "test_tile_atomic_add_indexed", test_tile_atomic_add_indexed, devices=devices)
|
|
506
673
|
add_function_test(TestTileLoad, "test_tile_load_unaligned", test_tile_load_unaligned, devices=devices)
|
|
507
674
|
add_function_test(TestTileLoad, "test_tile_load_aligned_small", test_tile_load_aligned_small, devices=devices)
|
|
508
675
|
add_function_test(
|
|
@@ -525,6 +692,8 @@ add_function_test(TestTileLoad, "test_tile_assign_4d", test_tile_assign(tile_ass
|
|
|
525
692
|
|
|
526
693
|
add_function_test(TestTileLoad, "test_tile_load_fortran", test_tile_load_fortran, devices=devices)
|
|
527
694
|
|
|
695
|
+
add_function_test(TestTileLoad, "test_tile_load_scoped", test_tile_load_scoped, devices=devices)
|
|
696
|
+
|
|
528
697
|
|
|
529
698
|
if __name__ == "__main__":
|
|
530
699
|
wp.clear_kernel_cache()
|