PyPI - warp-lang - Versions diffs - 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.1__py3-none-manylinux_2_34_aarch64.whl - Mend

warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.1__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (141) hide show

warp/__init__.py +282 -103
warp/__init__.pyi +1904 -114
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +93 -30
warp/build_dll.py +331 -101
warp/builtins.py +1244 -160
warp/codegen.py +317 -206
warp/config.py +1 -1
warp/context.py +1465 -789
warp/examples/core/example_marching_cubes.py +1 -0
warp/examples/core/example_render_opengl.py +100 -3
warp/examples/fem/example_apic_fluid.py +98 -52
warp/examples/fem/example_convection_diffusion_dg.py +25 -4
warp/examples/fem/example_diffusion_mgpu.py +8 -3
warp/examples/fem/utils.py +68 -22
warp/examples/interop/example_jax_kernel.py +2 -1
warp/fabric.py +1 -1
warp/fem/cache.py +27 -19
warp/fem/domain.py +2 -2
warp/fem/field/nodal_field.py +2 -2
warp/fem/field/virtual.py +264 -166
warp/fem/geometry/geometry.py +5 -5
warp/fem/integrate.py +129 -51
warp/fem/space/restriction.py +4 -0
warp/fem/space/shape/tet_shape_function.py +3 -10
warp/jax_experimental/custom_call.py +25 -2
warp/jax_experimental/ffi.py +22 -1
warp/jax_experimental/xla_ffi.py +16 -7
warp/marching_cubes.py +708 -0
warp/native/array.h +99 -4
warp/native/builtin.h +86 -9
warp/native/bvh.cpp +64 -28
warp/native/bvh.cu +58 -58
warp/native/bvh.h +2 -2
warp/native/clang/clang.cpp +7 -7
warp/native/coloring.cpp +8 -2
warp/native/crt.cpp +2 -2
warp/native/crt.h +3 -5
warp/native/cuda_util.cpp +41 -10
warp/native/cuda_util.h +10 -4
warp/native/exports.h +1842 -1908
warp/native/fabric.h +2 -1
warp/native/hashgrid.cpp +37 -37
warp/native/hashgrid.cu +2 -2
warp/native/initializer_array.h +1 -1
warp/native/intersect.h +2 -2
warp/native/mat.h +1910 -116
warp/native/mathdx.cpp +43 -43
warp/native/mesh.cpp +24 -24
warp/native/mesh.cu +26 -26
warp/native/mesh.h +4 -2
warp/native/nanovdb/GridHandle.h +179 -12
warp/native/nanovdb/HostBuffer.h +8 -7
warp/native/nanovdb/NanoVDB.h +517 -895
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +2 -2
warp/native/quat.h +331 -14
warp/native/range.h +7 -1
warp/native/reduce.cpp +10 -10
warp/native/reduce.cu +13 -14
warp/native/runlength_encode.cpp +2 -2
warp/native/runlength_encode.cu +5 -5
warp/native/scan.cpp +3 -3
warp/native/scan.cu +4 -4
warp/native/sort.cpp +10 -10
warp/native/sort.cu +40 -31
warp/native/sort.h +2 -0
warp/native/sparse.cpp +8 -8
warp/native/sparse.cu +13 -13
warp/native/spatial.h +366 -17
warp/native/temp_buffer.h +2 -2
warp/native/tile.h +471 -82
warp/native/vec.h +328 -14
warp/native/volume.cpp +54 -54
warp/native/volume.cu +1 -1
warp/native/volume.h +2 -1
warp/native/volume_builder.cu +30 -37
warp/native/warp.cpp +150 -149
warp/native/warp.cu +377 -216
warp/native/warp.h +227 -226
warp/optim/linear.py +736 -271
warp/render/imgui_manager.py +289 -0
warp/render/render_opengl.py +99 -18
warp/render/render_usd.py +1 -0
warp/sim/graph_coloring.py +2 -2
warp/sparse.py +558 -175
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/cuda/test_async.py +3 -3
warp/tests/cuda/test_conditional_captures.py +101 -0
warp/tests/geometry/test_hash_grid.py +38 -0
warp/tests/geometry/test_marching_cubes.py +233 -12
warp/tests/interop/test_jax.py +608 -28
warp/tests/sim/test_coloring.py +6 -6
warp/tests/test_array.py +58 -5
warp/tests/test_codegen.py +4 -3
warp/tests/test_context.py +8 -15
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +2 -2
warp/tests/test_fem.py +49 -6
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_func.py +18 -15
warp/tests/test_future_annotations.py +7 -5
warp/tests/test_linear_solvers.py +30 -0
warp/tests/test_map.py +15 -1
warp/tests/test_mat.py +1518 -378
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +574 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_print.py +69 -0
warp/tests/test_quat.py +140 -34
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_reload.py +2 -1
warp/tests/test_sparse.py +71 -0
warp/tests/test_spatial.py +140 -34
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_struct.py +43 -3
warp/tests/test_tuple.py +96 -0
warp/tests/test_types.py +61 -20
warp/tests/test_vec.py +179 -34
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/tile/test_tile.py +245 -18
warp/tests/tile/test_tile_cholesky.py +605 -0
warp/tests/tile/test_tile_load.py +169 -0
warp/tests/tile/test_tile_mathdx.py +2 -558
warp/tests/tile/test_tile_matmul.py +1 -1
warp/tests/tile/test_tile_mlp.py +1 -1
warp/tests/tile/test_tile_shared_memory.py +5 -5
warp/tests/unittest_suites.py +6 -0
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +108 -9
warp/types.py +571 -267
warp/utils.py +68 -86
{warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
{warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
warp/native/marching.cpp +0 -19
warp/native/marching.cu +0 -514
warp/native/marching.h +0 -19
{warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0

warp/tests/tile/test_tile.py CHANGED Viewed

@@ -109,12 +109,29 @@ def test_tile_copy_2d(test, device):
 @wp.func
-def unary_func(x: float):
+def unary_func(x: wp.float32):
     return wp.sin(x)
+@wp.func
+def unary_func(x: wp.float64):
+    return wp.sin(x)
+@wp.kernel
+def tile_unary_map_user_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(unary_func, a)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
 @wp.kernel
-def tile_unary_map(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
+def tile_unary_map_builtin_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
     # tile index
     i, j = wp.tid()
@@ -131,17 +148,76 @@ def test_tile_unary_map(test, device):
     M = TILE_M * 7
     N = TILE_N * 5
-    A = rng.random((M, N), dtype=np.float32)
-    B = np.sin(A)
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = np.sin(A)
+        A_grad = np.cos(A)
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+        # verify forward pass
+        assert_np_equal(B_wp.numpy(), B, tol=tol)
+        # verify backward pass
+        B_wp.grad = wp.ones_like(B_wp, device=device)
+        tape.backward()
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+    dtypes = [np.float32, np.float64]
+    for dtype in dtypes:
+        run(tile_unary_map_user_func, dtype)
+        run(tile_unary_map_builtin_func, dtype)
-    A_grad = np.cos(A)
+@wp.func
+def unary_func_mixed_types(x: int) -> float:
+    return wp.sin(float(x))
+@wp.kernel
+def tile_unary_map_mixed_types(input: wp.array2d(dtype=int), output: wp.array2d(dtype=float)):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(unary_func_mixed_types, a)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+def test_tile_unary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+    M = TILE_M * 7
+    N = TILE_N * 5
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
+    B = np.sin(A.astype(np.float32))
+    A_grad = np.cos(A.astype(np.float32))
     A_wp = wp.array(A, requires_grad=True, device=device)
-    B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+    B_wp = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)
     with wp.Tape() as tape:
         wp.launch_tiled(
-            tile_unary_map,
+            tile_unary_map_mixed_types,
             dim=[int(M / TILE_M), int(N / TILE_N)],
             inputs=[A_wp, B_wp],
             block_dim=TILE_DIM,
@@ -155,17 +231,23 @@ def test_tile_unary_map(test, device):
     B_wp.grad = wp.ones_like(B_wp, device=device)
     tape.backward()
-    assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
+    # The gradients of `A` are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
 @wp.func
-def binary_func(x: float, y: float):
-    return wp.sin(x) + y
+def binary_func(x: wp.float32, y: wp.float32):
+    return x + y
+@wp.func
+def binary_func(x: wp.float64, y: wp.float64):
+    return x + y
 @wp.kernel
-def tile_binary_map(
-    input_a: wp.array2d(dtype=float), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
+def tile_binary_map_user_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
 ):
     # tile index
     i, j = wp.tid()
@@ -178,26 +260,107 @@ def tile_binary_map(
     wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+@wp.kernel
+def tile_binary_map_builtin_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
+):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(wp.add, a, b)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
 def test_tile_binary_map(test, device):
     rng = np.random.default_rng(42)
     M = TILE_M * 7
     N = TILE_N * 5
-    A = rng.random((M, N), dtype=np.float32)
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = rng.random((M, N), dtype=dtype)
+        C = A + B
+        A_grad = np.ones_like(A)
+        B_grad = np.ones_like(B)
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.array(B, requires_grad=True, device=device)
+        C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp, C_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+        # verify forward pass
+        assert_np_equal(C_wp.numpy(), C, tol=tol)
+        # verify backward pass
+        C_wp.grad = wp.ones_like(C_wp, device=device)
+        tape.backward()
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+        assert_np_equal(B_wp.grad.numpy(), B_grad, tol=tol)
+    dtypes = [np.float32, np.float64]
+    for dtype in dtypes:
+        run(tile_binary_map_builtin_func, dtype)
+        run(tile_binary_map_user_func, dtype)
+@wp.func
+def binary_func_mixed_types(x: int, y: float) -> float:
+    return wp.sin(float(x)) + y
+@wp.kernel
+def tile_binary_map_mixed_types(
+    input_a: wp.array2d(dtype=int), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
+):
+    # tile index
+    i, j = wp.tid()
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    sa = wp.tile_map(binary_func_mixed_types, a, b)
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+def test_tile_binary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+    M = TILE_M * 7
+    N = TILE_N * 5
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
     B = rng.random((M, N), dtype=np.float32)
-    C = np.sin(A) + B
+    C = np.sin(A.astype(np.float32)) + B
-    A_grad = np.cos(A)
+    A_grad = np.cos(A.astype(np.float32))
     B_grad = np.ones_like(B)
     A_wp = wp.array(A, requires_grad=True, device=device)
     B_wp = wp.array(B, requires_grad=True, device=device)
-    C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+    C_wp = wp.zeros_like(B_wp, requires_grad=True, device=device)
     with wp.Tape() as tape:
         wp.launch_tiled(
-            tile_binary_map,
+            tile_binary_map_mixed_types,
             dim=[int(M / TILE_M), int(N / TILE_N)],
             inputs=[A_wp, B_wp, C_wp],
             block_dim=TILE_DIM,
@@ -211,7 +374,8 @@ def test_tile_binary_map(test, device):
     C_wp.grad = wp.ones_like(C_wp, device=device)
     tape.backward()
-    assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
+    # The gradients of `A` are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
     assert_np_equal(B_wp.grad.numpy(), B_grad)
@@ -673,6 +837,66 @@ def test_tile_assign(test, device):
     assert_np_equal(x.grad.numpy(), np.full(TILE_M, 1.0, dtype=np.float32))
+@wp.kernel
+def test_tile_where_kernel(select: int, x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+    x_reg = wp.tile_load(x, shape=(TILE_M,), storage="register")
+    y_reg = wp.tile_load(y, shape=(TILE_M,), storage="register")
+    x_shared = wp.tile_load(x, shape=(TILE_M,), storage="shared")
+    y_shared = wp.tile_load(y, shape=(TILE_M,), storage="shared")
+    if select == 0:
+        s = x_reg
+    elif select == 1:
+        s = y_reg
+    elif select == 2:
+        s = x_shared
+    else:
+        s = y_shared
+    wp.tile_store(z, s)
+def test_tile_where(test, device):
+    x = wp.full((TILE_M,), 1.0, dtype=float, device=device, requires_grad=True)
+    y = wp.full((TILE_M,), 2.0, dtype=float, device=device, requires_grad=True)
+    z = wp.zeros((TILE_M), dtype=float, device=device, requires_grad=True)
+    z_expected = [
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 2.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 2.0, dtype=np.float32),
+    ]
+    x_grad_expected = [
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+    ]
+    y_grad_expected = [
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+    ]
+    for i in range(4):
+        tape = wp.Tape()
+        with tape:
+            wp.launch_tiled(test_tile_where_kernel, dim=[1], inputs=[i, x, y], outputs=[z], block_dim=32, device=device)
+        z.grad = wp.ones_like(z)
+        tape.backward()
+        assert_np_equal(z.numpy(), z_expected[i])
+        assert_np_equal(x.grad.numpy(), x_grad_expected[i])
+        assert_np_equal(y.grad.numpy(), y_grad_expected[i])
+        tape.zero()
 @wp.kernel
 def test_tile_transpose_kernel(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
     x = wp.tile_load(input, shape=(TILE_M, TILE_N))
@@ -1085,7 +1309,9 @@ class TestTile(unittest.TestCase):
 add_function_test(TestTile, "test_tile_copy_1d", test_tile_copy_1d, devices=devices)
 add_function_test(TestTile, "test_tile_copy_2d", test_tile_copy_2d, devices=devices)
 add_function_test(TestTile, "test_tile_unary_map", test_tile_unary_map, devices=devices)
+add_function_test(TestTile, "test_tile_unary_map_mixed_types", test_tile_unary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_binary_map", test_tile_binary_map, devices=devices)
+add_function_test(TestTile, "test_tile_binary_map_mixed_types", test_tile_binary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_transpose", test_tile_transpose, devices=devices)
 add_function_test(TestTile, "test_tile_operators", test_tile_operators, devices=devices)
 add_function_test(TestTile, "test_tile_tile", test_tile_tile, devices=get_cuda_test_devices())
@@ -1095,6 +1321,7 @@ add_function_test(TestTile, "test_tile_sum_launch", test_tile_sum_launch, device
 add_function_test(TestTile, "test_tile_extract", test_tile_extract, devices=devices)
 add_function_test(TestTile, "test_tile_extract_repeated", test_tile_extract_repeated, devices=devices)
 add_function_test(TestTile, "test_tile_assign", test_tile_assign, devices=devices)
+add_function_test(TestTile, "test_tile_where", test_tile_where, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_1d", test_tile_broadcast_add_1d, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_2d", test_tile_broadcast_add_2d, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_3d", test_tile_broadcast_add_3d, devices=devices)