warp_lang-1.5.1-py3-none-manylinux2014_aarch64.whl → warp_lang-1.6.0-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (123)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1076 -480
  8. warp/codegen.py +240 -119
  9. warp/config.py +1 -1
  10. warp/context.py +298 -84
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth_self_contact.py +260 -0
  27. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  28. warp/examples/sim/example_jacobian_ik.py +0 -2
  29. warp/examples/sim/example_quadruped.py +5 -2
  30. warp/examples/tile/example_tile_cholesky.py +79 -0
  31. warp/examples/tile/example_tile_convolution.py +2 -2
  32. warp/examples/tile/example_tile_fft.py +2 -2
  33. warp/examples/tile/example_tile_filtering.py +3 -3
  34. warp/examples/tile/example_tile_matmul.py +4 -4
  35. warp/examples/tile/example_tile_mlp.py +12 -12
  36. warp/examples/tile/example_tile_nbody.py +180 -0
  37. warp/examples/tile/example_tile_walker.py +319 -0
  38. warp/math.py +147 -0
  39. warp/native/array.h +12 -0
  40. warp/native/builtin.h +0 -1
  41. warp/native/bvh.cpp +149 -70
  42. warp/native/bvh.cu +287 -68
  43. warp/native/bvh.h +195 -85
  44. warp/native/clang/clang.cpp +5 -1
  45. warp/native/cuda_util.cpp +35 -0
  46. warp/native/cuda_util.h +5 -0
  47. warp/native/exports.h +40 -40
  48. warp/native/intersect.h +17 -0
  49. warp/native/mat.h +41 -0
  50. warp/native/mathdx.cpp +19 -0
  51. warp/native/mesh.cpp +25 -8
  52. warp/native/mesh.cu +153 -101
  53. warp/native/mesh.h +482 -403
  54. warp/native/quat.h +40 -0
  55. warp/native/solid_angle.h +7 -0
  56. warp/native/sort.cpp +85 -0
  57. warp/native/sort.cu +34 -0
  58. warp/native/sort.h +3 -1
  59. warp/native/spatial.h +11 -0
  60. warp/native/tile.h +1185 -664
  61. warp/native/tile_reduce.h +8 -6
  62. warp/native/vec.h +41 -0
  63. warp/native/warp.cpp +8 -1
  64. warp/native/warp.cu +263 -40
  65. warp/native/warp.h +19 -5
  66. warp/optim/linear.py +22 -4
  67. warp/render/render_opengl.py +124 -59
  68. warp/sim/__init__.py +6 -1
  69. warp/sim/collide.py +270 -26
  70. warp/sim/integrator_euler.py +25 -7
  71. warp/sim/integrator_featherstone.py +154 -35
  72. warp/sim/integrator_vbd.py +842 -40
  73. warp/sim/model.py +111 -53
  74. warp/stubs.py +248 -115
  75. warp/tape.py +28 -30
  76. warp/tests/aux_test_module_unload.py +15 -0
  77. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  78. warp/tests/test_array.py +74 -0
  79. warp/tests/test_assert.py +242 -0
  80. warp/tests/test_codegen.py +14 -61
  81. warp/tests/test_collision.py +2 -2
  82. warp/tests/test_examples.py +9 -0
  83. warp/tests/test_grad_debug.py +87 -2
  84. warp/tests/test_hash_grid.py +1 -1
  85. warp/tests/test_ipc.py +116 -0
  86. warp/tests/test_mat.py +138 -167
  87. warp/tests/test_math.py +47 -1
  88. warp/tests/test_matmul.py +11 -7
  89. warp/tests/test_matmul_lite.py +4 -4
  90. warp/tests/test_mesh.py +84 -60
  91. warp/tests/test_mesh_query_aabb.py +165 -0
  92. warp/tests/test_mesh_query_point.py +328 -286
  93. warp/tests/test_mesh_query_ray.py +134 -121
  94. warp/tests/test_mlp.py +2 -2
  95. warp/tests/test_operators.py +43 -0
  96. warp/tests/test_overwrite.py +2 -2
  97. warp/tests/test_quat.py +77 -0
  98. warp/tests/test_reload.py +29 -0
  99. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  100. warp/tests/test_static.py +16 -0
  101. warp/tests/test_tape.py +25 -0
  102. warp/tests/test_tile.py +134 -191
  103. warp/tests/test_tile_load.py +356 -0
  104. warp/tests/test_tile_mathdx.py +61 -8
  105. warp/tests/test_tile_mlp.py +17 -17
  106. warp/tests/test_tile_reduce.py +24 -18
  107. warp/tests/test_tile_shared_memory.py +66 -17
  108. warp/tests/test_tile_view.py +165 -0
  109. warp/tests/test_torch.py +35 -0
  110. warp/tests/test_utils.py +36 -24
  111. warp/tests/test_vec.py +110 -0
  112. warp/tests/unittest_suites.py +29 -4
  113. warp/tests/unittest_utils.py +30 -11
  114. warp/thirdparty/unittest_parallel.py +2 -2
  115. warp/types.py +409 -99
  116. warp/utils.py +9 -5
  117. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
  118. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
  119. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  120. warp/examples/benchmarks/benchmark_tile.py +0 -179
  121. warp/native/tile_gemm.h +0 -341
  122. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  123. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
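Most of the tile-related churn in the hunks below follows a single API change in 1.6.0: tile dimensions are passed as a shape= tuple instead of positional sizes, and full-tile stores drop the explicit offset arguments. A minimal sketch of the new calling convention, patterned on the test changes below (the kernel and array names here are illustrative, and a CUDA device is assumed):

import warp as wp

DIM_M, DIM_N = 16, 16


@wp.kernel
def fill_tile(out: wp.array2d(dtype=float)):
    # 1.6.0 style: shape is a keyword tuple rather than positional M, N arguments
    a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
    b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
    # 1.6.0 style: a full-tile store no longer takes explicit (i, j) offsets
    wp.tile_store(out, a + b)


out = wp.empty((DIM_M, DIM_N), dtype=float)
wp.launch_tiled(fill_tile, dim=[1], inputs=[out], block_dim=64)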
warp/tests/test_tile_shared_memory.py CHANGED
@@ -22,11 +22,11 @@ def test_tile_shared_mem_size(test, device):

      @wp.kernel
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -58,11 +58,11 @@ def test_tile_shared_mem_large(test, device):
      # we disable backward kernel gen since 128k is not supported on most architectures
      @wp.kernel(enable_backward=False)
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -73,7 +73,7 @@ def test_tile_shared_mem_large(test, device):

      # check required shared memory
      expected_forward_bytes = DIM_M * DIM_N * 4 * 2
-     expected_backward_bytes = expected_forward_bytes * 2
+     expected_backward_bytes = 0

      assert expected_forward_bytes == 2**16

@@ -94,11 +94,11 @@ def test_tile_shared_mem_graph(test, device):

      @wp.kernel
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -127,22 +127,25 @@ def test_tile_shared_mem_graph(test, device):

  # checks that stack allocations work for user functions
  def test_tile_shared_mem_func(test, device):
-     DIM_M = 32
-     DIM_N = 32
+     DIM_M = 64
+     DIM_N = 64
+
+     SMALL_DIM_M = 64 // 4
+     SMALL_DIM_N = 64 // 4

      BLOCK_DIM = 256

      @wp.func
      def add_tile_small():
-         a = wp.tile_ones(16, 16, dtype=float, storage="shared")
-         b = wp.tile_ones(16, 16, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared") * 2.0

          return a + b

      @wp.func
      def add_tile_big():
-         a = wp.tile_ones(64, 64, dtype=float, storage="shared")
-         b = wp.tile_ones(64, 64, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          return a + b

@@ -151,7 +154,7 @@ def test_tile_shared_mem_func(test, device):
          s = add_tile_small()
          b = add_tile_big()

-         wp.tile_store(out, 0, 0, b)
+         wp.tile_store(out, b)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -168,6 +171,51 @@ def test_tile_shared_mem_func(test, device):
      assert hooks.backward_smem_bytes == expected_required_shared * 2


+ def round_up(a, b):
+     return b * ((a + b - 1) // b)
+
+
+ # checks that using non-16B aligned sizes works
+ def test_tile_shared_non_aligned(test, device):
+     # Tile size = 4 (float) * 1 * 3 = 12B % 16 != 0
+     DIM_M = 1
+     DIM_N = 3
+
+     BLOCK_DIM = 256
+
+     @wp.func
+     def foo():
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 3.0
+         return a + b
+
+     @wp.kernel
+     def compute(out: wp.array2d(dtype=float)):
+         # This tests the logic in the stack allocator, which should increment and
+         # decrement the stack pointer each time foo() is called
+         # Failing to do so correctly will make b out of bounds and corrupt the results
+         for _ in range(4096):
+             foo()
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         wp.tile_store(out, b)
+
+     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
+
+     wp.launch_tiled(compute, dim=[1], inputs=[out], block_dim=BLOCK_DIM, device=device)
+
+     assert_np_equal(out.numpy(), np.ones((DIM_M, DIM_N), dtype=float))
+
+     # check shared memory for kernel on the device
+     module_exec = compute.module.load(device, BLOCK_DIM)
+     hooks = module_exec.get_kernel_hooks(compute)
+
+     # ensure that total required dynamic shared is the larger of the two tiles
+     expected_required_shared = 3 * round_up(DIM_M * DIM_N * 4, 16)
+
+     assert hooks.forward_smem_bytes == expected_required_shared
+     assert hooks.backward_smem_bytes == expected_required_shared * 2
+
+
  devices = get_cuda_test_devices()


@@ -183,6 +231,7 @@ add_function_test(
  )
  add_function_test(TestTileSharedMemory, "test_tile_shared_mem_graph", test_tile_shared_mem_graph, devices=devices)
  add_function_test(TestTileSharedMemory, "test_tile_shared_mem_func", test_tile_shared_mem_func, devices=devices)
+ add_function_test(TestTileSharedMemory, "test_tile_shared_non_aligned", test_tile_shared_non_aligned, devices=devices)


  if __name__ == "__main__":
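For the new non-aligned test above, the expected shared-memory figures work out as follows (a small worked check of the test's arithmetic, not code from the package):

def round_up(a, b):
    return b * ((a + b - 1) // b)


DIM_M, DIM_N = 1, 3
tile_bytes = DIM_M * DIM_N * 4      # a 1x3 float32 tile is 12 B, which is not 16 B aligned
padded = round_up(tile_bytes, 16)   # each shared tile is padded to 16 B
forward = 3 * padded                # the test expects three padded tiles of dynamic shared memory: 48 B
backward = forward * 2              # the backward kernel doubles that: 96 B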
warp/tests/test_tile_view.py ADDED
@@ -0,0 +1,165 @@
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import unittest
+
+ import numpy as np
+
+ import warp as wp
+ from warp.tests.unittest_utils import *
+
+ TILE_DIM = 64
+ TILE_M = 16
+ TILE_N = 32
+ TILE_O = 8
+
+
+ @wp.kernel
+ def test_tile_view_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+
+     # copy the source array row by row
+     for i in range(TILE_M):
+         # create a view on original array and store
+         row = a[i]
+         wp.tile_store(dst[i], row)
+
+
+ def test_tile_view(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_view_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_assign_1d_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+     b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N))
+
+     # copy the source array row by row
+     for i in range(int(TILE_M)):
+         # create views onto source and dest rows
+         row_src = a[i]
+         row_dst = b[i]
+
+         # copy onto dest row
+         wp.tile_assign(row_dst, row_src)
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_assign_1d(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_assign_1d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_assign_2d_kernel(src: wp.array3d(dtype=float), dst: wp.array3d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N, TILE_O))
+     b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N, TILE_O))
+
+     # copy the source array slice by slice
+     for i in range(TILE_M):
+         # create views onto source and dest slice
+         row_src = a[i]
+         row_dst = b[i]
+
+         # copy onto dest slice
+         wp.tile_assign(row_dst, row_src)
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_assign_2d(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_assign_2d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_view_offset_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+     b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)
+
+     # copy the source array slice by slice
+     for i in range(TILE_M // 4):
+         # create views onto source and dest slice 4 rows at a time
+         v = wp.tile_view(a, offset=(i * 4, 0), shape=(4, TILE_N))
+
+         # copy onto dest slice
+         wp.tile_assign(b, v, offset=(i * 4, 0))
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_view_offset(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_view_offset_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ devices = get_cuda_test_devices()
+
+
+ class TestTileView(unittest.TestCase):
+     pass
+
+
+ add_function_test(TestTileView, "test_tile_view", test_tile_view, devices=devices)
+ add_function_test(TestTileView, "test_tile_view_offset", test_tile_view_offset, devices=devices)
+ add_function_test(TestTileView, "test_tile_assign_1d", test_tile_assign_1d, devices=devices)
+ add_function_test(TestTileView, "test_tile_assign_2d", test_tile_assign_2d, devices=devices)
+
+
+ if __name__ == "__main__":
+     wp.clear_kernel_cache()
+     unittest.main(verbosity=2, failfast=True)
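The new test_tile_view.py above exercises two related idioms: indexing a tile (a[i]) to obtain a lower-dimensional view, and wp.tile_view/wp.tile_assign for windows at an offset. A condensed sketch of the offset form, under the same assumptions as the tests (CUDA device, illustrative names):

import numpy as np
import warp as wp

TILE_M, TILE_N = 16, 32


@wp.kernel
def copy_in_windows(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
    b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)

    # walk the source tile four rows at a time and assign each window into the destination tile
    for i in range(TILE_M // 4):
        v = wp.tile_view(a, offset=(i * 4, 0), shape=(4, TILE_N))
        wp.tile_assign(b, v, offset=(i * 4, 0))

    wp.tile_store(dst, b)


src = wp.array(np.random.rand(TILE_M, TILE_N).astype(np.float32))
dst = wp.zeros((TILE_M, TILE_N), dtype=float)
wp.launch_tiled(copy_in_windows, dim=[1], inputs=[src, dst], block_dim=32)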
warp/tests/test_torch.py CHANGED
@@ -403,6 +403,38 @@ def test_cuda_array_interface(test, device):
      assert a1.strides == a2.strides


+ @wp.kernel
+ def vec_sum_kernel(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3), z: wp.array(dtype=wp.vec3)):
+     tid = wp.tid()
+     z[tid] = x[tid] + y[tid]
+
+
+ # ensure torch arrays passed to Warp kernels are unchanged by Tape.backward()
+ def test_tensor_in_warp_kernel(test, device):
+     torch_device = wp.device_to_torch(device)
+
+     x = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+     y = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+     wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)
+     z = torch.zeros((10, 3), dtype=torch.float32, device=torch_device)
+     wp_z = wp.from_torch(z, dtype=wp.vec3, requires_grad=True)
+
+     tape = wp.Tape()
+
+     with tape:
+         wp.launch(vec_sum_kernel, dim=10, inputs=[x, wp_y], outputs=[wp_z], device=device)
+
+     assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+     tape.backward(grads={wp_z: wp.ones_like(wp_z)})
+
+     # x is unchanged by Tape.backward()
+     assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+     # we can still compute the gradient of y because Warp created an array for it
+     assert_np_equal(y.grad.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+
  def test_to_torch(test, device):
      import torch

@@ -913,6 +945,9 @@ try:
      add_function_test(TestTorch, "test_torch_zerocopy", test_torch_zerocopy, devices=torch_compatible_devices)
      add_function_test(TestTorch, "test_torch_autograd", test_torch_autograd, devices=torch_compatible_devices)
      add_function_test(TestTorch, "test_direct", test_direct, devices=torch_compatible_devices)
+     add_function_test(
+         TestTorch, "test_tensor_in_warp_kernel", test_tensor_in_warp_kernel, devices=torch_compatible_devices
+     )

      if torch_compatible_cuda_devices:
          add_function_test(
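The new interop test above shows the intended division of labor: tensors that should receive gradients are wrapped with wp.from_torch(..., requires_grad=True), while tensors passed to the kernel directly are left untouched by Tape.backward(). A minimal sketch of that pattern outside the test harness (the kernel and shapes are illustrative; torch and a device supported by both libraries are assumed):

import torch
import warp as wp


@wp.kernel
def vec_sum(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3), z: wp.array(dtype=wp.vec3)):
    tid = wp.tid()
    z[tid] = x[tid] + y[tid]


device = wp.get_device()
torch_device = wp.device_to_torch(device)

x = torch.ones((10, 3), dtype=torch.float32, device=torch_device)   # passed directly, no gradient tracking
y = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)          # wrapped, receives a gradient
z = torch.zeros((10, 3), dtype=torch.float32, device=torch_device)
wp_z = wp.from_torch(z, dtype=wp.vec3, requires_grad=True)

with wp.Tape() as tape:
    wp.launch(vec_sum, dim=10, inputs=[x, wp_y], outputs=[wp_z], device=device)

tape.backward(grads={wp_z: wp.ones_like(wp_z)})
print(y.grad)   # ones; x itself is left unchanged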
warp/tests/test_utils.py CHANGED
@@ -79,37 +79,49 @@ def test_array_scan_error_unsupported_dtype(test, device):


  def test_radix_sort_pairs(test, device):
-     keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
-     values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
-     wp.utils.radix_sort_pairs(keys, values, 8)
-     assert_np_equal(keys.numpy()[:8], np.array((1, 2, 3, 4, 5, 6, 7, 8)))
-     assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=keyType, device=device)
+         values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
+         wp.utils.radix_sort_pairs(keys, values, 8)
+         assert_np_equal(keys.numpy()[:8], np.array((1, 2, 3, 4, 5, 6, 7, 8)))
+         assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))


  def test_radix_sort_pairs_empty(test, device):
-     keys = wp.array((), dtype=int, device=device)
-     values = wp.array((), dtype=int, device=device)
-     wp.utils.radix_sort_pairs(keys, values, 0)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((), dtype=keyType, device=device)
+         values = wp.array((), dtype=int, device=device)
+         wp.utils.radix_sort_pairs(keys, values, 0)


  def test_radix_sort_pairs_error_insufficient_storage(test, device):
-     keys = wp.array((1, 2, 3), dtype=int, device=device)
-     values = wp.array((1, 2, 3), dtype=int, device=device)
-     with test.assertRaisesRegex(
-         RuntimeError,
-         r"Array storage must be large enough to contain 2\*count elements$",
-     ):
-         wp.utils.radix_sort_pairs(keys, values, 3)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((1, 2, 3), dtype=keyType, device=device)
+         values = wp.array((1, 2, 3), dtype=int, device=device)
+         with test.assertRaisesRegex(
+             RuntimeError,
+             r"Array storage must be large enough to contain 2\*count elements$",
+         ):
+             wp.utils.radix_sort_pairs(keys, values, 3)


  def test_radix_sort_pairs_error_unsupported_dtype(test, device):
-     keys = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
-     values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
-     with test.assertRaisesRegex(
-         RuntimeError,
-         r"Unsupported data type$",
-     ):
-         wp.utils.radix_sort_pairs(keys, values, 1)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((1.0, 2.0, 3.0), dtype=keyType, device=device)
+         values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
+         with test.assertRaisesRegex(
+             RuntimeError,
+             r"Unsupported data type$",
+         ):
+             wp.utils.radix_sort_pairs(keys, values, 1)


  def test_array_sum(test, device):
@@ -268,7 +280,7 @@ class TestUtils(unittest.TestCase):
              wp.utils.warn("hello, world!")
              wp.utils.warn("hello, world!")

-         expected = "Warp UserWarning: hello, world!\n" "Warp UserWarning: hello, world!\n"
+         expected = "Warp UserWarning: hello, world!\nWarp UserWarning: hello, world!\n"

          self.assertEqual(f.getvalue(), expected)

@@ -308,7 +320,7 @@ class TestUtils(unittest.TestCase):
              wp.utils.warn("foo", category=DeprecationWarning)
              wp.utils.warn("bar", category=DeprecationWarning)

-         expected = "Warp DeprecationWarning: foo\n" "Warp DeprecationWarning: bar\n"
+         expected = "Warp DeprecationWarning: foo\nWarp DeprecationWarning: bar\n"

          self.assertEqual(f.getvalue(), expected)

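The sort tests above now cover float32 keys in addition to int. A small host-side sketch of wp.utils.radix_sort_pairs with float keys (array contents are illustrative; as in the tests, the key and value arrays must provide storage for 2*count elements):

import warp as wp
import warp.utils

count = 4
# storage holds 2*count elements; only the first `count` entries are meaningful input
keys = wp.array((0.7, 0.2, 0.8, 0.4, 0.0, 0.0, 0.0, 0.0), dtype=wp.float32)
values = wp.array((1, 2, 3, 4, 0, 0, 0, 0), dtype=int)

wp.utils.radix_sort_pairs(keys, values, count)

print(keys.numpy()[:count])    # [0.2 0.4 0.7 0.8]
print(values.numpy()[:count])  # [2 4 1 3]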
warp/tests/test_vec.py CHANGED
@@ -6,6 +6,7 @@
  # license agreement from NVIDIA CORPORATION is strictly prohibited.

  import unittest
+ from typing import Any

  import numpy as np

@@ -1240,6 +1241,103 @@ def test_constructors_constant_length():
          v[i] = float(i)


+ Vec123 = wp.vec(123, dtype=wp.float16)
+
+
+ @wp.kernel
+ def vector_len_kernel(
+     v1: wp.vec2,
+     v2: wp.vec(3, float),
+     v3: wp.vec(Any, float),
+     v4: Vec123,
+     out: wp.array(dtype=int),
+ ):
+     length = wp.static(len(v1))
+     wp.expect_eq(len(v1), 2)
+     out[0] = len(v1)
+
+     length = len(v2)
+     wp.expect_eq(wp.static(len(v2)), 3)
+     out[1] = len(v2)
+
+     length = len(v3)
+     wp.expect_eq(len(v3), 4)
+     out[2] = wp.static(len(v3))
+
+     length = wp.static(len(v4))
+     wp.expect_eq(wp.static(len(v4)), 123)
+     out[3] = wp.static(len(v4))
+
+     foo = wp.vec2()
+     length = len(foo)
+     wp.expect_eq(len(foo), 2)
+     out[4] = len(foo)
+
+
+ def test_vector_len(test, device):
+     v1 = wp.vec2()
+     v2 = wp.vec3()
+     v3 = wp.vec4()
+     v4 = Vec123()
+     out = wp.empty(5, dtype=int, device=device)
+     wp.launch(vector_len_kernel, dim=(1,), inputs=(v1, v2, v3, v4), outputs=(out,), device=device)
+
+     test.assertEqual(out.numpy()[0], 2)
+     test.assertEqual(out.numpy()[1], 3)
+     test.assertEqual(out.numpy()[2], 4)
+     test.assertEqual(out.numpy()[3], 123)
+     test.assertEqual(out.numpy()[4], 2)
+
+
+ @wp.kernel
+ def vector_augassign_kernel(
+     a: wp.array(dtype=wp.vec3), b: wp.array(dtype=wp.vec3), c: wp.array(dtype=wp.vec3), d: wp.array(dtype=wp.vec3)
+ ):
+     i = wp.tid()
+
+     v1 = wp.vec3()
+     v2 = b[i]
+
+     v1[0] += v2[0]
+     v1[1] += v2[1]
+     v1[2] += v2[2]
+
+     a[i] = v1
+
+     v3 = wp.vec3()
+     v4 = d[i]
+
+     v3[0] -= v4[0]
+     v3[1] -= v4[1]
+     v3[2] -= v4[2]
+
+     c[i] = v3
+
+
+ def test_vector_augassign(test, device):
+     N = 3
+
+     a = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+     b = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+     c = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+     d = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+     tape = wp.Tape()
+     with tape:
+         wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
+
+     tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+
+     assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+     assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+     assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+
+     assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+     assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+     assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+
+
  devices = get_test_devices()


@@ -1350,6 +1448,18 @@ add_function_test(
      test_tpl_constructor_error_numeric_args_mismatch,
      devices=devices,
  )
+ add_function_test(
+     TestVec,
+     "test_vector_len",
+     test_vector_len,
+     devices=devices,
+ )
+ add_function_test(
+     TestVec,
+     "test_vector_augassign",
+     test_vector_augassign,
+     devices=devices,
+ )


  if __name__ == "__main__":
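A condensed sketch of the two vector features the new tests above exercise: len() on vector values inside a kernel, optionally folded at code-gen time with wp.static(), and augmented assignment on individual components (names here are illustrative):

import warp as wp


@wp.kernel
def vec_features(v: wp.vec4, out: wp.array(dtype=wp.vec3), n: wp.array(dtype=int)):
    tid = wp.tid()

    n[tid] = wp.static(len(v))   # folded to the constant 4; plain len(v) also works at runtime

    acc = wp.vec3()
    acc[0] += 1.0                # augmented assignment on vector components
    acc[1] -= 2.0
    out[tid] = acc


out = wp.empty(1, dtype=wp.vec3)
n = wp.empty(1, dtype=int)
wp.launch(vec_features, dim=1, inputs=[wp.vec4()], outputs=[out, n])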