PyPI - warp-lang - Versions diffs - 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.1__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.1__py3-none-macosx_10_13_universal2.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (191)

warp/__init__.py +7 -1
warp/autograd.py +12 -2
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +410 -0
warp/build_dll.py +6 -14
warp/builtins.py +463 -372
warp/codegen.py +196 -124
warp/config.py +42 -6
warp/context.py +496 -271
warp/dlpack.py +8 -6
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/benchmarks/benchmark_cloth.py +1 -1
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/distributed/example_jacobi_mpi.py +507 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +2 -2
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_magnetostatics.py +6 -6
warp/examples/fem/utils.py +9 -3
warp/examples/interop/example_jax_callable.py +116 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +205 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_matmul.py +2 -4
warp/fem/__init__.py +11 -1
warp/fem/adaptivity.py +4 -4
warp/fem/field/field.py +11 -1
warp/fem/field/nodal_field.py +56 -88
warp/fem/field/virtual.py +62 -23
warp/fem/geometry/adaptive_nanogrid.py +16 -13
warp/fem/geometry/closest_point.py +1 -1
warp/fem/geometry/deformed_geometry.py +5 -2
warp/fem/geometry/geometry.py +5 -0
warp/fem/geometry/grid_2d.py +12 -12
warp/fem/geometry/grid_3d.py +12 -15
warp/fem/geometry/hexmesh.py +5 -7
warp/fem/geometry/nanogrid.py +9 -11
warp/fem/geometry/quadmesh.py +13 -13
warp/fem/geometry/tetmesh.py +3 -4
warp/fem/geometry/trimesh.py +7 -20
warp/fem/integrate.py +262 -93
warp/fem/linalg.py +5 -5
warp/fem/quadrature/pic_quadrature.py +37 -22
warp/fem/quadrature/quadrature.py +194 -25
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +4 -2
warp/fem/space/basis_space.py +25 -18
warp/fem/space/hexmesh_function_space.py +2 -2
warp/fem/space/partition.py +6 -2
warp/fem/space/quadmesh_function_space.py +8 -8
warp/fem/space/shape/cube_shape_function.py +23 -23
warp/fem/space/shape/square_shape_function.py +12 -12
warp/fem/space/shape/triangle_shape_function.py +1 -1
warp/fem/space/tetmesh_function_space.py +3 -3
warp/fem/space/trimesh_function_space.py +2 -2
warp/fem/utils.py +12 -6
warp/jax.py +14 -1
warp/jax_experimental/__init__.py +16 -0
warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -29
warp/jax_experimental/ffi.py +702 -0
warp/jax_experimental/xla_ffi.py +602 -0
warp/math.py +89 -0
warp/native/array.h +13 -0
warp/native/builtin.h +29 -3
warp/native/bvh.cpp +3 -1
warp/native/bvh.cu +42 -14
warp/native/bvh.h +2 -1
warp/native/clang/clang.cpp +30 -3
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/exports.h +68 -63
warp/native/intersect.h +26 -26
warp/native/intersect_adj.h +33 -33
warp/native/marching.cu +1 -1
warp/native/mat.h +513 -9
warp/native/mesh.h +10 -10
warp/native/quat.h +99 -11
warp/native/rand.h +6 -0
warp/native/sort.cpp +122 -59
warp/native/sort.cu +152 -15
warp/native/sort.h +8 -1
warp/native/sparse.cpp +43 -22
warp/native/sparse.cu +52 -17
warp/native/svd.h +116 -0
warp/native/tile.h +312 -116
warp/native/tile_reduce.h +46 -3
warp/native/vec.h +68 -7
warp/native/volume.cpp +85 -113
warp/native/volume_builder.cu +25 -10
warp/native/volume_builder.h +6 -0
warp/native/warp.cpp +5 -6
warp/native/warp.cu +100 -11
warp/native/warp.h +19 -10
warp/optim/linear.py +10 -10
warp/render/render_opengl.py +19 -17
warp/render/render_usd.py +93 -3
warp/sim/articulation.py +4 -4
warp/sim/collide.py +32 -19
warp/sim/import_mjcf.py +449 -155
warp/sim/import_urdf.py +32 -12
warp/sim/inertia.py +189 -156
warp/sim/integrator_euler.py +8 -5
warp/sim/integrator_featherstone.py +3 -10
warp/sim/integrator_vbd.py +207 -2
warp/sim/integrator_xpbd.py +8 -5
warp/sim/model.py +71 -25
warp/sim/render.py +4 -0
warp/sim/utils.py +2 -2
warp/sparse.py +642 -555
warp/stubs.py +217 -20
warp/tests/__main__.py +0 -15
warp/tests/assets/torus.usda +1 -1
warp/tests/cuda/__init__.py +0 -0
warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
warp/tests/interop/__init__.py +0 -0
warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
warp/tests/sim/__init__.py +0 -0
warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
warp/tests/{test_collision.py → sim/test_collision.py} +236 -205
warp/tests/sim/test_inertia.py +161 -0
warp/tests/{test_model.py → sim/test_model.py} +40 -0
warp/tests/{flaky_test_sim_grad.py → sim/test_sim_grad.py} +4 -0
warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
warp/tests/sim/test_vbd.py +597 -0
warp/tests/sim/test_xpbd.py +399 -0
warp/tests/test_bool.py +1 -1
warp/tests/test_codegen.py +24 -3
warp/tests/test_examples.py +40 -38
warp/tests/test_fem.py +98 -14
warp/tests/test_linear_solvers.py +0 -11
warp/tests/test_mat.py +577 -156
warp/tests/test_mat_scalar_ops.py +4 -4
warp/tests/test_overwrite.py +0 -60
warp/tests/test_quat.py +356 -151
warp/tests/test_rand.py +44 -37
warp/tests/test_sparse.py +47 -6
warp/tests/test_spatial.py +75 -0
warp/tests/test_static.py +1 -1
warp/tests/test_utils.py +84 -4
warp/tests/test_vec.py +336 -178
warp/tests/tile/__init__.py +0 -0
warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
warp/tests/{test_tile_load.py → tile/test_tile_load.py} +98 -1
warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
warp/tests/unittest_serial.py +1 -0
warp/tests/unittest_suites.py +45 -62
warp/tests/unittest_utils.py +2 -1
warp/thirdparty/unittest_parallel.py +3 -1
warp/types.py +175 -666
warp/utils.py +137 -72
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/METADATA +46 -12
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/RECORD +184 -171
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info/licenses}/LICENSE.md +0 -26
warp/examples/optim/example_walker.py +0 -317
warp/native/cutlass_gemm.cpp +0 -43
warp/native/cutlass_gemm.cu +0 -382
warp/tests/test_matmul.py +0 -511
warp/tests/test_matmul_lite.py +0 -411
warp/tests/test_vbd.py +0 -386
warp/tests/unused_test_misc.py +0 -77
/warp/tests/{test_async.py → cuda/test_async.py} +0 -0
/warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
/warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
/warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
/warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
/warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
/warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
/warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
/warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
/warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
/warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
/warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
/warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
/warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
/warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
/warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
/warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0

warp/tests/test_rand.py CHANGED Viewed

@@ -26,6 +26,8 @@ def test_kernel(
     kernel_seed: int,
     int_a: wp.array(dtype=int),
     int_ab: wp.array(dtype=int),
+    uint_a: wp.array(dtype=wp.uint32),
+    uint_ab: wp.array(dtype=wp.uint32),
     float_01: wp.array(dtype=float),
     float_ab: wp.array(dtype=float),
 ):
@@ -35,6 +37,8 @@ def test_kernel(
     int_a[tid] = wp.randi(state)
     int_ab[tid] = wp.randi(state, 0, 100)
+    uint_a[tid] = wp.randu(state)
+    uint_ab[tid] = wp.randu(state, wp.uint32(0), wp.uint32(100))
     float_01[tid] = wp.randf(state)
     float_ab[tid] = wp.randf(state, 0.0, 100.0)
@@ -42,37 +46,25 @@ def test_kernel(
 def test_rand(test, device):
     N = 10
-    int_a_device = wp.zeros(N, dtype=int, device=device)
-    int_a_host = wp.zeros(N, dtype=int, device="cpu")
-    int_ab_device = wp.zeros(N, dtype=int, device=device)
-    int_ab_host = wp.zeros(N, dtype=int, device="cpu")
+    int_a = wp.zeros(N, dtype=int, device=device)
+    int_ab = wp.zeros(N, dtype=int, device=device)
-    float_01_device = wp.zeros(N, dtype=float, device=device)
-    float_01_host = wp.zeros(N, dtype=float, device="cpu")
-    float_ab_device = wp.zeros(N, dtype=float, device=device)
-    float_ab_host = wp.zeros(N, dtype=float, device="cpu")
+    uint_a = wp.zeros(N, dtype=wp.uint32, device=device)
+    uint_ab = wp.zeros(N, dtype=wp.uint32, device=device)
+    float_01 = wp.zeros(N, dtype=float, device=device)
+    float_ab = wp.zeros(N, dtype=float, device=device)
     seed = 42
     wp.launch(
         kernel=test_kernel,
         dim=N,
-        inputs=[seed, int_a_device, int_ab_device, float_01_device, float_ab_device],
+        inputs=[seed, int_a, int_ab, uint_a, uint_ab, float_01, float_ab],
         outputs=[],
         device=device,
     )
-    wp.copy(int_a_host, int_a_device)
-    wp.copy(int_ab_host, int_ab_device)
-    wp.copy(float_01_host, float_01_device)
-    wp.copy(float_ab_host, float_ab_device)
-    wp.synchronize_device(device)
-    int_a = int_a_host.numpy()
-    int_ab = int_ab_host.numpy()
-    float_01 = float_01_host.numpy()
-    float_ab = float_ab_host.numpy()
     int_a_true = np.array(
         [
             -575632308,
@@ -88,32 +80,47 @@ def test_rand(test, device):
         ]
     )
     int_ab_true = np.array([46, 58, 46, 83, 85, 39, 72, 99, 18, 41])
+    uint_a_true = np.array(
+        [
+            3133687854,
+            3702303309,
+            1235698096,
+            3516599792,
+            800302729,
+            2620462179,
+            2423739693,
+            3024873594,
+            2783682377,
+            1188846332,
+        ]
+    )
+    uint_ab_true = np.array([6, 55, 2, 92, 55, 93, 65, 23, 48, 0])
     float_01_true = np.array(
         [
-            0.72961855,
-            0.86200964,
-            0.28770837,
-            0.8187722,
-            0.186335,
-            0.6101239,
-            0.56432086,
-            0.70428324,
-            0.64812654,
-            0.27679986,
+            0.8265858,
+            0.5874614,
+            0.1508659,
+            0.9498008,
+            0.02531803,
+            0.8520948,
+            0.0001185536,
+            0.4855958,
+            0.06277305,
+            0.2214079,
         ]
     )
     float_ab_true = np.array(
-        [96.04259, 73.33809, 63.601555, 38.647305, 71.813896, 64.65809, 77.79791, 46.579605, 94.614456, 91.921814]
+        [79.84678, 76.362206, 32.135242, 99.70866, 70.45863, 20.6523, 45.164482, 55.583008, 76.60291, 35.36277]
     )
-    test.assertTrue((int_a == int_a_true).all())
-    test.assertTrue((int_ab == int_ab_true).all())
+    assert_np_equal(int_a.numpy(), int_a_true)
+    assert_np_equal(int_ab.numpy(), int_ab_true)
-    err = np.max(np.abs(float_01 - float_01_true))
-    test.assertTrue(err < 1e-04)
+    assert_np_equal(uint_a.numpy(), uint_a_true)
+    assert_np_equal(uint_ab.numpy(), uint_ab_true)
-    err = np.max(np.abs(float_ab - float_ab_true))
-    test.assertTrue(err < 1e-04)
+    assert_np_equal(float_01.numpy(), float_01_true, 1e-04)
+    assert_np_equal(float_ab.numpy(), float_ab_true, 1e-04)
 @wp.kernel

warp/tests/test_sparse.py CHANGED Viewed

@@ -19,10 +19,12 @@ import numpy as np
 import warp as wp
 from warp.sparse import (
+    bsr_assign,
     bsr_axpy,
     bsr_axpy_work_arrays,
     bsr_copy,
     bsr_diag,
+    bsr_from_triplets,
     bsr_get_diag,
     bsr_identity,
     bsr_mm,
@@ -232,18 +234,43 @@ def test_bsr_split_merge(test, device):
     with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
         bsr_copy(bsr, block_shape=(3, 3))
-    with test.assertRaisesRegex(
-        ValueError, r"Dest block shape \(5, 5\) is not an exact multiple of src block shape \(4, 2\)"
-    ):
+    with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
         bsr_copy(bsr, block_shape=(5, 5))
     with test.assertRaisesRegex(
         ValueError,
-        "The total rows and columns of the src matrix cannot be evenly divided using the requested block shape",
+        "The requested block shape does not evenly divide the source matrix",
     ):
         bsr_copy(bsr, block_shape=(32, 32))
+def test_bsr_assign_masked(test, device):
+    rng = np.random.default_rng(123)
+    block_shape = (1, 2)
+    nrow = 16
+    ncol = 8
+    shape = (block_shape[0] * nrow, block_shape[1] * ncol)
+    n = 20
+    rows = wp.array(rng.integers(0, high=nrow, size=n, dtype=int), dtype=int, device=device)
+    cols = wp.array(rng.integers(0, high=ncol, size=n, dtype=int), dtype=int, device=device)
+    vals = wp.array(rng.random(size=(n, block_shape[0], block_shape[1])), dtype=float, device=device)
+    A = bsr_from_triplets(nrow, ncol, rows, cols, vals)
+    # Extract coarse diagonal with copy + diag funcs, for reference
+    A_coarse = bsr_copy(A, block_shape=(4, 4))
+    ref = _bsr_to_dense(bsr_diag(bsr_get_diag(A_coarse)))
+    # Extract coarse diagonal with masked assign (more memory efficient)
+    diag_masked = bsr_diag(rows_of_blocks=shape[0] // 4, block_type=A_coarse.dtype, device=device)
+    bsr_assign(src=A, dest=diag_masked, masked=True)
+    res = _bsr_to_dense(diag_masked)
+    assert_np_equal(res, ref, 0.0001)
 def make_test_bsr_transpose(block_shape, scalar_type):
     def test_bsr_transpose(test, device):
         rng = np.random.default_rng(123)
@@ -316,6 +343,12 @@ def make_test_bsr_axpy(block_shape, scalar_type):
         res = _bsr_to_dense(y)
         assert_np_equal(res, ref, 0.0001)
+        # test masked
+        y_mask = bsr_from_triplets(nrow, ncol, y.uncompress_rows()[:1], y.columns[:1], y.values[:1])
+        bsr_axpy(y, y_mask, masked=True)
+        assert y_mask.nnz_sync() == 1
+        assert_np_equal(y_mask.values.numpy(), 2.0 * y.values[:1].numpy(), 0.0001)
         # test incompatible shapes
         y.ncol = y.ncol + 1
         with test.assertRaisesRegex(ValueError, "Matrices must have the same number of rows and columns"):
@@ -383,6 +416,13 @@ def make_test_bsr_mm(block_shape, scalar_type):
         bsr_mm(x, y, z, alpha, beta, work_arrays=work_arrays, reuse_topology=True)
         assert_np_equal(res, ref, 0.0001)
+        # test masked mm
+        z = bsr_diag(rows_of_blocks=z.nrow, block_type=z.dtype, device=z.device)
+        bsr_mm(x, y, z, masked=True)
+        res = _bsr_to_dense(z)
+        ref = _bsr_to_dense(bsr_diag(bsr_get_diag(x @ y)))
+        assert_np_equal(res, ref, 0.0001)
         # using overloaded operators
         x = (alpha * x) @ y
         assert_np_equal(res, ref, 0.0001)
@@ -479,12 +519,12 @@ def make_test_bsr_mv(block_shape, scalar_type):
         assert_np_equal(res, ref, 0.0001)
         A.ncol = A.ncol + 1
-        with test.assertRaisesRegex(ValueError, "Number of columns"):
+        with test.assertRaisesRegex(ValueError, "Incompatible 'x'"):
             bsr_mv(A, x, y)
         A.ncol = A.ncol - 1
         A.nrow = A.nrow - 1
-        with test.assertRaisesRegex(ValueError, "Number of rows"):
+        with test.assertRaisesRegex(ValueError, "Incompatible 'y'"):
             bsr_mv(A, x, y)
     return test_bsr_mv
@@ -518,6 +558,7 @@ add_function_test(TestSparse, "test_csr_from_triplets", test_csr_from_triplets,
 add_function_test(TestSparse, "test_bsr_from_triplets", test_bsr_from_triplets, devices=devices)
 add_function_test(TestSparse, "test_bsr_get_diag", test_bsr_get_set_diag, devices=devices)
 add_function_test(TestSparse, "test_bsr_split_merge", test_bsr_split_merge, devices=devices)
+add_function_test(TestSparse, "test_bsr_assign_masked", test_bsr_assign_masked, devices=devices)
 add_function_test(TestSparse, "test_csr_transpose", make_test_bsr_transpose((1, 1), wp.float32), devices=devices)
 add_function_test(TestSparse, "test_bsr_transpose_1_3", make_test_bsr_transpose((1, 3), wp.float32), devices=devices)

warp/tests/test_spatial.py CHANGED Viewed

@@ -1969,6 +1969,67 @@ def test_transform_anon_type_instance(test, device, dtype, register_kernels=Fals
         tape.zero()
+def test_transform_from_matrix(test, device, dtype, register_kernels=False):
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat44 = wp.types.matrix((4, 4), wptype)
+    vec3 = wp.types.vector(3, wptype)
+    quat = wp.types.quaternion(wptype)
+    def transform_from_matrix_kernel():
+        # fmt: off
+        m = mat44(
+            wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
+            wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
+            wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
+            wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
+        )
+        # fmt: on
+        t = wp.transform_from_matrix(m)
+        p = wp.transform_get_translation(t)
+        q = wp.transform_get_rotation(t)
+        wp.expect_near(p, vec3(wptype(1.0), wptype(2.0), wptype(3.0)), tolerance=wptype(1e-3))
+        wp.expect_near(q, quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8)), tolerance=wptype(1e-3))
+    kernel = getkernel(transform_from_matrix_kernel, suffix=dtype.__name__)
+    if register_kernels:
+        return
+    wp.launch(kernel, dim=1, device=device)
+def test_transform_to_matrix(test, device, dtype, register_kernels=False):
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat44 = wp.types.matrix((4, 4), wptype)
+    vec3 = wp.types.vector(3, wptype)
+    quat = wp.types.quaternion(wptype)
+    def transform_to_matrix_kernel():
+        p = vec3(wptype(1.0), wptype(2.0), wptype(3.0))
+        q = quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8))
+        t = wp.transformation(p, q)
+        m = wp.transform_to_matrix(t)
+        # fmt: off
+        wp.expect_near(
+            m,
+            mat44(
+                wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
+                wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
+                wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
+                wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
+            ),
+            tolerance=wptype(1e-3),
+        )
+        # fmt: on
+    kernel = getkernel(transform_to_matrix_kernel, suffix=dtype.__name__)
+    if register_kernels:
+        return
+    wp.launch(kernel, dim=1, device=device)
 devices = get_test_devices()
@@ -2145,6 +2206,20 @@ for dtype in np_float_types:
     add_function_test_register_kernel(
         TestSpatial, f"test_spatial_adjoint_{dtype.__name__}", test_spatial_adjoint, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestSpatial,
+        f"test_transform_from_matrix_{dtype.__name__}",
+        test_transform_from_matrix,
+        devices=devices,
+        dtype=dtype,
+    )
+    add_function_test_register_kernel(
+        TestSpatial,
+        f"test_transform_to_matrix_{dtype.__name__}",
+        test_transform_to_matrix,
+        devices=devices,
+        dtype=dtype,
+    )
     # \TODO: test spatial_mass and spatial_jacobian

warp/tests/test_static.py CHANGED Viewed

@@ -307,7 +307,7 @@ def test_function_lookup(test, device):
 def count_ssa_occurrences(kernel: wp.Kernel, ssas: List[str]) -> Dict[str, int]:
     # analyze the generated code
-    counts = {ssa: 0 for ssa in ssas}
+    counts = dict.fromkeys(ssas, 0)
     for line in kernel.adj.blocks[0].body_forward:
         for ssa in ssas:
             if ssa in line:

warp/tests/test_utils.py CHANGED Viewed

@@ -87,7 +87,7 @@ def test_array_scan_error_unsupported_dtype(test, device):
 def test_radix_sort_pairs(test, device):
-    keyTypes = [int, wp.float32]
+    keyTypes = [int, wp.float32, wp.int64]
     for keyType in keyTypes:
         keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=keyType, device=device)
@@ -97,18 +97,46 @@ def test_radix_sort_pairs(test, device):
         assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))
-def test_radix_sort_pairs_empty(test, device):
+def test_segmented_sort_pairs(test, device):
     keyTypes = [int, wp.float32]
+    for keyType in keyTypes:
+        keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=keyType, device=device)
+        values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
+        wp.utils.segmented_sort_pairs(
+            keys,
+            values,
+            8,
+            wp.array((0, 4), dtype=int, device=device),
+            wp.array((4, 8), dtype=int, device=device),
+        )
+        assert_np_equal(keys.numpy()[:8], np.array((2, 4, 7, 8, 1, 3, 5, 6)))
+        assert_np_equal(values.numpy()[:8], np.array((2, 4, 1, 3, 5, 8, 7, 6)))
+def test_radix_sort_pairs_empty(test, device):
+    keyTypes = [int, wp.float32, wp.int64]
     for keyType in keyTypes:
         keys = wp.array((), dtype=keyType, device=device)
         values = wp.array((), dtype=int, device=device)
         wp.utils.radix_sort_pairs(keys, values, 0)
-def test_radix_sort_pairs_error_insufficient_storage(test, device):
+def test_segmented_sort_pairs_empty(test, device):
     keyTypes = [int, wp.float32]
+    for keyType in keyTypes:
+        keys = wp.array((), dtype=keyType, device=device)
+        values = wp.array((), dtype=int, device=device)
+        wp.utils.segmented_sort_pairs(
+            keys, values, 0, wp.array((), dtype=int, device=device), wp.array((), dtype=int, device=device)
+        )
+def test_radix_sort_pairs_error_insufficient_storage(test, device):
+    keyTypes = [int, wp.float32, wp.int64]
     for keyType in keyTypes:
         keys = wp.array((1, 2, 3), dtype=keyType, device=device)
         values = wp.array((1, 2, 3), dtype=int, device=device)
@@ -119,9 +147,28 @@ def test_radix_sort_pairs_error_insufficient_storage(test, device):
             wp.utils.radix_sort_pairs(keys, values, 3)
-def test_radix_sort_pairs_error_unsupported_dtype(test, device):
+def test_segmented_sort_pairs_error_insufficient_storage(test, device):
     keyTypes = [int, wp.float32]
+    for keyType in keyTypes:
+        keys = wp.array((1, 2, 3), dtype=keyType, device=device)
+        values = wp.array((1, 2, 3), dtype=int, device=device)
+        with test.assertRaisesRegex(
+            RuntimeError,
+            r"Array storage must be large enough to contain 2\*count elements$",
+        ):
+            wp.utils.segmented_sort_pairs(
+                keys,
+                values,
+                3,
+                wp.array((0,), dtype=int, device=device),
+                wp.array((3,), dtype=int, device=device),
+            )
+def test_radix_sort_pairs_error_unsupported_dtype(test, device):
+    keyTypes = [int, wp.float32, wp.int64]
     for keyType in keyTypes:
         keys = wp.array((1.0, 2.0, 3.0), dtype=keyType, device=device)
         values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
@@ -132,6 +179,25 @@ def test_radix_sort_pairs_error_unsupported_dtype(test, device):
             wp.utils.radix_sort_pairs(keys, values, 1)
+def test_segmented_sort_pairs_error_unsupported_dtype(test, device):
+    keyTypes = [int, wp.float32]
+    for keyType in keyTypes:
+        keys = wp.array((1.0, 2.0, 3.0), dtype=keyType, device=device)
+        values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
+        with test.assertRaisesRegex(
+            RuntimeError,
+            r"Unsupported data type$",
+        ):
+            wp.utils.segmented_sort_pairs(
+                keys,
+                values,
+                1,
+                wp.array((0,), dtype=int, device=device),
+                wp.array((3,), dtype=int, device=device),
+            )
 def test_array_sum(test, device):
     for dtype in (wp.float32, wp.float64):
         with test.subTest(dtype=dtype):
@@ -468,6 +534,20 @@ add_function_test(
     test_radix_sort_pairs_error_unsupported_dtype,
     devices=devices,
 )
+add_function_test(TestUtils, "test_segmented_sort_pairs", test_segmented_sort_pairs, devices=devices)
+add_function_test(TestUtils, "test_segmented_sort_pairs_empty", test_segmented_sort_pairs, devices=devices)
+add_function_test(
+    TestUtils,
+    "test_segmented_sort_pairs_error_insufficient_storage",
+    test_segmented_sort_pairs_error_insufficient_storage,
+    devices=devices,
+)
+add_function_test(
+    TestUtils,
+    "test_segmented_sort_pairs_error_unsupported_dtype",
+    test_segmented_sort_pairs_error_unsupported_dtype,
+    devices=devices,
+)
 add_function_test(TestUtils, "test_array_sum", test_array_sum, devices=devices)
 add_function_test(
     TestUtils, "test_array_sum_error_out_dtype_mismatch", test_array_sum_error_out_dtype_mismatch, devices=devices