PyPI - warp-lang - Versions diffs - 1.6.2__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl - Mend

warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (179)

warp/__init__.py +7 -1
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +410 -0
warp/build_dll.py +6 -14
warp/builtins.py +452 -362
warp/codegen.py +179 -119
warp/config.py +42 -6
warp/context.py +490 -271
warp/dlpack.py +8 -6
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +2 -2
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_magnetostatics.py +6 -6
warp/examples/fem/utils.py +9 -3
warp/examples/interop/example_jax_callable.py +116 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +205 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_matmul.py +2 -4
warp/fem/__init__.py +11 -1
warp/fem/adaptivity.py +4 -4
warp/fem/field/nodal_field.py +22 -68
warp/fem/field/virtual.py +62 -23
warp/fem/geometry/adaptive_nanogrid.py +9 -10
warp/fem/geometry/closest_point.py +1 -1
warp/fem/geometry/deformed_geometry.py +5 -2
warp/fem/geometry/geometry.py +5 -0
warp/fem/geometry/grid_2d.py +12 -12
warp/fem/geometry/grid_3d.py +12 -15
warp/fem/geometry/hexmesh.py +5 -7
warp/fem/geometry/nanogrid.py +9 -11
warp/fem/geometry/quadmesh.py +13 -13
warp/fem/geometry/tetmesh.py +3 -4
warp/fem/geometry/trimesh.py +3 -8
warp/fem/integrate.py +262 -93
warp/fem/linalg.py +5 -5
warp/fem/quadrature/pic_quadrature.py +37 -22
warp/fem/quadrature/quadrature.py +194 -25
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +4 -2
warp/fem/space/basis_space.py +25 -18
warp/fem/space/hexmesh_function_space.py +2 -2
warp/fem/space/partition.py +6 -2
warp/fem/space/quadmesh_function_space.py +8 -8
warp/fem/space/shape/cube_shape_function.py +23 -23
warp/fem/space/shape/square_shape_function.py +12 -12
warp/fem/space/shape/triangle_shape_function.py +1 -1
warp/fem/space/tetmesh_function_space.py +3 -3
warp/fem/space/trimesh_function_space.py +2 -2
warp/fem/utils.py +12 -6
warp/jax.py +14 -1
warp/jax_experimental/__init__.py +16 -0
warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
warp/jax_experimental/ffi.py +698 -0
warp/jax_experimental/xla_ffi.py +602 -0
warp/math.py +89 -0
warp/native/array.h +13 -0
warp/native/builtin.h +29 -3
warp/native/bvh.cpp +3 -1
warp/native/bvh.cu +42 -14
warp/native/bvh.h +2 -1
warp/native/clang/clang.cpp +30 -3
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/exports.h +68 -63
warp/native/intersect.h +26 -26
warp/native/intersect_adj.h +33 -33
warp/native/marching.cu +1 -1
warp/native/mat.h +513 -9
warp/native/mesh.h +10 -10
warp/native/quat.h +99 -11
warp/native/rand.h +6 -0
warp/native/sort.cpp +122 -59
warp/native/sort.cu +152 -15
warp/native/sort.h +8 -1
warp/native/sparse.cpp +43 -22
warp/native/sparse.cu +52 -17
warp/native/svd.h +116 -0
warp/native/tile.h +301 -105
warp/native/tile_reduce.h +46 -3
warp/native/vec.h +68 -7
warp/native/volume.cpp +85 -113
warp/native/volume_builder.cu +25 -10
warp/native/volume_builder.h +6 -0
warp/native/warp.cpp +5 -6
warp/native/warp.cu +99 -10
warp/native/warp.h +19 -10
warp/optim/linear.py +10 -10
warp/sim/articulation.py +4 -4
warp/sim/collide.py +21 -10
warp/sim/import_mjcf.py +449 -155
warp/sim/import_urdf.py +32 -12
warp/sim/integrator_euler.py +5 -5
warp/sim/integrator_featherstone.py +3 -10
warp/sim/integrator_vbd.py +207 -2
warp/sim/integrator_xpbd.py +5 -5
warp/sim/model.py +42 -13
warp/sim/utils.py +2 -2
warp/sparse.py +642 -555
warp/stubs.py +216 -19
warp/tests/__main__.py +0 -15
warp/tests/cuda/__init__.py +0 -0
warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
warp/tests/interop/__init__.py +0 -0
warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
warp/tests/sim/__init__.py +0 -0
warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
warp/tests/{test_model.py → sim/test_model.py} +40 -0
warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
warp/tests/sim/test_vbd.py +597 -0
warp/tests/test_bool.py +1 -1
warp/tests/test_examples.py +28 -36
warp/tests/test_fem.py +23 -4
warp/tests/test_linear_solvers.py +0 -11
warp/tests/test_mat.py +233 -79
warp/tests/test_mat_scalar_ops.py +4 -4
warp/tests/test_overwrite.py +0 -60
warp/tests/test_quat.py +67 -46
warp/tests/test_rand.py +44 -37
warp/tests/test_sparse.py +47 -6
warp/tests/test_spatial.py +75 -0
warp/tests/test_static.py +1 -1
warp/tests/test_utils.py +84 -4
warp/tests/test_vec.py +46 -34
warp/tests/tile/__init__.py +0 -0
warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
warp/tests/unittest_serial.py +1 -0
warp/tests/unittest_suites.py +45 -59
warp/tests/unittest_utils.py +2 -1
warp/thirdparty/unittest_parallel.py +3 -1
warp/types.py +110 -658
warp/utils.py +137 -72
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
warp/examples/optim/example_walker.py +0 -317
warp/native/cutlass_gemm.cpp +0 -43
warp/native/cutlass_gemm.cu +0 -382
warp/tests/test_matmul.py +0 -511
warp/tests/test_matmul_lite.py +0 -411
warp/tests/test_vbd.py +0 -386
warp/tests/unused_test_misc.py +0 -77
/warp/tests/{test_async.py → cuda/test_async.py} +0 -0
/warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
/warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
/warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
/warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
/warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
/warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
/warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
/warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
/warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
/warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
/warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
/warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
/warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
/warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
/warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
/warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
/warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0

warp/tests/test_mat_scalar_ops.py CHANGED Viewed

@@ -334,19 +334,19 @@ def test_constructors(test, device, dtype, register_kernels=False):
         outcomponents: wp.array(dtype=wptype),
     ):
         # multiply outputs by 2 so we've got something to backpropagate:
-        m2result = wptype(2) * mat22(vec2(input[0], input[2]), vec2(input[1], input[3]))
-        m3result = wptype(2) * mat33(
+        m2result = wptype(2) * wp.matrix_from_cols(vec2(input[0], input[2]), vec2(input[1], input[3]))
+        m3result = wptype(2) * wp.matrix_from_cols(
             vec3(input[4], input[7], input[10]),
             vec3(input[5], input[8], input[11]),
             vec3(input[6], input[9], input[12]),
         )
-        m4result = wptype(2) * mat44(
+        m4result = wptype(2) * wp.matrix_from_cols(
             vec4(input[13], input[17], input[21], input[25]),
             vec4(input[14], input[18], input[22], input[26]),
             vec4(input[15], input[19], input[23], input[27]),
             vec4(input[16], input[20], input[24], input[28]),
         )
-        m5result = wptype(2) * mat55(
+        m5result = wptype(2) * wp.matrix_from_cols(
             vec5(input[29], input[34], input[39], input[44], input[49]),
             vec5(input[30], input[35], input[40], input[45], input[50]),
             vec5(input[31], input[36], input[41], input[46], input[51]),

warp/tests/test_overwrite.py CHANGED Viewed

@@ -23,8 +23,6 @@ import numpy as np
 import warp as wp
 from warp.tests.unittest_utils import *
-wp.init()  # For wp.context.runtime.core.is_cutlass_enabled()
 # kernels are defined in the global scope, to ensure wp.Kernel objects are not GC'ed in the MGPU case
 # kernel args are assigned array modes during codegen, so wp.Kernel objects generated during codegen
 # must be preserved for overwrite tracking to function
@@ -378,62 +376,6 @@ def test_copy(test, device):
         wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
-# wp.matmul uses wp.record_func. Ensure array modes are propagated correctly.
-def test_matmul(test, device):
-    if device.is_cuda and not wp.context.runtime.core.is_cutlass_enabled():
-        test.skipTest("Warp was not built with CUTLASS support")
-    saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
-    try:
-        wp.config.verify_autograd_array_access = True
-        a = wp.ones((3, 3), dtype=float, requires_grad=True, device=device)
-        b = wp.ones_like(a)
-        c = wp.ones_like(a)
-        d = wp.zeros_like(a)
-        tape = wp.Tape()
-        with tape:
-            wp.matmul(a, b, c, d)
-        test.assertEqual(a._is_read, True)
-        test.assertEqual(b._is_read, True)
-        test.assertEqual(c._is_read, True)
-        test.assertEqual(d._is_read, False)
-    finally:
-        wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
-# wp.batched_matmul uses wp.record_func. Ensure array modes are propagated correctly.
-def test_batched_matmul(test, device):
-    if device.is_cuda and not wp.context.runtime.core.is_cutlass_enabled():
-        test.skipTest("Warp was not built with CUTLASS support")
-    saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
-    try:
-        wp.config.verify_autograd_array_access = True
-        a = wp.ones((1, 3, 3), dtype=float, requires_grad=True, device=device)
-        b = wp.ones_like(a)
-        c = wp.ones_like(a)
-        d = wp.zeros_like(a)
-        tape = wp.Tape()
-        with tape:
-            wp.batched_matmul(a, b, c, d)
-        test.assertEqual(a._is_read, True)
-        test.assertEqual(b._is_read, True)
-        test.assertEqual(c._is_read, True)
-        test.assertEqual(d._is_read, False)
-    finally:
-        wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
 # write after read warning with in-place operators within a kernel
 def test_in_place_operators_warning(test, device):
     saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
@@ -593,8 +535,6 @@ add_function_test(TestOverwrite, "test_views", test_views, devices=devices)
 add_function_test(TestOverwrite, "test_reset", test_reset, devices=devices)
 add_function_test(TestOverwrite, "test_copy", test_copy, devices=devices)
-add_function_test(TestOverwrite, "test_matmul", test_matmul, devices=devices, check_output=False)
-add_function_test(TestOverwrite, "test_batched_matmul", test_batched_matmul, devices=devices, check_output=False)
 add_function_test(TestOverwrite, "test_atomic_operations", test_atomic_operations, devices=devices)
 # Some warning are only issued during codegen, and codegen only runs on cuda_0 in the MGPU case.

warp/tests/test_quat.py CHANGED Viewed

@@ -1205,7 +1205,6 @@ def test_quat_to_matrix(test, device, dtype, register_kernels=False):
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
     quat = wp.types.quaternion(dtype=wptype)
-    mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)
     vec3 = wp.types.vector(length=3, dtype=wptype)
     def check_quat_to_matrix(
@@ -1239,7 +1238,7 @@ def test_quat_to_matrix(test, device, dtype, register_kernels=False):
                 wptype(1),
             ),
         )
-        result_manual = mat3(xaxis, yaxis, zaxis)
+        result_manual = wp.matrix_from_cols(xaxis, yaxis, zaxis)
         idx = 0
         for i in range(3):
@@ -1711,18 +1710,31 @@ def test_quat_rpy_grad(test, device, dtype, register_kernels=False):
 def test_quat_from_matrix(test, device, dtype, register_kernels=False):
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
     mat33 = wp.types.matrix((3, 3), wptype)
+    mat44 = wp.types.matrix((4, 4), wptype)
     quat = wp.types.quaternion(wptype)
     def quat_from_matrix(m: wp.array2d(dtype=wptype), loss: wp.array(dtype=wptype), idx: int):
         tid = wp.tid()
-        matrix = mat33(
-            m[tid, 0], m[tid, 1], m[tid, 2], m[tid, 3], m[tid, 4], m[tid, 5], m[tid, 6], m[tid, 7], m[tid, 8]
+        # fmt: off
+        m3 = mat33(
+            m[tid, 0], m[tid, 1], m[tid, 2],
+            m[tid, 3], m[tid, 4], m[tid, 5],
+            m[tid, 6], m[tid, 7], m[tid, 8],
         )
+        q1 = wp.quat_from_matrix(m3)
-        q = wp.quat_from_matrix(matrix)
+        m4 = mat44(
+            m[tid, 0], m[tid, 1], m[tid, 2], wptype(0.0),
+            m[tid, 3], m[tid, 4], m[tid, 5], wptype(0.0),
+            m[tid, 6], m[tid, 7], m[tid, 8], wptype(0.0),
+            wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
+        )
+        q2 = wp.quat_from_matrix(m4)
+        # fmt: on
-        wp.atomic_add(loss, 0, q[idx])
+        wp.expect_eq(q1, q2)
+        wp.atomic_add(loss, 0, q1[idx])
     def quat_from_matrix_forward(mats: wp.array2d(dtype=wptype), loss: wp.array(dtype=wptype), idx: int):
         tid = wp.tid()
@@ -1894,7 +1906,7 @@ def test_quat_identity(test, device, dtype, register_kernels=False):
 ############################################################
-def test_quat_assign(test, device, dtype, register_kernels=False):
+def test_quat_assign_inplace(test, device, dtype, register_kernels=False):
     np_type = np.dtype(dtype)
     wp_type = wp.types.np_dtype_to_warp_type[np_type]
@@ -1915,16 +1927,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
         g = q[0] + wp_type(2.0) * q[1] + wp_type(3.0) * q[2] + wp_type(4.0) * q[3]
         x[tid] = g
-    def quattest_in_register_overwrite(x: wp.array(dtype=quat), a: wp.array(dtype=quat)):
-        tid = wp.tid()
-        f = quat()
-        a_quat = a[tid]
-        f = a_quat
-        f[1] = wp_type(3.0)
-        x[tid] = f
     def quattest_component(x: wp.array(dtype=quat), y: wp.array(dtype=wp_type)):
         i = wp.tid()
@@ -1937,7 +1939,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
     kernel_read_write_store = getkernel(quattest_read_write_store, suffix=dtype.__name__)
     kernel_in_register = getkernel(quattest_in_register, suffix=dtype.__name__)
-    kernel_in_register_overwrite = getkernel(quattest_in_register_overwrite, suffix=dtype.__name__)
     kernel_component = getkernel(quattest_component, suffix=dtype.__name__)
     if register_kernels:
@@ -1973,7 +1974,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
     x = wp.zeros(1, dtype=quat, requires_grad=True)
     y = wp.ones(1, dtype=wp_type, requires_grad=True)
-    tape = wp.Tape()
     with tape:
         wp.launch(kernel_component, dim=1, inputs=[x, y])
@@ -1982,18 +1982,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
     assert_np_equal(x.numpy(), np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np_type))
     assert_np_equal(y.grad.numpy(), np.array([10.0], dtype=np_type))
-    x = wp.zeros(1, dtype=quat, device=device, requires_grad=True)
-    a = wp.ones(1, dtype=quat, device=device, requires_grad=True)
-    tape = wp.Tape()
-    with tape:
-        wp.launch(kernel_in_register_overwrite, dim=1, inputs=[x, a], device=device)
-    tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
-    assert_np_equal(x.numpy(), np.array([[1.0, 3.0, 1.0, 1.0]], dtype=np_type))
-    assert_np_equal(a.grad.numpy(), np.array([[1.0, 0.0, 1.0, 1.0]], dtype=np_type))
 ############################################################
@@ -2128,7 +2116,7 @@ def test_quat_len(test, device):
 @wp.kernel
-def vector_augassign_kernel(
+def quat_augassign_kernel(
     a: wp.array(dtype=wp.quat), b: wp.array(dtype=wp.quat), c: wp.array(dtype=wp.quat), d: wp.array(dtype=wp.quat)
 ):
     i = wp.tid()
@@ -2146,26 +2134,26 @@ def vector_augassign_kernel(
     q3 = wp.quat()
     q4 = d[i]
-    q3[0] += q4[0]
-    q3[1] += q4[1]
-    q3[2] += q4[2]
-    q3[3] += q4[3]
+    q3[0] -= q4[0]
+    q3[1] -= q4[1]
+    q3[2] -= q4[2]
+    q3[3] -= q4[3]
-    c[i] = q1
+    c[i] = q3
-def test_vector_augassign(test, device):
+def test_quat_augassign(test, device):
     N = 3
-    a = wp.zeros(N, dtype=wp.quat, requires_grad=True)
-    b = wp.ones(N, dtype=wp.quat, requires_grad=True)
+    a = wp.zeros(N, dtype=wp.quat, requires_grad=True, device=device)
+    b = wp.ones(N, dtype=wp.quat, requires_grad=True, device=device)
-    c = wp.zeros(N, dtype=wp.quat, requires_grad=True)
-    d = wp.ones(N, dtype=wp.quat, requires_grad=True)
+    c = wp.zeros(N, dtype=wp.quat, requires_grad=True, device=device)
+    d = wp.ones(N, dtype=wp.quat, requires_grad=True, device=device)
     tape = wp.Tape()
     with tape:
-        wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
+        wp.launch(quat_augassign_kernel, N, inputs=[a, b, c, d], device=device)
     tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
@@ -2178,6 +2166,38 @@ def test_vector_augassign(test, device):
     assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+def test_quat_assign_copy(test, device):
+    saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
+    try:
+        wp.config.enable_vector_component_overwrites = True
+        @wp.kernel
+        def quat_in_register_overwrite(x: wp.array(dtype=wp.quat), a: wp.array(dtype=wp.quat)):
+            tid = wp.tid()
+            f = wp.quat()
+            a_quat = a[tid]
+            f = a_quat
+            f[1] = 3.0
+            x[tid] = f
+        x = wp.zeros(1, dtype=wp.quat, device=device, requires_grad=True)
+        a = wp.ones(1, dtype=wp.quat, device=device, requires_grad=True)
+        tape = wp.Tape()
+        with tape:
+            wp.launch(quat_in_register_overwrite, dim=1, inputs=[x, a], device=device)
+        tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
+        assert_np_equal(x.numpy(), np.array([[1.0, 3.0, 1.0, 1.0]], dtype=float))
+        assert_np_equal(a.grad.numpy(), np.array([[1.0, 0.0, 1.0, 1.0]], dtype=float))
+    finally:
+        wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
 devices = get_test_devices()
@@ -2277,8 +2297,8 @@ for dtype in np_float_types:
     )
     add_function_test_register_kernel(
         TestQuat,
-        f"test_quat_assign_{dtype.__name__}",
-        test_quat_assign,
+        f"test_quat_assign_inplace_{dtype.__name__}",
+        test_quat_assign_inplace,
         devices=devices,
         dtype=dtype,
     )
@@ -2287,7 +2307,8 @@ for dtype in np_float_types:
     )
 add_function_test(TestQuat, "test_quat_len", test_quat_len, devices=devices)
+add_function_test(TestQuat, "test_quat_augassign", test_quat_augassign, devices=devices)
+add_function_test(TestQuat, "test_quat_assign_copy", test_quat_assign_copy, devices=devices)
 if __name__ == "__main__":
     wp.clear_kernel_cache()

warp/tests/test_rand.py CHANGED Viewed

@@ -26,6 +26,8 @@ def test_kernel(
     kernel_seed: int,
     int_a: wp.array(dtype=int),
     int_ab: wp.array(dtype=int),
+    uint_a: wp.array(dtype=wp.uint32),
+    uint_ab: wp.array(dtype=wp.uint32),
     float_01: wp.array(dtype=float),
     float_ab: wp.array(dtype=float),
 ):
@@ -35,6 +37,8 @@ def test_kernel(
     int_a[tid] = wp.randi(state)
     int_ab[tid] = wp.randi(state, 0, 100)
+    uint_a[tid] = wp.randu(state)
+    uint_ab[tid] = wp.randu(state, wp.uint32(0), wp.uint32(100))
     float_01[tid] = wp.randf(state)
     float_ab[tid] = wp.randf(state, 0.0, 100.0)
@@ -42,37 +46,25 @@ def test_kernel(
 def test_rand(test, device):
     N = 10
-    int_a_device = wp.zeros(N, dtype=int, device=device)
-    int_a_host = wp.zeros(N, dtype=int, device="cpu")
-    int_ab_device = wp.zeros(N, dtype=int, device=device)
-    int_ab_host = wp.zeros(N, dtype=int, device="cpu")
+    int_a = wp.zeros(N, dtype=int, device=device)
+    int_ab = wp.zeros(N, dtype=int, device=device)
-    float_01_device = wp.zeros(N, dtype=float, device=device)
-    float_01_host = wp.zeros(N, dtype=float, device="cpu")
-    float_ab_device = wp.zeros(N, dtype=float, device=device)
-    float_ab_host = wp.zeros(N, dtype=float, device="cpu")
+    uint_a = wp.zeros(N, dtype=wp.uint32, device=device)
+    uint_ab = wp.zeros(N, dtype=wp.uint32, device=device)
+    float_01 = wp.zeros(N, dtype=float, device=device)
+    float_ab = wp.zeros(N, dtype=float, device=device)
     seed = 42
     wp.launch(
         kernel=test_kernel,
         dim=N,
-        inputs=[seed, int_a_device, int_ab_device, float_01_device, float_ab_device],
+        inputs=[seed, int_a, int_ab, uint_a, uint_ab, float_01, float_ab],
         outputs=[],
         device=device,
     )
-    wp.copy(int_a_host, int_a_device)
-    wp.copy(int_ab_host, int_ab_device)
-    wp.copy(float_01_host, float_01_device)
-    wp.copy(float_ab_host, float_ab_device)
-    wp.synchronize_device(device)
-    int_a = int_a_host.numpy()
-    int_ab = int_ab_host.numpy()
-    float_01 = float_01_host.numpy()
-    float_ab = float_ab_host.numpy()
     int_a_true = np.array(
         [
             -575632308,
@@ -88,32 +80,47 @@ def test_rand(test, device):
         ]
     )
     int_ab_true = np.array([46, 58, 46, 83, 85, 39, 72, 99, 18, 41])
+    uint_a_true = np.array(
+        [
+            3133687854,
+            3702303309,
+            1235698096,
+            3516599792,
+            800302729,
+            2620462179,
+            2423739693,
+            3024873594,
+            2783682377,
+            1188846332,
+        ]
+    )
+    uint_ab_true = np.array([6, 55, 2, 92, 55, 93, 65, 23, 48, 0])
     float_01_true = np.array(
         [
-            0.72961855,
-            0.86200964,
-            0.28770837,
-            0.8187722,
-            0.186335,
-            0.6101239,
-            0.56432086,
-            0.70428324,
-            0.64812654,
-            0.27679986,
+            0.8265858,
+            0.5874614,
+            0.1508659,
+            0.9498008,
+            0.02531803,
+            0.8520948,
+            0.0001185536,
+            0.4855958,
+            0.06277305,
+            0.2214079,
         ]
     )
     float_ab_true = np.array(
-        [96.04259, 73.33809, 63.601555, 38.647305, 71.813896, 64.65809, 77.79791, 46.579605, 94.614456, 91.921814]
+        [79.84678, 76.362206, 32.135242, 99.70866, 70.45863, 20.6523, 45.164482, 55.583008, 76.60291, 35.36277]
     )
-    test.assertTrue((int_a == int_a_true).all())
-    test.assertTrue((int_ab == int_ab_true).all())
+    assert_np_equal(int_a.numpy(), int_a_true)
+    assert_np_equal(int_ab.numpy(), int_ab_true)
-    err = np.max(np.abs(float_01 - float_01_true))
-    test.assertTrue(err < 1e-04)
+    assert_np_equal(uint_a.numpy(), uint_a_true)
+    assert_np_equal(uint_ab.numpy(), uint_ab_true)
-    err = np.max(np.abs(float_ab - float_ab_true))
-    test.assertTrue(err < 1e-04)
+    assert_np_equal(float_01.numpy(), float_01_true, 1e-04)
+    assert_np_equal(float_ab.numpy(), float_ab_true, 1e-04)
 @wp.kernel

warp/tests/test_sparse.py CHANGED Viewed

@@ -19,10 +19,12 @@ import numpy as np
 import warp as wp
 from warp.sparse import (
+    bsr_assign,
     bsr_axpy,
     bsr_axpy_work_arrays,
     bsr_copy,
     bsr_diag,
+    bsr_from_triplets,
     bsr_get_diag,
     bsr_identity,
     bsr_mm,
@@ -232,18 +234,43 @@ def test_bsr_split_merge(test, device):
     with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
         bsr_copy(bsr, block_shape=(3, 3))
-    with test.assertRaisesRegex(
-        ValueError, r"Dest block shape \(5, 5\) is not an exact multiple of src block shape \(4, 2\)"
-    ):
+    with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
         bsr_copy(bsr, block_shape=(5, 5))
     with test.assertRaisesRegex(
         ValueError,
-        "The total rows and columns of the src matrix cannot be evenly divided using the requested block shape",
+        "The requested block shape does not evenly divide the source matrix",
     ):
         bsr_copy(bsr, block_shape=(32, 32))
+def test_bsr_assign_masked(test, device):
+    rng = np.random.default_rng(123)
+    block_shape = (1, 2)
+    nrow = 16
+    ncol = 8
+    shape = (block_shape[0] * nrow, block_shape[1] * ncol)
+    n = 20
+    rows = wp.array(rng.integers(0, high=nrow, size=n, dtype=int), dtype=int, device=device)
+    cols = wp.array(rng.integers(0, high=ncol, size=n, dtype=int), dtype=int, device=device)
+    vals = wp.array(rng.random(size=(n, block_shape[0], block_shape[1])), dtype=float, device=device)
+    A = bsr_from_triplets(nrow, ncol, rows, cols, vals)
+    # Extract coarse diagonal with copy + diag funcs, for reference
+    A_coarse = bsr_copy(A, block_shape=(4, 4))
+    ref = _bsr_to_dense(bsr_diag(bsr_get_diag(A_coarse)))
+    # Extract coarse diagonal with masked assign (more memory efficient)
+    diag_masked = bsr_diag(rows_of_blocks=shape[0] // 4, block_type=A_coarse.dtype, device=device)
+    bsr_assign(src=A, dest=diag_masked, masked=True)
+    res = _bsr_to_dense(diag_masked)
+    assert_np_equal(res, ref, 0.0001)
 def make_test_bsr_transpose(block_shape, scalar_type):
     def test_bsr_transpose(test, device):
         rng = np.random.default_rng(123)
@@ -316,6 +343,12 @@ def make_test_bsr_axpy(block_shape, scalar_type):
         res = _bsr_to_dense(y)
         assert_np_equal(res, ref, 0.0001)
+        # test masked
+        y_mask = bsr_from_triplets(nrow, ncol, y.uncompress_rows()[:1], y.columns[:1], y.values[:1])
+        bsr_axpy(y, y_mask, masked=True)
+        assert y_mask.nnz_sync() == 1
+        assert_np_equal(y_mask.values.numpy(), 2.0 * y.values[:1].numpy(), 0.0001)
         # test incompatible shapes
         y.ncol = y.ncol + 1
         with test.assertRaisesRegex(ValueError, "Matrices must have the same number of rows and columns"):
@@ -383,6 +416,13 @@ def make_test_bsr_mm(block_shape, scalar_type):
         bsr_mm(x, y, z, alpha, beta, work_arrays=work_arrays, reuse_topology=True)
         assert_np_equal(res, ref, 0.0001)
+        # test masked mm
+        z = bsr_diag(rows_of_blocks=z.nrow, block_type=z.dtype, device=z.device)
+        bsr_mm(x, y, z, masked=True)
+        res = _bsr_to_dense(z)
+        ref = _bsr_to_dense(bsr_diag(bsr_get_diag(x @ y)))
+        assert_np_equal(res, ref, 0.0001)
         # using overloaded operators
         x = (alpha * x) @ y
         assert_np_equal(res, ref, 0.0001)
@@ -479,12 +519,12 @@ def make_test_bsr_mv(block_shape, scalar_type):
         assert_np_equal(res, ref, 0.0001)
         A.ncol = A.ncol + 1
-        with test.assertRaisesRegex(ValueError, "Number of columns"):
+        with test.assertRaisesRegex(ValueError, "Incompatible 'x'"):
             bsr_mv(A, x, y)
         A.ncol = A.ncol - 1
         A.nrow = A.nrow - 1
-        with test.assertRaisesRegex(ValueError, "Number of rows"):
+        with test.assertRaisesRegex(ValueError, "Incompatible 'y'"):
             bsr_mv(A, x, y)
     return test_bsr_mv
@@ -518,6 +558,7 @@ add_function_test(TestSparse, "test_csr_from_triplets", test_csr_from_triplets,
 add_function_test(TestSparse, "test_bsr_from_triplets", test_bsr_from_triplets, devices=devices)
 add_function_test(TestSparse, "test_bsr_get_diag", test_bsr_get_set_diag, devices=devices)
 add_function_test(TestSparse, "test_bsr_split_merge", test_bsr_split_merge, devices=devices)
+add_function_test(TestSparse, "test_bsr_assign_masked", test_bsr_assign_masked, devices=devices)
 add_function_test(TestSparse, "test_csr_transpose", make_test_bsr_transpose((1, 1), wp.float32), devices=devices)
 add_function_test(TestSparse, "test_bsr_transpose_1_3", make_test_bsr_transpose((1, 3), wp.float32), devices=devices)

warp/tests/test_spatial.py CHANGED Viewed

@@ -1969,6 +1969,67 @@ def test_transform_anon_type_instance(test, device, dtype, register_kernels=Fals
         tape.zero()
+def test_transform_from_matrix(test, device, dtype, register_kernels=False):
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat44 = wp.types.matrix((4, 4), wptype)
+    vec3 = wp.types.vector(3, wptype)
+    quat = wp.types.quaternion(wptype)
+    def transform_from_matrix_kernel():
+        # fmt: off
+        m = mat44(
+            wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
+            wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
+            wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
+            wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
+        )
+        # fmt: on
+        t = wp.transform_from_matrix(m)
+        p = wp.transform_get_translation(t)
+        q = wp.transform_get_rotation(t)
+        wp.expect_near(p, vec3(wptype(1.0), wptype(2.0), wptype(3.0)), tolerance=wptype(1e-3))
+        wp.expect_near(q, quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8)), tolerance=wptype(1e-3))
+    kernel = getkernel(transform_from_matrix_kernel, suffix=dtype.__name__)
+    if register_kernels:
+        return
+    wp.launch(kernel, dim=1, device=device)
+def test_transform_to_matrix(test, device, dtype, register_kernels=False):
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat44 = wp.types.matrix((4, 4), wptype)
+    vec3 = wp.types.vector(3, wptype)
+    quat = wp.types.quaternion(wptype)
+    def transform_to_matrix_kernel():
+        p = vec3(wptype(1.0), wptype(2.0), wptype(3.0))
+        q = quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8))
+        t = wp.transformation(p, q)
+        m = wp.transform_to_matrix(t)
+        # fmt: off
+        wp.expect_near(
+            m,
+            mat44(
+                wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
+                wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
+                wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
+                wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
+            ),
+            tolerance=wptype(1e-3),
+        )
+        # fmt: on
+    kernel = getkernel(transform_to_matrix_kernel, suffix=dtype.__name__)
+    if register_kernels:
+        return
+    wp.launch(kernel, dim=1, device=device)
 devices = get_test_devices()
@@ -2145,6 +2206,20 @@ for dtype in np_float_types:
     add_function_test_register_kernel(
         TestSpatial, f"test_spatial_adjoint_{dtype.__name__}", test_spatial_adjoint, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestSpatial,
+        f"test_transform_from_matrix_{dtype.__name__}",
+        test_transform_from_matrix,
+        devices=devices,
+        dtype=dtype,
+    )
+    add_function_test_register_kernel(
+        TestSpatial,
+        f"test_transform_to_matrix_{dtype.__name__}",
+        test_transform_to_matrix,
+        devices=devices,
+        dtype=dtype,
+    )
     # \TODO: test spatial_mass and spatial_jacobian

warp/tests/test_static.py CHANGED Viewed

@@ -307,7 +307,7 @@ def test_function_lookup(test, device):
 def count_ssa_occurrences(kernel: wp.Kernel, ssas: List[str]) -> Dict[str, int]:
     # analyze the generated code
-    counts = {ssa: 0 for ssa in ssas}
+    counts = dict.fromkeys(ssas, 0)
     for line in kernel.adj.blocks[0].body_forward:
         for ssa in ssas:
             if ssa in line: