warp_lang-1.3.2-py3-none-manylinux2014_x86_64.whl → warp_lang-1.4.0-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107)
  1. warp/__init__.py +6 -0
  2. warp/autograd.py +59 -6
  3. warp/bin/warp.so +0 -0
  4. warp/build_dll.py +8 -10
  5. warp/builtins.py +126 -4
  6. warp/codegen.py +435 -53
  7. warp/config.py +1 -1
  8. warp/context.py +678 -403
  9. warp/dlpack.py +2 -0
  10. warp/examples/benchmarks/benchmark_cloth.py +10 -0
  11. warp/examples/core/example_render_opengl.py +12 -10
  12. warp/examples/fem/example_adaptive_grid.py +251 -0
  13. warp/examples/fem/example_apic_fluid.py +1 -1
  14. warp/examples/fem/example_diffusion_3d.py +2 -2
  15. warp/examples/fem/example_magnetostatics.py +1 -1
  16. warp/examples/fem/example_streamlines.py +1 -0
  17. warp/examples/fem/utils.py +23 -4
  18. warp/examples/sim/example_cloth.py +50 -6
  19. warp/fem/__init__.py +2 -0
  20. warp/fem/adaptivity.py +493 -0
  21. warp/fem/field/field.py +2 -1
  22. warp/fem/field/nodal_field.py +18 -26
  23. warp/fem/field/test.py +4 -4
  24. warp/fem/field/trial.py +4 -4
  25. warp/fem/geometry/__init__.py +1 -0
  26. warp/fem/geometry/adaptive_nanogrid.py +843 -0
  27. warp/fem/geometry/nanogrid.py +55 -28
  28. warp/fem/space/__init__.py +1 -1
  29. warp/fem/space/nanogrid_function_space.py +69 -35
  30. warp/fem/utils.py +113 -107
  31. warp/jax_experimental.py +28 -15
  32. warp/native/array.h +0 -1
  33. warp/native/builtin.h +103 -6
  34. warp/native/bvh.cu +2 -0
  35. warp/native/cuda_util.cpp +14 -0
  36. warp/native/cuda_util.h +2 -0
  37. warp/native/error.cpp +4 -2
  38. warp/native/exports.h +99 -17
  39. warp/native/mat.h +97 -0
  40. warp/native/mesh.cpp +36 -0
  41. warp/native/mesh.cu +51 -0
  42. warp/native/mesh.h +1 -0
  43. warp/native/quat.h +43 -0
  44. warp/native/spatial.h +6 -0
  45. warp/native/vec.h +74 -0
  46. warp/native/warp.cpp +2 -1
  47. warp/native/warp.cu +10 -3
  48. warp/native/warp.h +8 -1
  49. warp/paddle.py +382 -0
  50. warp/sim/__init__.py +1 -0
  51. warp/sim/collide.py +519 -0
  52. warp/sim/integrator_euler.py +18 -5
  53. warp/sim/integrator_featherstone.py +5 -5
  54. warp/sim/integrator_vbd.py +1026 -0
  55. warp/sim/model.py +49 -23
  56. warp/stubs.py +459 -0
  57. warp/tape.py +2 -0
  58. warp/tests/aux_test_dependent.py +1 -0
  59. warp/tests/aux_test_name_clash1.py +32 -0
  60. warp/tests/aux_test_name_clash2.py +32 -0
  61. warp/tests/aux_test_square.py +1 -0
  62. warp/tests/test_array.py +222 -0
  63. warp/tests/test_async.py +3 -3
  64. warp/tests/test_atomic.py +6 -0
  65. warp/tests/test_closest_point_edge_edge.py +93 -1
  66. warp/tests/test_codegen.py +62 -15
  67. warp/tests/test_codegen_instancing.py +1457 -0
  68. warp/tests/test_collision.py +486 -0
  69. warp/tests/test_compile_consts.py +3 -28
  70. warp/tests/test_dlpack.py +170 -0
  71. warp/tests/test_examples.py +22 -8
  72. warp/tests/test_fast_math.py +10 -4
  73. warp/tests/test_fem.py +64 -0
  74. warp/tests/test_func.py +46 -0
  75. warp/tests/test_implicit_init.py +49 -0
  76. warp/tests/test_jax.py +58 -0
  77. warp/tests/test_mat.py +84 -0
  78. warp/tests/test_mesh_query_point.py +188 -0
  79. warp/tests/test_module_hashing.py +40 -0
  80. warp/tests/test_multigpu.py +3 -3
  81. warp/tests/test_overwrite.py +8 -0
  82. warp/tests/test_paddle.py +852 -0
  83. warp/tests/test_print.py +89 -0
  84. warp/tests/test_quat.py +111 -0
  85. warp/tests/test_reload.py +31 -1
  86. warp/tests/test_scalar_ops.py +2 -0
  87. warp/tests/test_static.py +412 -0
  88. warp/tests/test_streams.py +64 -3
  89. warp/tests/test_struct.py +4 -4
  90. warp/tests/test_torch.py +24 -0
  91. warp/tests/test_triangle_closest_point.py +137 -0
  92. warp/tests/test_types.py +1 -1
  93. warp/tests/test_vbd.py +386 -0
  94. warp/tests/test_vec.py +143 -0
  95. warp/tests/test_vec_scalar_ops.py +139 -0
  96. warp/tests/test_volume.py +30 -0
  97. warp/tests/unittest_suites.py +12 -0
  98. warp/tests/unittest_utils.py +9 -5
  99. warp/thirdparty/dlpack.py +3 -1
  100. warp/types.py +157 -34
  101. warp/utils.py +37 -14
  102. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
  103. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +106 -94
  104. warp/tests/test_point_triangle_closest_point.py +0 -143
  105. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
  106. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
  107. {warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0
warp/tests/test_dlpack.py CHANGED
@@ -188,6 +188,57 @@ def test_dlpack_dtypes_and_shapes(test, device):
         wrap_scalar_to_matrix_tensor(mat_type)
 
 
+def test_dlpack_stream_arg(test, device):
+    # test valid range for the stream argument to array.__dlpack__()
+
+    data = np.arange(10)
+
+    def check_result(capsule):
+        result = wp.dlpack._from_dlpack(capsule)
+        assert_np_equal(result.numpy(), data)
+
+    with wp.ScopedDevice(device):
+        a = wp.array(data=data)
+
+        # stream arguments supported for all devices
+        check_result(a.__dlpack__())
+        check_result(a.__dlpack__(stream=None))
+        check_result(a.__dlpack__(stream=-1))
+
+        # device-specific stream arguments
+        if device.is_cuda:
+            check_result(a.__dlpack__(stream=0))  # default stream
+            check_result(a.__dlpack__(stream=1))  # legacy default stream
+            check_result(a.__dlpack__(stream=2))  # per thread default stream
+
+            # custom stream
+            stream = wp.Stream(device)
+            check_result(a.__dlpack__(stream=stream.cuda_stream))
+
+            # unsupported stream arguments
+            expected_error = r"DLPack stream must None or an integer >= -1"
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=-2))
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream="nope"))
+        else:
+            expected_error = r"DLPack stream must be None or -1 for CPU device"
+
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=0))
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=1))
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=2))
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=1742))
+
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream=-2))
+            with test.assertRaisesRegex(TypeError, expected_error):
+                check_result(a.__dlpack__(stream="nope"))
+
+
 def test_dlpack_warp_to_torch(test, device):
     import torch.utils.dlpack
 
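The stream argument added to array.__dlpack__() follows the DLPack protocol: the consumer passes the stream it will read the data on so the producer can synchronize before handing over the capsule. None and -1 are accepted on every device; CUDA devices additionally accept 0 (default stream), 1 (legacy default stream), 2 (per-thread default stream), or a raw stream handle. A minimal consumer-side sketch, assuming a CUDA device is available:

    import numpy as np
    import warp as wp

    wp.init()
    a = wp.array(np.arange(10, dtype=np.float32), device="cuda:0")

    # synchronize against the legacy default stream before sharing
    capsule = a.__dlpack__(stream=1)

    # a consumer-owned stream is passed as its raw handle
    s = wp.Stream(wp.get_device("cuda:0"))
    capsule_on_s = a.__dlpack__(stream=s.cuda_stream)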
@@ -299,6 +350,34 @@ def test_dlpack_torch_to_warp_v2(test, device):
     assert_np_equal(a.numpy(), t.cpu().numpy())
 
 
+def test_dlpack_paddle_to_warp(test, device):
+    import paddle
+    import paddle.utils.dlpack
+
+    t = paddle.arange(N, dtype=paddle.float32).to(device=wp.device_to_paddle(device))
+
+    # paddle does not implement __dlpack__ yet, so only test to_dlpack here
+    a = wp.from_dlpack(paddle.utils.dlpack.to_dlpack(t))
+
+    item_size = wp.types.type_size_in_bytes(a.dtype)
+
+    test.assertEqual(a.ptr, t.data_ptr())
+    test.assertEqual(a.device, wp.device_from_paddle(t.place))
+    test.assertEqual(a.dtype, wp.dtype_from_paddle(t.dtype))
+    test.assertEqual(a.shape, tuple(t.shape))
+    test.assertEqual(a.strides, tuple(s * item_size for s in t.strides))
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+    wp.launch(inc, dim=a.size, inputs=[a], device=device)
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+    paddle.assign(t + 1, t)
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+
 def test_dlpack_warp_to_jax(test, device):
     import jax
     import jax.dlpack
@@ -370,6 +449,61 @@ def test_dlpack_warp_to_jax_v2(test, device):
     assert_np_equal(a.numpy(), np.asarray(j2))
 
 
+def test_dlpack_warp_to_paddle(test, device):
+    import paddle.utils.dlpack
+
+    a = wp.array(data=np.arange(N, dtype=np.float32), device=device)
+
+    t = paddle.utils.dlpack.from_dlpack(wp.to_dlpack(a))
+
+    item_size = wp.types.type_size_in_bytes(a.dtype)
+
+    test.assertEqual(a.ptr, t.data_ptr())
+    test.assertEqual(a.device, wp.device_from_paddle(t.place))
+    test.assertEqual(a.dtype, wp.dtype_from_paddle(t.dtype))
+    test.assertEqual(a.shape, tuple(t.shape))
+    test.assertEqual(a.strides, tuple(s * item_size for s in t.strides))
+
+    assert_np_equal(a.numpy(), t.cpu().numpy())
+
+    wp.launch(inc, dim=a.size, inputs=[a], device=device)
+
+    assert_np_equal(a.numpy(), t.cpu().numpy())
+
+    paddle.assign(t + 1, t)
+
+    assert_np_equal(a.numpy(), t.cpu().numpy())
+
+
+def test_dlpack_warp_to_paddle_v2(test, device):
+    # same as the original test, but uses the newer __dlpack__() method
+
+    import paddle.utils.dlpack
+
+    a = wp.array(data=np.arange(N, dtype=np.float32), device=device)
+
+    # pass the array directly
+    t = paddle.utils.dlpack.from_dlpack(a)
+
+    item_size = wp.types.type_size_in_bytes(a.dtype)
+
+    test.assertEqual(a.ptr, t.data_ptr())
+    test.assertEqual(a.device, wp.device_from_paddle(t.place))
+    test.assertEqual(a.dtype, wp.dtype_from_paddle(t.dtype))
+    test.assertEqual(a.shape, tuple(t.shape))
+    test.assertEqual(a.strides, tuple(s * item_size for s in t.strides))
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+    wp.launch(inc, dim=a.size, inputs=[a], device=device)
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+    paddle.assign(t + 1, t)
+
+    assert_np_equal(a.numpy(), t.numpy())
+
+
 def test_dlpack_jax_to_warp(test, device):
     import jax
     import jax.dlpack
@@ -448,6 +582,7 @@ devices = get_test_devices()
 
 add_function_test(TestDLPack, "test_dlpack_warp_to_warp", test_dlpack_warp_to_warp, devices=devices)
 add_function_test(TestDLPack, "test_dlpack_dtypes_and_shapes", test_dlpack_dtypes_and_shapes, devices=devices)
+add_function_test(TestDLPack, "test_dlpack_stream_arg", test_dlpack_stream_arg, devices=devices)
 
 # torch interop via dlpack
 try:
@@ -523,6 +658,41 @@ except Exception as e:
     print(f"Skipping Jax DLPack tests due to exception: {e}")
 
 
+# paddle interop via dlpack
+try:
+    import paddle
+    import paddle.utils.dlpack
+
+    # check which Warp devices work with paddle
+    # CUDA devices may fail if paddle was not compiled with CUDA support
+    test_devices = get_test_devices()
+    paddle_compatible_devices = []
+    for d in test_devices:
+        try:
+            t = paddle.arange(10).to(device=wp.device_to_paddle(d))
+            paddle.assign(t + 1, t)
+            paddle_compatible_devices.append(d)
+        except Exception as e:
+            print(f"Skipping paddle DLPack tests on device '{d}' due to exception: {e}")
+
+    if paddle_compatible_devices:
+        add_function_test(
+            TestDLPack, "test_dlpack_warp_to_paddle", test_dlpack_warp_to_paddle, devices=paddle_compatible_devices
+        )
+        add_function_test(
+            TestDLPack,
+            "test_dlpack_warp_to_paddle_v2",
+            test_dlpack_warp_to_paddle_v2,
+            devices=paddle_compatible_devices,
+        )
+        add_function_test(
+            TestDLPack, "test_dlpack_paddle_to_warp", test_dlpack_paddle_to_warp, devices=paddle_compatible_devices
+        )
+
+except Exception as e:
+    print(f"Skipping Paddle DLPack tests due to exception: {e}")
+
+
 if __name__ == "__main__":
     wp.clear_kernel_cache()
     unittest.main(verbosity=2)
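The three Paddle tests above exercise the interop helpers introduced with warp/paddle.py in this release (wp.device_to_paddle(), wp.device_from_paddle(), wp.dtype_from_paddle()). A condensed round-trip sketch distilled from those tests, assuming a Paddle build that matches the target device:

    import numpy as np
    import paddle
    import paddle.utils.dlpack
    import warp as wp

    wp.init()
    device = wp.get_device()

    # Warp -> Paddle: Warp arrays implement __dlpack__(), so Paddle consumes them directly
    a = wp.array(np.arange(16, dtype=np.float32), device=device)
    t = paddle.utils.dlpack.from_dlpack(a)  # zero-copy view of the same buffer

    # Paddle -> Warp: Paddle does not implement __dlpack__() yet, so go through to_dlpack()
    t2 = paddle.arange(16, dtype=paddle.float32).to(device=wp.device_to_paddle(device))
    a2 = wp.from_dlpack(paddle.utils.dlpack.to_dlpack(t2))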
warp/tests/test_examples.py CHANGED
@@ -20,7 +20,12 @@ override example defaults so the example can run in less than ten seconds.
 Use {"usd_required": True} and {"torch_required": True} to skip running the test
 if usd-core or torch are not found in the Python environment.
 
+Use {"cutlass_required": True} to skip the test if Warp was not built with
+CUTLASS support.
+
 Use the "num_frames" and "train_iters" keys to control the number of steps.
+
+Use "test_timeout" to override the default test timeout threshold of 300 seconds.
 """
 
 import os
@@ -37,6 +42,9 @@ from warp.tests.unittest_utils import (
     get_test_devices,
     sanitize_identifier,
 )
+from warp.utils import check_p2p
+
+wp.init()  # For wp.context.runtime.core.is_cutlass_enabled()
 
 
 def _build_command_line_options(test_options: Dict[str, Any]) -> list:
@@ -103,6 +111,10 @@ def add_example_test(
         if usd_required and not USD_AVAILABLE:
             test.skipTest("Requires usd-core")
 
+        cutlass_required = options.pop("cutlass_required", False)
+        if cutlass_required and not wp.context.runtime.core.is_cutlass_enabled():
+            test.skipTest("Warp was not built with CUTLASS support")
+
         # Find the current Warp cache
         warp_cache_path = wp.config.kernel_cache_dir
 
@@ -286,6 +298,7 @@ add_example_test(
     test_options_cuda={
         "train_iters": 1 if warp.context.runtime.core.is_debug_enabled() else 3,
         "num_frames": 1 if warp.context.runtime.core.is_debug_enabled() else 60,
+        "cutlass_required": True,
     },
     test_options_cpu={"train_iters": 1, "num_frames": 30},
 )
@@ -340,12 +353,14 @@ class TestFemDiffusionExamples(unittest.TestCase):
     pass
 
 
-add_example_test(
-    TestFemDiffusionExamples,
-    name="fem.example_diffusion_mgpu",
-    devices=get_selected_cuda_test_devices(mode="basic"),
-    test_options={"headless": True},
-)
+# MGPU tests may fail on systems where P2P transfers are misconfigured
+if check_p2p():
+    add_example_test(
+        TestFemDiffusionExamples,
+        name="fem.example_diffusion_mgpu",
+        devices=get_selected_cuda_test_devices(mode="basic"),
+        test_options={"headless": True},
+    )
 
 add_example_test(
     TestFemExamples,
@@ -433,5 +448,4 @@ add_example_test(
 if __name__ == "__main__":
     # force rebuild of all kernels
     wp.clear_kernel_cache()
-
-    unittest.main(verbosity=2, failfast=True)
+    unittest.main(verbosity=2)
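For reference, a hypothetical registration showing how these option keys combine; the test class and example name below are made up for illustration:

    add_example_test(
        TestCoreExamples,                      # hypothetical test class
        name="core.example_foo",               # hypothetical example module
        devices=get_test_devices(),
        test_options={
            "num_frames": 10,                  # shorten the run
            "test_timeout": 600,               # override the 300-second default
            "cutlass_required": True,          # skip if Warp was built without CUTLASS
        },
    )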
warp/tests/test_fast_math.py CHANGED
@@ -12,12 +12,19 @@ from warp.tests.unittest_utils import *
 
 
 @wp.kernel
-def test_pow(e: float, result: float):
+def test_pow(e: float, expected: float):
     tid = wp.tid()
 
     y = wp.pow(-2.0, e)
 
-    wp.expect_eq(y, result)
+    # Since equality comparisons with NaNs are false, we have to do something manually
+    if wp.isnan(expected):
+        if not wp.isnan(y):
+            print("Error, comparison failed")
+            wp.printf("    Expected: %f\n", expected)
+            wp.printf("    Actual: %f\n", y)
+    else:
+        wp.expect_eq(y, expected)
 
 
 def test_fast_math_disabled(test, device):
@@ -26,14 +33,13 @@ def test_fast_math_disabled(test, device):
     wp.launch(test_pow, dim=1, inputs=[2.0, 4.0], device=device)
 
 
-@unittest.expectedFailure
 def test_fast_math_cuda(test, device):
     # on CUDA with --fast-math enabled taking the pow()
     # of a negative number will result in a NaN
 
     wp.set_module_options({"fast_math": True})
     try:
-        wp.launch(test_pow, dim=1, inputs=[2.0, 4.0], device=device)
+        wp.launch(test_pow, dim=1, inputs=[2.0, wp.NAN], device=device)
     finally:
         # Turn fast math back off
         wp.set_module_options({"fast_math": False})
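The test changes from an @unittest.expectedFailure to an explicit wp.NAN expectation because, under CUDA fast math, pow() with a negative base genuinely produces NaN (the fast path typically evaluates exp2(e * log2(x)), which is undefined for x < 0). A minimal sketch of scoping the module option around a launch, assuming a CUDA device:

    import warp as wp

    wp.set_module_options({"fast_math": True})

    @wp.kernel
    def pow_neg_base(out: wp.array(dtype=float)):
        # with fast math enabled this stores NaN rather than 4.0
        out[0] = wp.pow(-2.0, 2.0)

    out = wp.zeros(1, dtype=float, device="cuda:0")
    try:
        wp.launch(pow_neg_base, dim=1, outputs=[out], device="cuda:0")
    finally:
        wp.set_module_options({"fast_math": False})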
warp/tests/test_fem.py CHANGED
@@ -430,6 +430,9 @@ def _launch_test_geometry_kernel(geo: fem.Geometry, device):
         pos_inner = geo.cell_position(cell_arg, inner_s)
         pos_outer = geo.cell_position(cell_arg, outer_s)
 
+        # if wp.length(pos_outer - pos_side) > 0.1:
+        #     wp.print(side_index)
+
         for k in range(type(pos_side).length):
             wp.expect_near(pos_side[k], pos_inner[k], 0.0001)
             wp.expect_near(pos_side[k], pos_outer[k], 0.0001)
@@ -616,6 +619,66 @@ def test_nanogrid(test, device):
     assert_np_equal(cell_measures.numpy(), np.full(cell_measures.shape, 1.0 / (N**3)), tol=1.0e-4)
 
 
+@wp.func
+def _refinement_field(x: wp.vec3):
+    return 4.0 * (wp.length(x) - 0.5)
+
+
+def test_adaptive_nanogrid(test, device):
+    # 3 res-1 voxels, 8 res-0 voxels
+
+    res0 = wp.array(
+        [
+            [2, 2, 0],
+            [2, 3, 0],
+            [3, 2, 0],
+            [3, 3, 0],
+            [2, 2, 1],
+            [2, 3, 1],
+            [3, 2, 1],
+            [3, 3, 1],
+        ],
+        dtype=int,
+        device=device,
+    )
+    res1 = wp.array(
+        [
+            [0, 0, 0],
+            [0, 1, 0],
+            [1, 0, 0],
+            [1, 1, 0],
+        ],
+        dtype=int,
+        device=device,
+    )
+
+    grid0 = wp.Volume.allocate_by_voxels(res0, 0.5, device=device)
+    grid1 = wp.Volume.allocate_by_voxels(res1, 1.0, device=device)
+    geo = fem.adaptive_nanogrid_from_hierarchy([grid0, grid1])
+
+    test.assertEqual(geo.cell_count(), 3 + 8)
+    test.assertEqual(geo.vertex_count(), 2 * 9 + 27 - 8)
+    test.assertEqual(geo.side_count(), 2 * 4 + 6 * 2 + (3 * (2 + 1) * 2**2 - 6))
+    test.assertEqual(geo.boundary_side_count(), 2 * 4 + 4 * 2 + (4 * 4 - 4))
+    # test.assertEqual(geo.edge_count(), 6 * 4 + 9 + (3 * 2 * (2 + 1) ** 2 - 12))
+    test.assertEqual(geo.stacked_face_count(), geo.side_count() + 2)
+    test.assertEqual(geo.stacked_edge_count(), 6 * 4 + 9 + (3 * 2 * (2 + 1) ** 2 - 12) + 7)
+
+    side_measures, cell_measures = _launch_test_geometry_kernel(geo, device)
+
+    test.assertAlmostEqual(np.sum(cell_measures.numpy()), 4.0, places=4)
+    test.assertAlmostEqual(np.sum(side_measures.numpy()), 20 + 3.0, places=4)
+
+    # Test with non-graded geometry
+    ref_field = fem.ImplicitField(fem.Cells(geo), func=_refinement_field)
+    non_graded_geo = fem.adaptive_nanogrid_from_field(grid1, level_count=3, refinement_field=ref_field)
+    _launch_test_geometry_kernel(geo, device)
+
+    # Test automatic grading
+    graded_geo = fem.adaptive_nanogrid_from_field(grid1, level_count=3, refinement_field=ref_field, grading="face")
+    test.assertEqual(non_graded_geo.cell_count() + 7, graded_geo.cell_count())
+
+
 @integrand
 def _rigid_deformation_field(s: Sample, domain: Domain, translation: wp.vec3, rotation: wp.vec3, scale: float):
     q = wp.quat_from_axis_angle(wp.normalize(rotation), wp.length(rotation))
@@ -1531,6 +1594,7 @@ add_function_test(TestFem, "test_grid_3d", test_grid_3d, devices=devices)
 add_function_test(TestFem, "test_tet_mesh", test_tet_mesh, devices=devices)
 add_function_test(TestFem, "test_hex_mesh", test_hex_mesh, devices=devices)
 add_function_test(TestFem, "test_nanogrid", test_nanogrid, devices=cuda_devices)
+add_function_test(TestFem, "test_adaptive_nanogrid", test_adaptive_nanogrid, devices=cuda_devices)
 add_function_test(TestFem, "test_deformed_geometry", test_deformed_geometry, devices=devices)
 add_function_test(TestFem, "test_dof_mapper", test_dof_mapper)
 add_function_test(TestFem, "test_point_basis", test_point_basis)
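test_adaptive_nanogrid covers the adaptive grid geometry added in warp/fem/geometry/adaptive_nanogrid.py, built either from an explicit finest-first list of volumes or by refining a coarse volume against a scalar field, optionally with face grading to limit the level jump between neighboring cells. A condensed sketch distilled from the test above (the voxel layout is illustrative), assuming a CUDA device since NanoVDB volumes require one:

    import warp as wp
    import warp.fem as fem

    @wp.func
    def refinement(x: wp.vec3):
        return 4.0 * (wp.length(x) - 0.5)

    device = "cuda:0"
    voxels = wp.array([[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]], dtype=int, device=device)
    coarse = wp.Volume.allocate_by_voxels(voxels, 1.0, device=device)

    # from an explicit hierarchy of volumes
    geo = fem.adaptive_nanogrid_from_hierarchy([coarse])

    # or by refining the coarse grid against a field, with automatic grading
    ref_field = fem.ImplicitField(fem.Cells(geo), func=refinement)
    graded = fem.adaptive_nanogrid_from_field(coarse, level_count=3, refinement_field=ref_field, grading="face")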
warp/tests/test_func.py CHANGED
@@ -7,6 +7,7 @@
 
 import math
 import unittest
+from typing import Tuple
 
 import numpy as np
 
@@ -155,6 +156,41 @@ def test_builtin_shadowing():
     wp.expect_eq(sign(1.23), 123.0)
 
 
+@wp.func
+def user_func_with_defaults(a: int = 123, b: int = 234) -> int:
+    return a + b
+
+
+@wp.kernel
+def test_user_func_with_defaults():
+    a = user_func_with_defaults()
+    wp.expect_eq(a, 357)
+
+    b = user_func_with_defaults(111)
+    wp.expect_eq(b, 345)
+
+    c = user_func_with_defaults(111, 222)
+    wp.expect_eq(c, 333)
+
+    d = user_func_with_defaults(a=111)
+    wp.expect_eq(d, 345)
+
+    e = user_func_with_defaults(b=111)
+    wp.expect_eq(e, 234)
+
+
+@wp.func
+def user_func_return_multiple_values(a: int, b: float) -> Tuple[int, float]:
+    return a + a, b * b
+
+
+@wp.kernel
+def test_user_func_return_multiple_values():
+    a, b = user_func_return_multiple_values(123, 234.0)
+    wp.expect_eq(a, 246)
+    wp.expect_eq(b, 54756.0)
+
+
 devices = get_test_devices()
 
 
@@ -329,6 +365,16 @@ add_function_test(TestFunc, func=test_func_closure_capture, name="test_func_clos
 add_function_test(TestFunc, func=test_multi_valued_func, name="test_multi_valued_func", devices=devices)
 add_kernel_test(TestFunc, kernel=test_func_defaults, name="test_func_defaults", dim=1, devices=devices)
 add_kernel_test(TestFunc, kernel=test_builtin_shadowing, name="test_builtin_shadowing", dim=1, devices=devices)
+add_kernel_test(
+    TestFunc, kernel=test_user_func_with_defaults, name="test_user_func_with_defaults", dim=1, devices=devices
+)
+add_kernel_test(
+    TestFunc,
+    kernel=test_user_func_return_multiple_values,
+    name="test_user_func_return_multiple_values",
+    dim=1,
+    devices=devices,
+)
 
 
 if __name__ == "__main__":
warp/tests/test_implicit_init.py CHANGED
@@ -347,6 +347,55 @@ add_function_test(
 )
 
 
+# Structs
+# ------------------------------------------------------------------------------
+
+
+def test_struct_member_init(test, device):
+    @wp.struct
+    class S:
+        # fp16 requires conversion functions from warp.so
+        x: wp.float16
+        v: wp.vec3h
+
+    s = S()
+    s.x = 42.0
+    s.v = wp.vec3h(1.0, 2.0, 3.0)
+
+
+class TestImplicitInitStructMemberInit(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitStructMemberInit,
+    "test_struct_member_init",
+    test_struct_member_init,
+    check_output=False,
+)
+
+
+# Tape
+# ------------------------------------------------------------------------------
+
+
+def test_tape(test, device):
+    with wp.Tape():
+        pass
+
+
+class TestImplicitInitTape(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitTape,
+    "test_tape",
+    test_tape,
+    check_output=False,
+)
+
+
 if __name__ == "__main__":
     # Do not clear the kernel cache or call anything that would initialize Warp
     # since these tests are specifically aiming to catch issues where Warp isn't
warp/tests/test_jax.py CHANGED
@@ -246,6 +246,60 @@ def test_jax_kernel_multiarg(test, device):
     assert_np_equal(result_y, expected_y)
 
 
+@unittest.skipUnless(_jax_version() >= (0, 4, 25), "Jax version too old")
+def test_jax_kernel_launch_dims(test, device):
+    import jax.numpy as jp
+
+    from warp.jax_experimental import jax_kernel
+
+    n = 64
+    m = 32
+
+    # Test with 1D launch dims
+    @wp.kernel
+    def add_one_kernel(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+        tid = wp.tid()
+        y[tid] = x[tid] + 1.0
+
+    jax_add_one = jax_kernel(
+        add_one_kernel, launch_dims=(n - 2,)
+    )  # Intentionally not the same as the first dimension of the input
+
+    @jax.jit
+    def f_1d():
+        x = jp.arange(n, dtype=jp.float32)
+        return jax_add_one(x)
+
+    # Test with 2D launch dims
+    @wp.kernel
+    def add_one_2d_kernel(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+        i, j = wp.tid()
+        y[i, j] = x[i, j] + 1.0
+
+    jax_add_one_2d = jax_kernel(
+        add_one_2d_kernel, launch_dims=(n - 2, m - 2)
+    )  # Intentionally not the same as the first dimension of the input
+
+    @jax.jit
+    def f_2d():
+        x = jp.zeros((n, m), dtype=jp.float32) + 3.0
+        return jax_add_one_2d(x)
+
+    # run on the given device
+    with jax.default_device(wp.device_to_jax(device)):
+        y_1d = f_1d()
+        y_2d = f_2d()
+
+    result_1d = np.asarray(y_1d).reshape((n - 2,))
+    expected_1d = np.arange(n - 2, dtype=np.float32) + 1.0
+
+    result_2d = np.asarray(y_2d).reshape((n - 2, m - 2))
+    expected_2d = np.full((n - 2, m - 2), 4.0, dtype=np.float32)
+
+    assert_np_equal(result_1d, expected_1d)
+    assert_np_equal(result_2d, expected_2d)
+
+
 class TestJax(unittest.TestCase):
     pass
 
@@ -296,6 +350,10 @@
         TestJax, "test_jax_kernel_multiarg", test_jax_kernel_multiarg, devices=jax_compatible_cuda_devices
     )
 
+    add_function_test(
+        TestJax, "test_jax_kernel_launch_dims", test_jax_kernel_launch_dims, devices=jax_compatible_cuda_devices
+    )
+
 except Exception as e:
     print(f"Skipping Jax tests due to exception: {e}")
 
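The new launch_dims argument decouples the launch grid from the shape of the first input, which jax_kernel otherwise infers it from; judging by the reshapes in the test, output shapes follow the launch dims as well. A minimal sketch based on the test above:

    import jax
    import jax.numpy as jp
    import warp as wp
    from warp.jax_experimental import jax_kernel

    @wp.kernel
    def scale(x: wp.array(dtype=float), y: wp.array(dtype=float)):
        tid = wp.tid()
        y[tid] = 2.0 * x[tid]

    # launch 48 threads even though the input holds 64 elements
    jax_scale = jax_kernel(scale, launch_dims=(48,))

    @jax.jit
    def f():
        return jax_scale(jp.arange(64, dtype=jp.float32))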
warp/tests/test_mat.py CHANGED
@@ -1559,6 +1559,83 @@ def test_transform_vector(test, device, dtype, register_kernels=False):
     tape.zero()
 
 
+def test_mat_array_type_indexing(test, device, dtype, register_kernels=False):
+    np_type = np.dtype(dtype)
+    wp_type = wp.types.np_dtype_to_warp_type[np_type]
+
+    vec2 = wp.types.vector(length=2, dtype=wp_type)
+    mat22 = wp.types.matrix(shape=(2, 2), dtype=wp_type)
+    mat33 = wp.types.matrix(shape=(3, 3), dtype=wp_type)
+
+    def mattest_read_write_store(x: wp.array(dtype=wp_type), a: wp.array(dtype=mat22)):
+        tid = wp.tid()
+
+        t = a[tid]
+        t[0, 0] = x[tid]
+        a[tid] = t
+
+    def mattest_in_register(x: wp.array2d(dtype=mat22), y: wp.array(dtype=vec2)):
+        i, j = wp.tid()
+
+        a = mat22(wp_type(0.0))
+        a[0] = y[i]
+        a[1, 1] = wp_type(3.0)
+        x[i, j] = a
+
+    def mattest_in_register_overwrite(x: wp.array2d(dtype=mat22), y: wp.array(dtype=vec2)):
+        i, j = wp.tid()
+
+        a = mat22(wp_type(0.0))
+        a[0] = y[i]
+        a[0, 1] = wp_type(3.0)
+        x[i, j] = a
+
+    kernel_read_write_store = getkernel(mattest_read_write_store, suffix=dtype.__name__)
+    kernel_in_register = getkernel(mattest_in_register, suffix=dtype.__name__)
+    kernel_in_register_overwrite = getkernel(mattest_in_register_overwrite, suffix=dtype.__name__)
+
+    if register_kernels:
+        return
+
+    a = wp.ones(1, dtype=mat22, device=device, requires_grad=True)
+    x = wp.full(1, value=2.0, dtype=wp_type, device=device, requires_grad=True)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(kernel_read_write_store, dim=1, inputs=[x, a], device=device)
+
+    tape.backward(grads={a: wp.ones_like(a, requires_grad=False)})
+
+    assert_np_equal(a.numpy(), np.array([[[2.0, 1.0], [1.0, 1.0]]], dtype=np_type))
+    assert_np_equal(x.grad.numpy(), np.array([1.0], dtype=np_type))
+
+    tape.reset()
+
+    x = wp.zeros((1, 1), dtype=mat22, device=device, requires_grad=True)
+    y = wp.ones(1, dtype=vec2, device=device, requires_grad=True)
+
+    with tape:
+        wp.launch(kernel_in_register, dim=(1, 1), inputs=[x, y], device=device)
+
+    tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
+
+    assert_np_equal(x.numpy(), np.array([[[[1.0, 1.0], [0.0, 3.0]]]], dtype=np_type))
+    assert_np_equal(y.grad.numpy(), np.array([[1.0, 1.0]], dtype=np_type))
+
+    tape.reset()
+
+    x = wp.zeros((1, 1), dtype=mat22, device=device, requires_grad=True)
+    y = wp.ones(1, dtype=vec2, device=device, requires_grad=True)
+
+    with tape:
+        wp.launch(kernel_in_register_overwrite, dim=(1, 1), inputs=[x, y], device=device)
+
+    tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
+
+    assert_np_equal(x.numpy(), np.array([[[[1.0, 3.0], [0.0, 0.0]]]], dtype=np_type))
+    assert_np_equal(y.grad.numpy(), np.array([[1.0, 0.0]], dtype=np_type))
+
+
 # Test matrix constructors using explicit type (float16)
 # note that these tests are specifically not using generics / closure
 # args to create kernels dynamically (like the rest of this file)
@@ -1791,6 +1868,13 @@ for dtype in np_float_types:
         TestMat, f"test_determinant_{dtype.__name__}", test_determinant, devices=devices, dtype=dtype
     )
     add_function_test_register_kernel(TestMat, f"test_skew_{dtype.__name__}", test_skew, devices=devices, dtype=dtype)
+    add_function_test_register_kernel(
+        TestMat,
+        f"test_mat_array_type_indexing_{dtype.__name__}",
+        test_mat_array_type_indexing,
+        devices=devices,
+        dtype=dtype,
+    )
 
 
 if __name__ == "__main__":
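The new test validates in-kernel matrix mutation through whole-row assignment (m[i] = v) and per-element assignment (m[i, j] = s), including correct adjoints when a component written via a row is later overwritten. A distilled sketch of the pattern exercised above:

    import warp as wp

    @wp.kernel
    def build(rows: wp.array(dtype=wp.vec2), out: wp.array(dtype=wp.mat22)):
        tid = wp.tid()
        m = wp.mat22(0.0)
        m[0] = rows[tid]  # assign a whole row from a vec2
        m[1, 1] = 3.0     # assign a single element
        out[tid] = m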