PyPI - warp-lang - Versions diffs - 1.5.1__py3-none-macosx_10_13_universal2.whl → 1.6.0__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.5.1__py3-none-macosx_10_13_universal2.whl → 1.6.0__py3-none-macosx_10_13_universal2.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (123)

warp/__init__.py +5 -0
warp/autograd.py +414 -191
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +40 -12
warp/build_dll.py +13 -6
warp/builtins.py +1076 -480
warp/codegen.py +240 -119
warp/config.py +1 -1
warp/context.py +298 -84
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_gemm.py +27 -18
warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
warp/examples/core/example_torch.py +18 -34
warp/examples/fem/example_apic_fluid.py +1 -0
warp/examples/fem/example_mixed_elasticity.py +1 -1
warp/examples/optim/example_bounce.py +1 -1
warp/examples/optim/example_cloth_throw.py +1 -1
warp/examples/optim/example_diffray.py +4 -15
warp/examples/optim/example_drone.py +1 -1
warp/examples/optim/example_softbody_properties.py +392 -0
warp/examples/optim/example_trajectory.py +1 -3
warp/examples/optim/example_walker.py +5 -0
warp/examples/sim/example_cartpole.py +0 -2
warp/examples/sim/example_cloth_self_contact.py +260 -0
warp/examples/sim/example_granular_collision_sdf.py +4 -5
warp/examples/sim/example_jacobian_ik.py +0 -2
warp/examples/sim/example_quadruped.py +5 -2
warp/examples/tile/example_tile_cholesky.py +79 -0
warp/examples/tile/example_tile_convolution.py +2 -2
warp/examples/tile/example_tile_fft.py +2 -2
warp/examples/tile/example_tile_filtering.py +3 -3
warp/examples/tile/example_tile_matmul.py +4 -4
warp/examples/tile/example_tile_mlp.py +12 -12
warp/examples/tile/example_tile_nbody.py +180 -0
warp/examples/tile/example_tile_walker.py +319 -0
warp/math.py +147 -0
warp/native/array.h +12 -0
warp/native/builtin.h +0 -1
warp/native/bvh.cpp +149 -70
warp/native/bvh.cu +287 -68
warp/native/bvh.h +195 -85
warp/native/clang/clang.cpp +5 -1
warp/native/cuda_util.cpp +35 -0
warp/native/cuda_util.h +5 -0
warp/native/exports.h +40 -40
warp/native/intersect.h +17 -0
warp/native/mat.h +41 -0
warp/native/mathdx.cpp +19 -0
warp/native/mesh.cpp +25 -8
warp/native/mesh.cu +153 -101
warp/native/mesh.h +482 -403
warp/native/quat.h +40 -0
warp/native/solid_angle.h +7 -0
warp/native/sort.cpp +85 -0
warp/native/sort.cu +34 -0
warp/native/sort.h +3 -1
warp/native/spatial.h +11 -0
warp/native/tile.h +1185 -664
warp/native/tile_reduce.h +8 -6
warp/native/vec.h +41 -0
warp/native/warp.cpp +8 -1
warp/native/warp.cu +263 -40
warp/native/warp.h +19 -5
warp/optim/linear.py +22 -4
warp/render/render_opengl.py +124 -59
warp/sim/__init__.py +6 -1
warp/sim/collide.py +270 -26
warp/sim/integrator_euler.py +25 -7
warp/sim/integrator_featherstone.py +154 -35
warp/sim/integrator_vbd.py +842 -40
warp/sim/model.py +111 -53
warp/stubs.py +248 -115
warp/tape.py +28 -30
warp/tests/aux_test_module_unload.py +15 -0
warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
warp/tests/test_array.py +74 -0
warp/tests/test_assert.py +242 -0
warp/tests/test_codegen.py +14 -61
warp/tests/test_collision.py +2 -2
warp/tests/test_examples.py +9 -0
warp/tests/test_grad_debug.py +87 -2
warp/tests/test_hash_grid.py +1 -1
warp/tests/test_ipc.py +116 -0
warp/tests/test_mat.py +138 -167
warp/tests/test_math.py +47 -1
warp/tests/test_matmul.py +11 -7
warp/tests/test_matmul_lite.py +4 -4
warp/tests/test_mesh.py +84 -60
warp/tests/test_mesh_query_aabb.py +165 -0
warp/tests/test_mesh_query_point.py +328 -286
warp/tests/test_mesh_query_ray.py +134 -121
warp/tests/test_mlp.py +2 -2
warp/tests/test_operators.py +43 -0
warp/tests/test_overwrite.py +2 -2
warp/tests/test_quat.py +77 -0
warp/tests/test_reload.py +29 -0
warp/tests/test_sim_grad_bounce_linear.py +204 -0
warp/tests/test_static.py +16 -0
warp/tests/test_tape.py +25 -0
warp/tests/test_tile.py +134 -191
warp/tests/test_tile_load.py +356 -0
warp/tests/test_tile_mathdx.py +61 -8
warp/tests/test_tile_mlp.py +17 -17
warp/tests/test_tile_reduce.py +24 -18
warp/tests/test_tile_shared_memory.py +66 -17
warp/tests/test_tile_view.py +165 -0
warp/tests/test_torch.py +35 -0
warp/tests/test_utils.py +36 -24
warp/tests/test_vec.py +110 -0
warp/tests/unittest_suites.py +29 -4
warp/tests/unittest_utils.py +30 -11
warp/thirdparty/unittest_parallel.py +2 -2
warp/types.py +409 -99
warp/utils.py +9 -5
{warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
{warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
{warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
warp/examples/benchmarks/benchmark_tile.py +0 -179
warp/native/tile_gemm.h +0 -341
{warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0

warp/tests/test_mesh_query_ray.py CHANGED Viewed

@@ -91,114 +91,120 @@ def test_mesh_query_ray_grad(test, device):
     mesh_points = wp.array(np.array(mesh_geom.GetPointsAttr().Get()), dtype=wp.vec3, device=device)
     mesh_indices = wp.array(np.array(tri_indices), dtype=int, device=device)
-    p = wp.vec3(50.0, 50.0, 0.0)
-    D = wp.vec3(0.0, -1.0, 0.0)
-    # create mesh
-    mesh = wp.Mesh(points=mesh_points, velocities=None, indices=mesh_indices)
-    tape = wp.Tape()
-    # analytic gradients
-    with tape:
-        query_points = wp.array(p, dtype=wp.vec3, device=device, requires_grad=True)
-        query_dirs = wp.array(D, dtype=wp.vec3, device=device, requires_grad=True)
-        intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
-        loss = wp.zeros(n=1, dtype=float, device=device, requires_grad=True)
-        wp.launch(
-            kernel=mesh_query_ray_loss,
-            dim=1,
-            inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
-            device=device,
-        )
-    tape.backward(loss=loss)
-    q = intersection_points.numpy().flatten()
-    analytic_p = tape.gradients[query_points].numpy().flatten()
-    analytic_D = tape.gradients[query_dirs].numpy().flatten()
-    # numeric gradients
-    # ray origin
-    eps = 1.0e-3
-    loss_values_p = []
-    numeric_p = np.zeros(3)
-    offset_query_points = [
-        wp.vec3(p[0] - eps, p[1], p[2]),
-        wp.vec3(p[0] + eps, p[1], p[2]),
-        wp.vec3(p[0], p[1] - eps, p[2]),
-        wp.vec3(p[0], p[1] + eps, p[2]),
-        wp.vec3(p[0], p[1], p[2] - eps),
-        wp.vec3(p[0], p[1], p[2] + eps),
-    ]
-    for i in range(6):
-        q = offset_query_points[i]
-        query_points = wp.array(q, dtype=wp.vec3, device=device)
-        query_dirs = wp.array(D, dtype=wp.vec3, device=device)
-        intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
-        loss = wp.zeros(n=1, dtype=float, device=device)
-        wp.launch(
-            kernel=mesh_query_ray_loss,
-            dim=1,
-            inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
-            device=device,
-        )
-        loss_values_p.append(loss.numpy()[0])
-    for i in range(3):
-        l_0 = loss_values_p[i * 2]
-        l_1 = loss_values_p[i * 2 + 1]
-        gradient = (l_1 - l_0) / (2.0 * eps)
-        numeric_p[i] = gradient
-    # ray dir
-    loss_values_D = []
-    numeric_D = np.zeros(3)
-    offset_query_dirs = [
-        wp.vec3(D[0] - eps, D[1], D[2]),
-        wp.vec3(D[0] + eps, D[1], D[2]),
-        wp.vec3(D[0], D[1] - eps, D[2]),
-        wp.vec3(D[0], D[1] + eps, D[2]),
-        wp.vec3(D[0], D[1], D[2] - eps),
-        wp.vec3(D[0], D[1], D[2] + eps),
-    ]
-    for i in range(6):
-        q = offset_query_dirs[i]
-        query_points = wp.array(p, dtype=wp.vec3, device=device)
-        query_dirs = wp.array(q, dtype=wp.vec3, device=device)
-        intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
-        loss = wp.zeros(n=1, dtype=float, device=device)
-        wp.launch(
-            kernel=mesh_query_ray_loss,
-            dim=1,
-            inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
-            device=device,
-        )
-        loss_values_D.append(loss.numpy()[0])
-    for i in range(3):
-        l_0 = loss_values_D[i * 2]
-        l_1 = loss_values_D[i * 2 + 1]
-        gradient = (l_1 - l_0) / (2.0 * eps)
-        numeric_D[i] = gradient
-    error_p = ((analytic_p - numeric_p) * (analytic_p - numeric_p)).sum(axis=0)
-    error_D = ((analytic_D - numeric_D) * (analytic_D - numeric_D)).sum(axis=0)
-    tolerance = 1.0e-3
-    test.assertTrue(error_p < tolerance, f"error is {error_p} which is >= {tolerance}")
-    test.assertTrue(error_D < tolerance, f"error is {error_D} which is >= {tolerance}")
+    if device.is_cpu:
+        constructors = ["sah", "median"]
+    else:
+        constructors = ["sah", "median", "lbvh"]
+    for constructor in constructors:
+        p = wp.vec3(50.0, 50.0, 0.0)
+        D = wp.vec3(0.0, -1.0, 0.0)
+        # create mesh
+        mesh = wp.Mesh(points=mesh_points, velocities=None, indices=mesh_indices, bvh_constructor=constructor)
+        tape = wp.Tape()
+        # analytic gradients
+        with tape:
+            query_points = wp.array(p, dtype=wp.vec3, device=device, requires_grad=True)
+            query_dirs = wp.array(D, dtype=wp.vec3, device=device, requires_grad=True)
+            intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
+            loss = wp.zeros(n=1, dtype=float, device=device, requires_grad=True)
+            wp.launch(
+                kernel=mesh_query_ray_loss,
+                dim=1,
+                inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
+                device=device,
+            )
+        tape.backward(loss=loss)
+        q = intersection_points.numpy().flatten()
+        analytic_p = tape.gradients[query_points].numpy().flatten()
+        analytic_D = tape.gradients[query_dirs].numpy().flatten()
+        # numeric gradients
+        # ray origin
+        eps = 1.0e-3
+        loss_values_p = []
+        numeric_p = np.zeros(3)
+        offset_query_points = [
+            wp.vec3(p[0] - eps, p[1], p[2]),
+            wp.vec3(p[0] + eps, p[1], p[2]),
+            wp.vec3(p[0], p[1] - eps, p[2]),
+            wp.vec3(p[0], p[1] + eps, p[2]),
+            wp.vec3(p[0], p[1], p[2] - eps),
+            wp.vec3(p[0], p[1], p[2] + eps),
+        ]
+        for i in range(6):
+            q = offset_query_points[i]
+            query_points = wp.array(q, dtype=wp.vec3, device=device)
+            query_dirs = wp.array(D, dtype=wp.vec3, device=device)
+            intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
+            loss = wp.zeros(n=1, dtype=float, device=device)
+            wp.launch(
+                kernel=mesh_query_ray_loss,
+                dim=1,
+                inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
+                device=device,
+            )
+            loss_values_p.append(loss.numpy()[0])
+        for i in range(3):
+            l_0 = loss_values_p[i * 2]
+            l_1 = loss_values_p[i * 2 + 1]
+            gradient = (l_1 - l_0) / (2.0 * eps)
+            numeric_p[i] = gradient
+        # ray dir
+        loss_values_D = []
+        numeric_D = np.zeros(3)
+        offset_query_dirs = [
+            wp.vec3(D[0] - eps, D[1], D[2]),
+            wp.vec3(D[0] + eps, D[1], D[2]),
+            wp.vec3(D[0], D[1] - eps, D[2]),
+            wp.vec3(D[0], D[1] + eps, D[2]),
+            wp.vec3(D[0], D[1], D[2] - eps),
+            wp.vec3(D[0], D[1], D[2] + eps),
+        ]
+        for i in range(6):
+            q = offset_query_dirs[i]
+            query_points = wp.array(p, dtype=wp.vec3, device=device)
+            query_dirs = wp.array(q, dtype=wp.vec3, device=device)
+            intersection_points = wp.zeros(n=1, dtype=wp.vec3, device=device)
+            loss = wp.zeros(n=1, dtype=float, device=device)
+            wp.launch(
+                kernel=mesh_query_ray_loss,
+                dim=1,
+                inputs=[mesh.id, query_points, query_dirs, intersection_points, loss],
+                device=device,
+            )
+            loss_values_D.append(loss.numpy()[0])
+        for i in range(3):
+            l_0 = loss_values_D[i * 2]
+            l_1 = loss_values_D[i * 2 + 1]
+            gradient = (l_1 - l_0) / (2.0 * eps)
+            numeric_D[i] = gradient
+        error_p = ((analytic_p - numeric_p) * (analytic_p - numeric_p)).sum(axis=0)
+        error_D = ((analytic_D - numeric_D) * (analytic_D - numeric_D)).sum(axis=0)
+        tolerance = 1.0e-3
+        test.assertTrue(error_p < tolerance, f"error is {error_p} which is >= {tolerance}")
+        test.assertTrue(error_D < tolerance, f"error is {error_D} which is >= {tolerance}")
 @wp.kernel
@@ -229,6 +235,11 @@ def raycast_kernel(
 def test_mesh_query_ray_edge(test, device):
+    if device.is_cpu:
+        constructors = ["sah", "median"]
+    else:
+        constructors = ["sah", "median", "lbvh"]
     # Create raycast starts and directions
     xx, yy = np.meshgrid(np.arange(0.1, 0.4, 0.01), np.arange(0.1, 0.4, 0.01))
     xx = xx.flatten().reshape(-1, 1)
@@ -239,27 +250,29 @@ def test_mesh_query_ray_edge(test, device):
     ray_dirs = np.zeros_like(ray_starts)
     ray_dirs[:, 2] = -1.0
+    n = len(ray_starts)
+    ray_starts = wp.array(ray_starts, shape=(n,), dtype=wp.vec3, device=device)
+    ray_dirs = wp.array(ray_dirs, shape=(n,), dtype=wp.vec3, device=device)
     # Create simple square mesh
     vertices = np.array([[0.0, 0.0, 0.0], [0.0, 0.5, 0.0], [0.5, 0.0, 0.0], [0.5, 0.5, 0.0]], dtype=np.float32)
     triangles = np.array([[1, 0, 2], [1, 2, 3]], dtype=np.int32)
-    mesh = wp.Mesh(
-        points=wp.array(vertices, dtype=wp.vec3, device=device),
-        indices=wp.array(triangles.flatten(), dtype=int, device=device),
-    )
-    counts = wp.zeros(1, dtype=int, device=device)
-    n = len(ray_starts)
+    for constructor in constructors:
+        mesh = wp.Mesh(
+            points=wp.array(vertices, dtype=wp.vec3, device=device),
+            indices=wp.array(triangles.flatten(), dtype=int, device=device),
+            bvh_constructor=constructor,
+        )
-    ray_starts = wp.array(ray_starts, shape=(n,), dtype=wp.vec3, device=device)
-    ray_dirs = wp.array(ray_dirs, shape=(n,), dtype=wp.vec3, device=device)
+        counts = wp.zeros(1, dtype=int, device=device)
-    wp.launch(kernel=raycast_kernel, dim=n, inputs=[mesh.id, ray_starts, ray_dirs, counts], device=device)
-    wp.synchronize()
+        wp.launch(kernel=raycast_kernel, dim=n, inputs=[mesh.id, ray_starts, ray_dirs, counts], device=device)
+        wp.synchronize()
-    test.assertEqual(counts.numpy()[0], n)
+        test.assertEqual(counts.numpy()[0], n)
 devices = get_test_devices()

warp/tests/test_mlp.py CHANGED Viewed

@@ -265,8 +265,8 @@ class TestMLP(unittest.TestCase):
     pass
-add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices)
-add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices)
+add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices, check_output=False)
+add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices, check_output=False)
 if __name__ == "__main__":

warp/tests/test_operators.py CHANGED Viewed

@@ -224,6 +224,48 @@ def test_operators_mat44():
     expect_eq(r0[3], wp.vec4(39.0, 42.0, 45.0, 48.0))
+@wp.struct
+class Complex:
+    real: float
+    imag: float
+@wp.func
+def add(
+    a: Complex,
+    b: Complex,
+) -> Complex:
+    return Complex(
+        a.real + b.real,
+        a.imag + b.imag,
+    )
+@wp.func
+def mul(
+    a: Complex,
+    b: Complex,
+) -> Complex:
+    return Complex(
+        a.real * b.real - a.imag * b.imag,
+        a.real * b.imag + a.imag * b.real,
+    )
+@wp.kernel
+def test_operators_overload():
+    a = Complex(1.0, 2.0)
+    b = Complex(3.0, 4.0)
+    c = a + b
+    expect_eq(c.real, 4.0)
+    expect_eq(c.imag, 6.0)
+    d = a * b
+    expect_eq(d.real, -5.0)
+    expect_eq(d.imag, 10.0)
 devices = get_test_devices()
@@ -241,6 +283,7 @@ add_kernel_test(TestOperators, test_operators_vec4, dim=1, devices=devices)
 add_kernel_test(TestOperators, test_operators_mat22, dim=1, devices=devices)
 add_kernel_test(TestOperators, test_operators_mat33, dim=1, devices=devices)
 add_kernel_test(TestOperators, test_operators_mat44, dim=1, devices=devices)
+add_kernel_test(TestOperators, test_operators_overload, dim=1, devices=devices)
 if __name__ == "__main__":

warp/tests/test_overwrite.py CHANGED Viewed

@@ -577,8 +577,8 @@ add_function_test(TestOverwrite, "test_views", test_views, devices=devices)
 add_function_test(TestOverwrite, "test_reset", test_reset, devices=devices)
 add_function_test(TestOverwrite, "test_copy", test_copy, devices=devices)
-add_function_test(TestOverwrite, "test_matmul", test_matmul, devices=devices)
-add_function_test(TestOverwrite, "test_batched_matmul", test_batched_matmul, devices=devices)
+add_function_test(TestOverwrite, "test_matmul", test_matmul, devices=devices, check_output=False)
+add_function_test(TestOverwrite, "test_batched_matmul", test_batched_matmul, devices=devices, check_output=False)
 add_function_test(TestOverwrite, "test_atomic_operations", test_atomic_operations, devices=devices)
 # Some warnings are only issued during codegen, and codegen only runs on cuda_0 in the MGPU case.

warp/tests/test_quat.py CHANGED Viewed

@@ -2095,6 +2095,81 @@ def test_py_arithmetic_ops(test, device, dtype):
     test.assertSequenceEqual(wptype(24) / v, make_quat(12, 6, 4, 3))
+@wp.kernel
+def quat_len_kernel(
+    q: wp.quat,
+    out: wp.array(dtype=int),
+):
+    length = wp.static(len(q))
+    wp.expect_eq(wp.static(len(q)), 4)
+    out[0] = wp.static(len(q))
+    foo = wp.quat()
+    length = len(foo)
+    wp.expect_eq(len(foo), 4)
+    out[1] = len(foo)
+def test_quat_len(test, device):
+    q = wp.quat()
+    out = wp.empty(2, dtype=int, device=device)
+    wp.launch(quat_len_kernel, dim=(1,), inputs=(q,), outputs=(out,), device=device)
+    test.assertEqual(out.numpy()[0], 4)
+    test.assertEqual(out.numpy()[1], 4)
+@wp.kernel
+def vector_augassign_kernel(
+    a: wp.array(dtype=wp.quat), b: wp.array(dtype=wp.quat), c: wp.array(dtype=wp.quat), d: wp.array(dtype=wp.quat)
+):
+    i = wp.tid()
+    q1 = wp.quat()
+    q2 = b[i]
+    q1[0] += q2[0]
+    q1[1] += q2[1]
+    q1[2] += q2[2]
+    q1[3] += q2[3]
+    a[i] = q1
+    q3 = wp.quat()
+    q4 = d[i]
+    q3[0] += q4[0]
+    q3[1] += q4[1]
+    q3[2] += q4[2]
+    q3[3] += q4[3]
+    c[i] = q1
+def test_vector_augassign(test, device):
+    N = 3
+    a = wp.zeros(N, dtype=wp.quat, requires_grad=True)
+    b = wp.ones(N, dtype=wp.quat, requires_grad=True)
+    c = wp.zeros(N, dtype=wp.quat, requires_grad=True)
+    d = wp.ones(N, dtype=wp.quat, requires_grad=True)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
+    tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+    assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+    assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+    assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
 devices = get_test_devices()
@@ -2203,6 +2278,8 @@ for dtype in np_float_types:
         TestQuat, f"test_py_arithmetic_ops_{dtype.__name__}", test_py_arithmetic_ops, devices=None, dtype=dtype
     )
+add_function_test(TestQuat, "test_quat_len", test_quat_len, devices=devices)
 if __name__ == "__main__":
     wp.clear_kernel_cache()

warp/tests/test_reload.py CHANGED Viewed

@@ -241,6 +241,32 @@ def test_graph_launch_after_module_reload(test, device):
         test.assertEqual(a.numpy()[0], 42)
+def test_module_unload_during_graph_capture(test, device):
+    @wp.kernel
+    def foo(a: wp.array(dtype=int)):
+        a[0] = 42
+    # preload module before graph capture
+    wp.load_module(device=device)
+    # load another module to test unloading during graph capture
+    other_module = wp.get_module("warp.tests.aux_test_module_unload")
+    other_module.load(device)
+    with wp.ScopedDevice(device):
+        a = wp.zeros(1, dtype=int)
+        with wp.ScopedCapture(force_module_load=False) as capture:
+            wp.launch(foo, dim=1, inputs=[a])
+            # unloading a module during graph capture should be fine (deferred until capture completes)
+            other_module.unload()
+        wp.capture_launch(capture.graph)
+        test.assertEqual(a.numpy()[0], 42)
 devices = get_test_devices()
 cuda_devices = get_cuda_test_devices()
@@ -258,6 +284,9 @@ add_function_test(TestReload, "test_reload_references", test_reload_references,
 add_function_test(
     TestReload, "test_graph_launch_after_module_reload", test_graph_launch_after_module_reload, devices=cuda_devices
 )
+add_function_test(
+    TestReload, "test_module_unload_during_graph_capture", test_module_unload_during_graph_capture, devices=cuda_devices
+)
 if __name__ == "__main__":