PyPI - warp-lang - Versions diffs - 1.3.3__py3-none-manylinux2014_aarch64.whl → 1.4.1__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.3.3__py3-none-manylinux2014_aarch64.whl → 1.4.1__py3-none-manylinux2014_aarch64.whl

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Potentially problematic release.

This version of warp-lang might be problematic. More details are available through the link on the original diff page.

Files changed (110)

warp/__init__.py +6 -0
warp/autograd.py +59 -6
warp/bin/warp.so +0 -0
warp/build_dll.py +8 -10
warp/builtins.py +103 -3
warp/codegen.py +447 -53
warp/config.py +1 -1
warp/context.py +682 -405
warp/dlpack.py +2 -0
warp/examples/benchmarks/benchmark_cloth.py +10 -0
warp/examples/core/example_render_opengl.py +12 -10
warp/examples/fem/example_adaptive_grid.py +251 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_diffusion_3d.py +2 -2
warp/examples/fem/example_magnetostatics.py +1 -1
warp/examples/fem/example_streamlines.py +1 -0
warp/examples/fem/utils.py +25 -5
warp/examples/sim/example_cloth.py +50 -6
warp/fem/__init__.py +2 -0
warp/fem/adaptivity.py +493 -0
warp/fem/field/field.py +2 -1
warp/fem/field/nodal_field.py +18 -26
warp/fem/field/test.py +4 -4
warp/fem/field/trial.py +4 -4
warp/fem/geometry/__init__.py +1 -0
warp/fem/geometry/adaptive_nanogrid.py +843 -0
warp/fem/geometry/nanogrid.py +55 -28
warp/fem/space/__init__.py +1 -1
warp/fem/space/nanogrid_function_space.py +69 -35
warp/fem/utils.py +118 -107
warp/jax_experimental.py +28 -15
warp/native/array.h +0 -1
warp/native/builtin.h +103 -6
warp/native/bvh.cu +4 -2
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/error.cpp +4 -2
warp/native/exports.h +99 -0
warp/native/mat.h +97 -0
warp/native/mesh.cpp +36 -0
warp/native/mesh.cu +52 -1
warp/native/mesh.h +1 -0
warp/native/quat.h +43 -0
warp/native/range.h +11 -2
warp/native/spatial.h +6 -0
warp/native/vec.h +74 -0
warp/native/warp.cpp +2 -1
warp/native/warp.cu +10 -3
warp/native/warp.h +8 -1
warp/paddle.py +382 -0
warp/sim/__init__.py +1 -0
warp/sim/collide.py +519 -0
warp/sim/integrator_euler.py +18 -5
warp/sim/integrator_featherstone.py +5 -5
warp/sim/integrator_vbd.py +1026 -0
warp/sim/integrator_xpbd.py +2 -6
warp/sim/model.py +50 -25
warp/sparse.py +9 -7
warp/stubs.py +459 -0
warp/tape.py +2 -0
warp/tests/aux_test_dependent.py +1 -0
warp/tests/aux_test_name_clash1.py +32 -0
warp/tests/aux_test_name_clash2.py +32 -0
warp/tests/aux_test_square.py +1 -0
warp/tests/test_array.py +188 -0
warp/tests/test_async.py +3 -3
warp/tests/test_atomic.py +6 -0
warp/tests/test_closest_point_edge_edge.py +93 -1
warp/tests/test_codegen.py +93 -15
warp/tests/test_codegen_instancing.py +1457 -0
warp/tests/test_collision.py +486 -0
warp/tests/test_compile_consts.py +3 -28
warp/tests/test_dlpack.py +170 -0
warp/tests/test_examples.py +22 -8
warp/tests/test_fast_math.py +10 -4
warp/tests/test_fem.py +81 -1
warp/tests/test_func.py +46 -0
warp/tests/test_implicit_init.py +49 -0
warp/tests/test_jax.py +58 -0
warp/tests/test_mat.py +84 -0
warp/tests/test_mesh_query_point.py +188 -0
warp/tests/test_model.py +13 -0
warp/tests/test_module_hashing.py +40 -0
warp/tests/test_multigpu.py +3 -3
warp/tests/test_overwrite.py +8 -0
warp/tests/test_paddle.py +852 -0
warp/tests/test_print.py +89 -0
warp/tests/test_quat.py +111 -0
warp/tests/test_reload.py +31 -1
warp/tests/test_scalar_ops.py +2 -0
warp/tests/test_static.py +568 -0
warp/tests/test_streams.py +64 -3
warp/tests/test_struct.py +4 -4
warp/tests/test_torch.py +24 -0
warp/tests/test_triangle_closest_point.py +137 -0
warp/tests/test_types.py +1 -1
warp/tests/test_vbd.py +386 -0
warp/tests/test_vec.py +143 -0
warp/tests/test_vec_scalar_ops.py +139 -0
warp/tests/unittest_suites.py +12 -0
warp/tests/unittest_utils.py +9 -5
warp/thirdparty/dlpack.py +3 -1
warp/types.py +167 -36
warp/utils.py +37 -14
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/METADATA +10 -8
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/RECORD +109 -97
warp/tests/test_point_triangle_closest_point.py +0 -143
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/WHEEL +0 -0
{warp_lang-1.3.3.dist-info → warp_lang-1.4.1.dist-info}/top_level.txt +0 -0

warp/tests/test_array.py CHANGED Viewed

@@ -2361,6 +2361,173 @@ def test_array_from_cai(test, device):
     assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))
+def test_array_inplace_ops(test, device):
+    @wp.kernel
+    def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+        i = wp.tid()
+        x[i] += y[i]
+    @wp.kernel
+    def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+        i, j = wp.tid()
+        x[i, j] += y[i, j]
+    @wp.kernel
+    def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+        i, j, k = wp.tid()
+        x[i, j, k] += y[i, j, k]
+    @wp.kernel
+    def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+        i, j, k, l = wp.tid()
+        x[i, j, k, l] += y[i, j, k, l]
+    @wp.kernel
+    def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+        i = wp.tid()
+        x[i] -= y[i]
+    @wp.kernel
+    def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+        i, j = wp.tid()
+        x[i, j] -= y[i, j]
+    @wp.kernel
+    def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+        i, j, k = wp.tid()
+        x[i, j, k] -= y[i, j, k]
+    @wp.kernel
+    def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+        i, j, k, l = wp.tid()
+        x[i, j, k, l] -= y[i, j, k, l]
+    @wp.kernel
+    def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
+        i = wp.tid()
+        x[i] += y[i]
+    @wp.kernel
+    def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
+        i = wp.tid()
+        x[i] += y[i]
+    @wp.kernel
+    def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+        i = wp.tid()
+        a = y[i]
+        a += x[i]
+        wp.atomic_add(z, 0, a)
+    N = 3
+    x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
+    x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
+    x3 = wp.ones((N, N, N), dtype=float, requires_grad=True, device=device)
+    x4 = wp.ones((N, N, N, N), dtype=float, requires_grad=True, device=device)
+    y1 = wp.clone(x1, requires_grad=True, device=device)
+    y2 = wp.clone(x2, requires_grad=True, device=device)
+    y3 = wp.clone(x3, requires_grad=True, device=device)
+    y4 = wp.clone(x4, requires_grad=True, device=device)
+    v1 = wp.ones(1, dtype=wp.vec3, requires_grad=True, device=device)
+    v2 = wp.clone(v1, requires_grad=True, device=device)
+    m1 = wp.ones(1, dtype=wp.mat33, requires_grad=True, device=device)
+    m2 = wp.clone(m1, requires_grad=True, device=device)
+    x = wp.ones(1, dtype=float, requires_grad=True, device=device)
+    y = wp.clone(x, requires_grad=True, device=device)
+    z = wp.zeros(1, dtype=float, requires_grad=True, device=device)
+    np_ones_1d = np.ones(N, dtype=float)
+    np_ones_2d = np.ones((N, N), dtype=float)
+    np_ones_3d = np.ones((N, N, N), dtype=float)
+    np_ones_4d = np.ones((N, N, N, N), dtype=float)
+    np_twos_1d = np.full(N, 2.0, dtype=float)
+    np_twos_2d = np.full((N, N), 2.0, dtype=float)
+    np_twos_3d = np.full((N, N, N), 2.0, dtype=float)
+    np_twos_4d = np.full((N, N, N, N), 2.0, dtype=float)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(inplace_add_1d, N, inputs=[x1, y1], device=device)
+        wp.launch(inplace_add_2d, (N, N), inputs=[x2, y2], device=device)
+        wp.launch(inplace_add_3d, (N, N, N), inputs=[x3, y3], device=device)
+        wp.launch(inplace_add_4d, (N, N, N, N), inputs=[x4, y4], device=device)
+    tape.backward(grads={x1: wp.ones_like(x1), x2: wp.ones_like(x2), x3: wp.ones_like(x3), x4: wp.ones_like(x4)})
+    assert_np_equal(x1.grad.numpy(), np_ones_1d)
+    assert_np_equal(x2.grad.numpy(), np_ones_2d)
+    assert_np_equal(x3.grad.numpy(), np_ones_3d)
+    assert_np_equal(x4.grad.numpy(), np_ones_4d)
+    assert_np_equal(y1.grad.numpy(), np_ones_1d)
+    assert_np_equal(y2.grad.numpy(), np_ones_2d)
+    assert_np_equal(y3.grad.numpy(), np_ones_3d)
+    assert_np_equal(y4.grad.numpy(), np_ones_4d)
+    assert_np_equal(x1.numpy(), np_twos_1d)
+    assert_np_equal(x2.numpy(), np_twos_2d)
+    assert_np_equal(x3.numpy(), np_twos_3d)
+    assert_np_equal(x4.numpy(), np_twos_4d)
+    x1.grad.zero_()
+    x2.grad.zero_()
+    x3.grad.zero_()
+    x4.grad.zero_()
+    tape.reset()
+    with tape:
+        wp.launch(inplace_sub_1d, N, inputs=[x1, y1], device=device)
+        wp.launch(inplace_sub_2d, (N, N), inputs=[x2, y2], device=device)
+        wp.launch(inplace_sub_3d, (N, N, N), inputs=[x3, y3], device=device)
+        wp.launch(inplace_sub_4d, (N, N, N, N), inputs=[x4, y4], device=device)
+    tape.backward(grads={x1: wp.ones_like(x1), x2: wp.ones_like(x2), x3: wp.ones_like(x3), x4: wp.ones_like(x4)})
+    assert_np_equal(x1.grad.numpy(), np_ones_1d)
+    assert_np_equal(x2.grad.numpy(), np_ones_2d)
+    assert_np_equal(x3.grad.numpy(), np_ones_3d)
+    assert_np_equal(x4.grad.numpy(), np_ones_4d)
+    assert_np_equal(y1.grad.numpy(), -np_ones_1d)
+    assert_np_equal(y2.grad.numpy(), -np_ones_2d)
+    assert_np_equal(y3.grad.numpy(), -np_ones_3d)
+    assert_np_equal(y4.grad.numpy(), -np_ones_4d)
+    assert_np_equal(x1.numpy(), np_ones_1d)
+    assert_np_equal(x2.numpy(), np_ones_2d)
+    assert_np_equal(x3.numpy(), np_ones_3d)
+    assert_np_equal(x4.numpy(), np_ones_4d)
+    x1.grad.zero_()
+    x2.grad.zero_()
+    x3.grad.zero_()
+    x4.grad.zero_()
+    tape.reset()
+    with tape:
+        wp.launch(inplace_add_vecs, 1, inputs=[v1, v2], device=device)
+        wp.launch(inplace_add_mats, 1, inputs=[m1, m2], device=device)
+        wp.launch(inplace_add_rhs, 1, inputs=[x, y, z], device=device)
+    tape.backward(loss=z, grads={v1: wp.ones_like(v1, requires_grad=False), m1: wp.ones_like(m1, requires_grad=False)})
+    assert_np_equal(v1.numpy(), np.full(shape=(1, 3), fill_value=2.0, dtype=float))
+    assert_np_equal(v1.grad.numpy(), np.ones(shape=(1, 3), dtype=float))
+    assert_np_equal(v2.grad.numpy(), np.ones(shape=(1, 3), dtype=float))
+    assert_np_equal(m1.numpy(), np.full(shape=(1, 3, 3), fill_value=2.0, dtype=float))
+    assert_np_equal(m1.grad.numpy(), np.ones(shape=(1, 3, 3), dtype=float))
+    assert_np_equal(m2.grad.numpy(), np.ones(shape=(1, 3, 3), dtype=float))
+    assert_np_equal(x.grad.numpy(), np.ones(1, dtype=float))
+    assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))
 @wp.kernel
 def inc_scalar(a: wp.array(dtype=float)):
     tid = wp.tid()
@@ -2423,6 +2590,25 @@ def test_array_from_int64_domain(test, device):
     wp.zeros(np.array([1504, 1080, 520], dtype=np.int64), dtype=wp.float32, device=device)
+def test_numpy_array_interface(test, device):
+    # We should be able to convert between NumPy and Warp arrays using __array_interface__ on CPU.
+    # This tests all scalar types supported by both.
+    n = 10
+    scalar_types = wp.types.scalar_types
+    for dtype in scalar_types:
+        # test round trip
+        a1 = wp.zeros(n, dtype=dtype, device="cpu")
+        na = np.array(a1)
+        a2 = wp.array(na, device="cpu")
+        assert a1.dtype == a2.dtype
+        assert a1.shape == a2.shape
+        assert a1.strides == a2.strides
 devices = get_test_devices()
@@ -2481,7 +2667,9 @@ add_function_test(TestArray, "test_array_of_structs_from_numpy", test_array_of_s
 add_function_test(TestArray, "test_array_of_structs_roundtrip", test_array_of_structs_roundtrip, devices=devices)
 add_function_test(TestArray, "test_array_from_numpy", test_array_from_numpy, devices=devices)
 add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
+add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])
+add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
 add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
 add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)

warp/tests/test_async.py CHANGED Viewed

@@ -11,7 +11,7 @@ import numpy as np
 import warp as wp
 from warp.tests.unittest_utils import *
-from warp.utils import check_iommu
+from warp.utils import check_p2p
 class Capturable:
@@ -507,8 +507,8 @@ for src_type, src_ctor in array_constructors.items():
         copy_type = f"{array_type_codes[src_type]}2{array_type_codes[dst_type]}"
         for transfer_type, device_pair in device_pairs.items():
-            # skip p2p tests if IOMMU is enabled on Linux
-            if transfer_type == "p2p" and not check_iommu():
+            # skip p2p tests if not supported (e.g., IOMMU is enabled on Linux)
+            if transfer_type == "p2p" and not check_p2p():
                 continue
             src_device = device_pair[0]

warp/tests/test_atomic.py CHANGED Viewed

@@ -45,6 +45,10 @@ def make_atomic_test(type):
             base = rng.random(size=1, dtype=np.float32)
             val = rng.random(size=n, dtype=np.float32)
+        elif type == wp.float64:
+            base = rng.random(size=1, dtype=np.float64)
+            val = rng.random(size=n, dtype=np.float64)
         else:
             base = rng.random(size=(1, *type._shape_), dtype=float)
             val = rng.random(size=(n, *type._shape_), dtype=float)
@@ -109,6 +113,7 @@ def make_atomic_test(type):
 # generate test functions for atomic types
 test_atomic_int = make_atomic_test(wp.int32)
 test_atomic_float = make_atomic_test(wp.float32)
+test_atomic_double = make_atomic_test(wp.float64)
 test_atomic_vec2 = make_atomic_test(wp.vec2)
 test_atomic_vec3 = make_atomic_test(wp.vec3)
 test_atomic_vec4 = make_atomic_test(wp.vec4)
@@ -126,6 +131,7 @@ class TestAtomic(unittest.TestCase):
 add_function_test(TestAtomic, "test_atomic_int", test_atomic_int, devices=devices)
 add_function_test(TestAtomic, "test_atomic_float", test_atomic_float, devices=devices)
+add_function_test(TestAtomic, "test_atomic_double", test_atomic_double, devices=devices)
 add_function_test(TestAtomic, "test_atomic_vec2", test_atomic_vec2, devices=devices)
 add_function_test(TestAtomic, "test_atomic_vec3", test_atomic_vec3, devices=devices)
 add_function_test(TestAtomic, "test_atomic_vec4", test_atomic_vec4, devices=devices)

warp/tests/test_closest_point_edge_edge.py CHANGED Viewed

@@ -170,6 +170,93 @@ def test_edge_edge_perpendicular_s0_t1(test, device):
     test.assertAlmostEqual(st0[1], 1.0)  # t value
+@wp.func
+def check_edge_closest_point_sufficient_necessary(c1: wp.vec3, c2: wp.vec3, t: float, p: wp.vec3, q: wp.vec3):
+    """
+    This is a sufficient and necessary condition of closest point
+    c1: closest point on the other edge
+    c2: closest point on edge p-q
+    t: c2 = (1.0-t) * p + t * q
+    e1, e2: end points of the edge
+    """
+    eps = 1e-5
+    e = p - q
+    if t == 0.0:
+        wp.expect_eq(wp.dot(c1 - p, p - q) > -eps, True)
+        wp.expect_eq(wp.abs(wp.length(c2 - p)) < eps, True)
+    elif t == 1.0:
+        wp.expect_eq(wp.dot(c1 - q, q - p) > -eps, True)
+        wp.expect_eq(wp.abs(wp.length(c2 - q)) < eps, True)
+    else:
+        # interior closest point, c1c2 must be perpendicular to e
+        c1c2 = c1 - c2
+        wp.expect_eq(wp.abs(wp.dot(c1c2, e)) < eps, True)
+@wp.kernel
+def check_edge_closest_point_sufficient_necessary_kernel(
+    p1s: wp.array(dtype=wp.vec3),
+    q1s: wp.array(dtype=wp.vec3),
+    p2s: wp.array(dtype=wp.vec3),
+    q2s: wp.array(dtype=wp.vec3),
+    epsilon: float,
+):
+    tid = wp.tid()
+    p1 = p1s[tid]
+    q1 = q1s[tid]
+    p2 = p2s[tid]
+    q2 = q2s[tid]
+    st = wp.closest_point_edge_edge(p1, q1, p2, q2, epsilon)
+    s = st[0]
+    t = st[1]
+    c1 = p1 + (q1 - p1) * s
+    c2 = p2 + (q2 - p2) * t
+    check_edge_closest_point_sufficient_necessary(c1, c2, t, p2, q2)
+    check_edge_closest_point_sufficient_necessary(c2, c1, s, p1, q1)
+def check_edge_closest_point_random(test, device):
+    num_tests = 100000
+    np.random.seed(12345)
+    p1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    q1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    p2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    q2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    wp.launch(
+        kernel=check_edge_closest_point_sufficient_necessary_kernel,
+        dim=num_tests,
+        inputs=[p1, q1, p2, q2, epsilon],
+        device=device,
+    )
+    # parallel edges
+    p1 = np.random.randn(num_tests, 3)
+    q1 = np.random.randn(num_tests, 3)
+    shifts = np.random.randn(num_tests, 3)
+    p2 = p1 + shifts
+    q2 = q1 + shifts
+    p1 = wp.array(p1, dtype=wp.vec3, device=device)
+    q1 = wp.array(q1, dtype=wp.vec3, device=device)
+    p2 = wp.array(p2, dtype=wp.vec3, device=device)
+    q2 = wp.array(q2, dtype=wp.vec3, device=device)
+    wp.launch(
+        kernel=check_edge_closest_point_sufficient_necessary_kernel,
+        dim=num_tests,
+        inputs=[p1, q1, p2, q2, epsilon],
+        device=device,
+    )
 devices = get_test_devices()
@@ -220,7 +307,12 @@ add_function_test(
     test_edge_edge_perpendicular_s0_t1,
     devices=devices,
 )
+add_function_test(
+    TestClosestPointEdgeEdgeMethods,
+    "test_edge_closest_point_random",
+    check_edge_closest_point_random,
+    devices=devices,
+)
 if __name__ == "__main__":
     wp.clear_kernel_cache()

warp/tests/test_codegen.py CHANGED Viewed

@@ -405,24 +405,24 @@ def test_error_global_var(test, device):
     kernel = wp.Kernel(func=kernel_1_fn)
     with test.assertRaisesRegex(
-        RuntimeError,
-        r"Cannot reference a global variable from a kernel unless `wp.constant\(\)` is being used",
+        TypeError,
+        r"Invalid external reference type: <class 'warp.types.array'>",
     ):
-        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,))
+        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,), device=device)
     kernel = wp.Kernel(func=kernel_2_fn)
     with test.assertRaisesRegex(
-        RuntimeError,
-        r"Cannot reference a global variable from a kernel unless `wp.constant\(\)` is being used",
+        TypeError,
+        r"Invalid external reference type: <class 'warp.types.array'>",
     ):
-        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,))
+        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,), device=device)
     kernel = wp.Kernel(func=kernel_3_fn)
     with test.assertRaisesRegex(
-        RuntimeError,
-        r"Cannot reference a global variable from a kernel unless `wp.constant\(\)` is being used",
+        TypeError,
+        r"Invalid external reference type: <class 'warp.types.array'>",
     ):
-        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,))
+        wp.launch(kernel, dim=out.shape, inputs=(), outputs=(out,), device=device)
 def test_error_collection_construct(test, device):
@@ -443,28 +443,28 @@ def test_error_collection_construct(test, device):
         RuntimeError,
         r"List constructs are not supported in kernels. Use vectors like `wp.vec3\(\)` for small collections instead.",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
     kernel = wp.Kernel(func=kernel_2_fn)
     with test.assertRaisesRegex(
         RuntimeError,
         r"Tuple constructs are not supported in kernels. Use vectors like `wp.vec3\(\)` for small collections instead.",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
     kernel = wp.Kernel(func=kernel_3_fn)
     with test.assertRaisesRegex(
         RuntimeError,
         r"Construct `ast.Dict` not supported in kernels.",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
     kernel = wp.Kernel(func=kernel_4_fn)
     with test.assertRaisesRegex(
         RuntimeError,
         r"Tuple constructs are not supported in kernels. Use vectors like `wp.vec3\(\)` instead.",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
 def test_error_unmatched_arguments(test, device):
@@ -479,14 +479,60 @@ def test_error_unmatched_arguments(test, device):
         RuntimeError,
         r"Input types must be the same, got \['int32', 'float32'\]",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
     kernel = wp.Kernel(func=kernel_2_fn)
     with test.assertRaisesRegex(
         RuntimeError,
         r"Input types must be exactly the same, got \[\"vector\(length=2, dtype=<class 'warp.types.float32'>\)\", \"vector\(length=2, dtype=<class 'warp.types.float16'>\)\"\]",
     ):
-        wp.launch(kernel, dim=1)
+        wp.launch(kernel, dim=1, device=device)
+def test_error_mutating_constant_in_dynamic_loop(test, device):
+    @wp.kernel
+    def dynamic_loop_kernel(n: int, input: wp.array(dtype=float)):
+        my_constant = 0.0
+        for i in range(n):
+            my_constant += input[i]
+    inputs = wp.array([1.0, 2.0, 3.0], dtype=float, device=device)
+    with test.assertRaisesRegex(
+        wp.codegen.WarpCodegenError,
+        r"Error mutating a constant my_constant inside a dynamic loop, use the following syntax\: pi = float\(3\.141\) to declare a dynamic variable",
+    ):
+        wp.launch(dynamic_loop_kernel, dim=1, inputs=[3, inputs], device=device)
+    # the following nested loop must not raise an error
+    const_a = 7
+    const_b = 5
+    @wp.kernel
+    def mixed_dyn_static_loop_kernel(dyn_a: int, dyn_b: int, dyn_c: int, output: wp.array(dtype=float, ndim=2)):
+        tid = wp.tid()
+        for i in range(const_a + 1):
+            for j in range(dyn_a + 1):
+                for k in range(dyn_b + 1):
+                    for l in range(const_b + 1):
+                        for m in range(dyn_c + 1):
+                            coeff = i + j + k + l + m
+                            output[tid, coeff] = 1.0
+    dyn_a, dyn_b, dyn_c = 3, 4, 5
+    num_threads = 10
+    output = wp.empty([num_threads, const_a + const_b + dyn_a + dyn_b + dyn_c + 1], dtype=float, device=device)
+    wp.launch(
+        mixed_dyn_static_loop_kernel,
+        num_threads,
+        inputs=[
+            dyn_a,
+            dyn_b,
+            dyn_c,
+        ],
+        outputs=[output],
+        device=device,
+    )
+    assert_np_equal(output.numpy(), np.ones([num_threads, const_a + const_b + dyn_a + dyn_b + dyn_c + 1]))
 @wp.kernel
@@ -507,6 +553,30 @@ def test_call_syntax():
     wp.expect_eq(wp.matrix(rot=rot, pos=pos, dtype=wp.float32, scale=scale), expected_matrix)
+# test shadowing builtin functions
+@wp.func
+def sum(a: wp.vec3) -> float:
+    return a[0] + a[1] + a[2]
+@wp.kernel
+def test_shadow_builtin():
+    wp.expect_eq(sum(wp.vec3(1.0)), 3.0)
+@wp.struct
+class Iterator:
+    valid: wp.bool
+@wp.kernel(enable_backward=False)
+def test_while_condition_eval():
+    it = Iterator()
+    it.valid = True
+    while it.valid:
+        it.valid = False
 class TestCodeGen(unittest.TestCase):
     pass
@@ -643,8 +713,16 @@ add_function_test(
 add_function_test(
     TestCodeGen, func=test_error_unmatched_arguments, name="test_error_unmatched_arguments", devices=devices
 )
+add_function_test(
+    TestCodeGen,
+    func=test_error_mutating_constant_in_dynamic_loop,
+    name="test_error_mutating_constant_in_dynamic_loop",
+    devices=devices,
+)
 add_kernel_test(TestCodeGen, name="test_call_syntax", kernel=test_call_syntax, dim=1, devices=devices)
+add_kernel_test(TestCodeGen, name="test_shadow_builtin", kernel=test_shadow_builtin, dim=1, devices=devices)
+add_kernel_test(TestCodeGen, name="test_while_condition_eval", kernel=test_while_condition_eval, dim=1, devices=devices)
 if __name__ == "__main__":