warp-lang 0.11.0-py3-none-manylinux2014_x86_64.whl → 1.0.0-py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/__init__.py +8 -0
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +7 -6
- warp/build_dll.py +70 -79
- warp/builtins.py +10 -6
- warp/codegen.py +51 -19
- warp/config.py +7 -8
- warp/constants.py +3 -0
- warp/context.py +948 -245
- warp/dlpack.py +198 -113
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -0
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usda +42 -0
- warp/examples/assets/nv_ant.xml +92 -0
- warp/examples/assets/nv_humanoid.xml +183 -0
- warp/examples/assets/quadruped.urdf +268 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usda +56 -0
- warp/examples/assets/torus.usda +105 -0
- warp/examples/benchmarks/benchmark_api.py +383 -0
- warp/examples/benchmarks/benchmark_cloth.py +279 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +100 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +142 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +146 -0
- warp/examples/benchmarks/benchmark_launches.py +295 -0
- warp/examples/core/example_dem.py +221 -0
- warp/examples/core/example_fluid.py +267 -0
- warp/examples/core/example_graph_capture.py +129 -0
- warp/examples/core/example_marching_cubes.py +177 -0
- warp/examples/core/example_mesh.py +154 -0
- warp/examples/core/example_mesh_intersect.py +193 -0
- warp/examples/core/example_nvdb.py +169 -0
- warp/examples/core/example_raycast.py +89 -0
- warp/examples/core/example_raymarch.py +178 -0
- warp/examples/core/example_render_opengl.py +141 -0
- warp/examples/core/example_sph.py +389 -0
- warp/examples/core/example_torch.py +181 -0
- warp/examples/core/example_wave.py +249 -0
- warp/examples/fem/bsr_utils.py +380 -0
- warp/examples/fem/example_apic_fluid.py +391 -0
- warp/examples/fem/example_convection_diffusion.py +168 -0
- warp/examples/fem/example_convection_diffusion_dg.py +209 -0
- warp/examples/fem/example_convection_diffusion_dg0.py +194 -0
- warp/examples/fem/example_deformed_geometry.py +159 -0
- warp/examples/fem/example_diffusion.py +173 -0
- warp/examples/fem/example_diffusion_3d.py +152 -0
- warp/examples/fem/example_diffusion_mgpu.py +214 -0
- warp/examples/fem/example_mixed_elasticity.py +222 -0
- warp/examples/fem/example_navier_stokes.py +243 -0
- warp/examples/fem/example_stokes.py +192 -0
- warp/examples/fem/example_stokes_transfer.py +249 -0
- warp/examples/fem/mesh_utils.py +109 -0
- warp/examples/fem/plot_utils.py +287 -0
- warp/examples/optim/example_bounce.py +248 -0
- warp/examples/optim/example_cloth_throw.py +210 -0
- warp/examples/optim/example_diffray.py +535 -0
- warp/examples/optim/example_drone.py +850 -0
- warp/examples/optim/example_inverse_kinematics.py +169 -0
- warp/examples/optim/example_inverse_kinematics_torch.py +170 -0
- warp/examples/optim/example_spring_cage.py +234 -0
- warp/examples/optim/example_trajectory.py +201 -0
- warp/examples/sim/example_cartpole.py +128 -0
- warp/examples/sim/example_cloth.py +184 -0
- warp/examples/sim/example_granular.py +113 -0
- warp/examples/sim/example_granular_collision_sdf.py +185 -0
- warp/examples/sim/example_jacobian_ik.py +213 -0
- warp/examples/sim/example_particle_chain.py +106 -0
- warp/examples/sim/example_quadruped.py +179 -0
- warp/examples/sim/example_rigid_chain.py +191 -0
- warp/examples/sim/example_rigid_contact.py +176 -0
- warp/examples/sim/example_rigid_force.py +126 -0
- warp/examples/sim/example_rigid_gyroscopic.py +97 -0
- warp/examples/sim/example_rigid_soft_contact.py +124 -0
- warp/examples/sim/example_soft_body.py +178 -0
- warp/fabric.py +29 -20
- warp/fem/cache.py +0 -1
- warp/fem/dirichlet.py +0 -2
- warp/fem/integrate.py +0 -1
- warp/jax.py +45 -0
- warp/jax_experimental.py +339 -0
- warp/native/builtin.h +12 -0
- warp/native/bvh.cu +18 -18
- warp/native/clang/clang.cpp +8 -3
- warp/native/cuda_util.cpp +94 -5
- warp/native/cuda_util.h +35 -6
- warp/native/cutlass_gemm.cpp +1 -1
- warp/native/cutlass_gemm.cu +4 -1
- warp/native/error.cpp +66 -0
- warp/native/error.h +27 -0
- warp/native/mesh.cu +2 -2
- warp/native/reduce.cu +4 -4
- warp/native/runlength_encode.cu +2 -2
- warp/native/scan.cu +2 -2
- warp/native/sparse.cu +0 -1
- warp/native/temp_buffer.h +2 -2
- warp/native/warp.cpp +95 -60
- warp/native/warp.cu +1053 -218
- warp/native/warp.h +49 -32
- warp/optim/linear.py +33 -16
- warp/render/render_opengl.py +202 -101
- warp/render/render_usd.py +82 -40
- warp/sim/__init__.py +13 -4
- warp/sim/articulation.py +4 -5
- warp/sim/collide.py +320 -175
- warp/sim/import_mjcf.py +25 -30
- warp/sim/import_urdf.py +94 -63
- warp/sim/import_usd.py +51 -36
- warp/sim/inertia.py +3 -2
- warp/sim/integrator.py +233 -0
- warp/sim/integrator_euler.py +447 -469
- warp/sim/integrator_featherstone.py +1991 -0
- warp/sim/integrator_xpbd.py +1420 -640
- warp/sim/model.py +765 -487
- warp/sim/particles.py +2 -1
- warp/sim/render.py +35 -13
- warp/sim/utils.py +222 -11
- warp/stubs.py +8 -0
- warp/tape.py +16 -1
- warp/tests/aux_test_grad_customs.py +23 -0
- warp/tests/test_array.py +190 -1
- warp/tests/test_async.py +656 -0
- warp/tests/test_bool.py +50 -0
- warp/tests/test_dlpack.py +164 -11
- warp/tests/test_examples.py +166 -74
- warp/tests/test_fem.py +8 -1
- warp/tests/test_generics.py +15 -5
- warp/tests/test_grad.py +1 -1
- warp/tests/test_grad_customs.py +172 -12
- warp/tests/test_jax.py +254 -0
- warp/tests/test_large.py +29 -6
- warp/tests/test_launch.py +25 -0
- warp/tests/test_linear_solvers.py +20 -3
- warp/tests/test_matmul.py +61 -16
- warp/tests/test_matmul_lite.py +13 -13
- warp/tests/test_mempool.py +186 -0
- warp/tests/test_multigpu.py +3 -0
- warp/tests/test_options.py +16 -2
- warp/tests/test_peer.py +137 -0
- warp/tests/test_print.py +3 -1
- warp/tests/test_quat.py +23 -0
- warp/tests/test_sim_kinematics.py +97 -0
- warp/tests/test_snippet.py +126 -3
- warp/tests/test_streams.py +108 -79
- warp/tests/test_torch.py +16 -8
- warp/tests/test_utils.py +32 -27
- warp/tests/test_verify_fp.py +65 -0
- warp/tests/test_volume.py +1 -1
- warp/tests/unittest_serial.py +2 -0
- warp/tests/unittest_suites.py +12 -0
- warp/tests/unittest_utils.py +14 -7
- warp/thirdparty/unittest_parallel.py +15 -3
- warp/torch.py +10 -8
- warp/types.py +363 -246
- warp/utils.py +143 -19
- warp_lang-1.0.0.dist-info/LICENSE.md +126 -0
- warp_lang-1.0.0.dist-info/METADATA +394 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/RECORD +167 -86
- warp/sim/optimizer.py +0 -138
- warp_lang-0.11.0.dist-info/LICENSE.md +0 -36
- warp_lang-0.11.0.dist-info/METADATA +0 -238
- /warp/tests/{walkthough_debug.py → walkthrough_debug.py} +0 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/WHEEL +0 -0
- {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/top_level.txt +0 -0
warp/tests/test_grad_customs.py
CHANGED
@@ -22,6 +22,7 @@ wp.init()
 def reversible_increment(
     counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
 ):
+    """This is a docstring"""
     next_index = wp.atomic_add(counter, counter_index, value)
     thread_values[tid] = next_index
     return next_index
@@ -31,6 +32,7 @@ def reversible_increment(
 def replay_reversible_increment(
     counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
 ):
+    """This is a docstring"""
     return thread_values[tid]


@@ -58,34 +60,39 @@ def test_custom_replay_grad(test, device):
         run_atomic_add, dim=num_threads, inputs=[inputs, counter, thread_ids], outputs=[outputs], device=device
     )

-    tape.backward(grads={outputs: wp.
+    tape.backward(grads={outputs: wp.ones(num_threads, dtype=wp.float32, device=device)})
     assert_np_equal(inputs.grad.numpy(), 2.0 * inputs.numpy(), tol=1e-4)


 @wp.func
 def overload_fn(x: float, y: float):
+    """This is a docstring"""
     return x * 3.0 + y / 3.0, y**2.5


 @wp.func_grad(overload_fn)
 def overload_fn_grad(x: float, y: float, adj_ret0: float, adj_ret1: float):
+    """This is a docstring"""
     wp.adjoint[x] += x * adj_ret0 * 42.0 + y * adj_ret1 * 10.0
     wp.adjoint[y] += y * adj_ret1 * 3.0


 @wp.struct
 class MyStruct:
+    """This is a docstring"""
     scalar: float
     vec: wp.vec3


 @wp.func
 def overload_fn(x: MyStruct):
+    """This is a docstring"""
     return x.vec[0] * x.vec[1] * x.vec[2] * 4.0, wp.length(x.vec), x.scalar**0.5


 @wp.func_grad(overload_fn)
 def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: float):
+    """This is a docstring"""
     wp.adjoint[x.scalar] += x.scalar * adj_ret0 * 10.0
     wp.adjoint[x.vec][0] += adj_ret0 * x.vec[1] * x.vec[2] * 20.0
     wp.adjoint[x.vec][1] += adj_ret1 * x.vec[0] * x.vec[2] * 30.0
@@ -96,6 +103,7 @@ def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: fl
 def run_overload_float_fn(
     xs: wp.array(dtype=float), ys: wp.array(dtype=float), output0: wp.array(dtype=float), output1: wp.array(dtype=float)
 ):
+    """This is a docstring"""
     i = wp.tid()
     out0, out1 = overload_fn(xs[i], ys[i])
     output0[i] = out0
@@ -111,17 +119,19 @@ def run_overload_struct_fn(xs: wp.array(dtype=MyStruct), output: wp.array(dtype=

 def test_custom_overload_grad(test, device):
     dim = 3
-    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True)
-    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True)
-    out0_float = wp.zeros(dim)
-    out1_float = wp.zeros(dim)
+    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True, device=device)
+    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True, device=device)
+    out0_float = wp.zeros(dim, device=device)
+    out1_float = wp.zeros(dim, device=device)
     tape = wp.Tape()
     with tape:
-        wp.launch(
+        wp.launch(
+            run_overload_float_fn, dim=dim, inputs=[xs_float, ys_float], outputs=[out0_float, out1_float], device=device
+        )
     tape.backward(
         grads={
-            out0_float: wp.
-            out1_float: wp.
+            out0_float: wp.ones(dim, dtype=wp.float32, device=device),
+            out1_float: wp.ones(dim, dtype=wp.float32, device=device),
         }
     )
     assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
@@ -136,12 +146,12 @@ def test_custom_overload_grad(test, device):
     x2 = MyStruct()
     x2.vec = wp.vec3(8.0, 9.0, 10.0)
     x2.scalar = 19.0
-    xs_struct = wp.array([x0, x1, x2], dtype=MyStruct, requires_grad=True)
-    out_struct = wp.zeros(dim)
+    xs_struct = wp.array([x0, x1, x2], dtype=MyStruct, requires_grad=True, device=device)
+    out_struct = wp.zeros(dim, device=device)
     tape = wp.Tape()
     with tape:
-        wp.launch(run_overload_struct_fn, dim=dim, inputs=[xs_struct], outputs=[out_struct])
-    tape.backward(grads={out_struct: wp.
+        wp.launch(run_overload_struct_fn, dim=dim, inputs=[xs_struct], outputs=[out_struct], device=device)
+    tape.backward(grads={out_struct: wp.ones(dim, dtype=wp.float32, device=device)})
     xs_struct_np = xs_struct.numpy()
     struct_grads = xs_struct.grad.numpy()
     # fmt: off
@@ -160,6 +170,153 @@ def test_custom_overload_grad(test, device):
     # fmt: on


+def test_custom_import_grad(test, device):
+    from warp.tests.aux_test_grad_customs import aux_custom_fn
+
+    @wp.kernel
+    def run_defined_float_fn(
+        xs: wp.array(dtype=float),
+        ys: wp.array(dtype=float),
+        output0: wp.array(dtype=float),
+        output1: wp.array(dtype=float),
+    ):
+        i = wp.tid()
+        out0, out1 = aux_custom_fn(xs[i], ys[i])
+        output0[i] = out0
+        output1[i] = out1
+
+    dim = 3
+    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True, device=device)
+    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True, device=device)
+    out0_float = wp.zeros(dim, device=device)
+    out1_float = wp.zeros(dim, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            run_defined_float_fn, dim=dim, inputs=[xs_float, ys_float], outputs=[out0_float, out1_float], device=device
+        )
+    tape.backward(
+        grads={
+            out0_float: wp.ones(dim, dtype=wp.float32, device=device),
+            out1_float: wp.ones(dim, dtype=wp.float32, device=device),
+        }
+    )
+    assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
+    assert_np_equal(ys_float.grad.numpy(), ys_float.numpy() * 3.0)
+
+
+@wp.func
+def sigmoid(x: float):
+    return 1.0 / (1.0 + wp.exp(-x))
+
+
+@wp.func_grad(sigmoid)
+def adj_sigmoid(x: float, adj: float):
+    # unused function to test that we don't run into infinite recursion when calling
+    # the forward function from within the gradient function
+    wp.adjoint[x] += adj * sigmoid(x) * (1.0 - sigmoid(x))
+
+
+@wp.func
+def sigmoid_no_return(i: int, xs: wp.array(dtype=float), ys: wp.array(dtype=float)):
+    # test function that does not return anything
+    ys[i] = sigmoid(xs[i])
+
+
+@wp.func_grad(sigmoid_no_return)
+def adj_sigmoid_no_return(i: int, xs: wp.array(dtype=float), ys: wp.array(dtype=float)):
+    wp.adjoint[xs][i] += ys[i] * (1.0 - ys[i])
+
+
+@wp.kernel
+def eval_sigmoid(xs: wp.array(dtype=float), ys: wp.array(dtype=float)):
+    i = wp.tid()
+    sigmoid_no_return(i, xs, ys)
+
+
+def test_custom_grad_no_return(test, device):
+    xs = wp.array([1.0, 2.0, 3.0, 4.0], dtype=wp.float32, requires_grad=True)
+    ys = wp.zeros_like(xs)
+    ys.grad.fill_(1.0)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(eval_sigmoid, dim=len(xs), inputs=[xs], outputs=[ys])
+    tape.backward()
+
+    sigmoids = ys.numpy()
+    grad = xs.grad.numpy()
+    assert_np_equal(grad, sigmoids * (1.0 - sigmoids))
+
+
+def test_wrapped_docstring(test, device):
+    assert "This is a docstring" in reversible_increment.__doc__
+    assert "This is a docstring" in replay_reversible_increment.__doc__
+    assert "This is a docstring" in overload_fn.__doc__
+    assert "This is a docstring" in overload_fn_grad.__doc__
+    assert "This is a docstring" in run_overload_float_fn.__doc__
+    assert "This is a docstring" in MyStruct.__doc__
+
+
+@wp.func
+def dense_gemm(
+    m: int,
+    n: int,
+    p: int,
+    transpose_A: bool,
+    transpose_B: bool,
+    add_to_C: bool,
+    A: wp.array(dtype=float),
+    B: wp.array(dtype=float),
+    # outputs
+    C: wp.array(dtype=float),
+):
+    # this function doesn't get called but it is an important test for code generation
+    # multiply a `m x p` matrix A by a `p x n` matrix B to produce a `m x n` matrix C
+    for i in range(m):
+        for j in range(n):
+            sum = float(0.0)
+            for k in range(p):
+                if transpose_A:
+                    a_i = k * m + i
+                else:
+                    a_i = i * p + k
+                if transpose_B:
+                    b_j = j * p + k
+                else:
+                    b_j = k * n + j
+                sum += A[a_i] * B[b_j]
+
+            if add_to_C:
+                C[i * n + j] += sum
+            else:
+                C[i * n + j] = sum
+
+
+@wp.func_grad(dense_gemm)
+def adj_dense_gemm(
+    m: int,
+    n: int,
+    p: int,
+    transpose_A: bool,
+    transpose_B: bool,
+    add_to_C: bool,
+    A: wp.array(dtype=float),
+    B: wp.array(dtype=float),
+    # outputs
+    C: wp.array(dtype=float),
+):
+    # code generation would break here if we didn't defer building the custom grad
+    # function until after the forward functions + kernels of the module have been built
+    add_to_C = True
+    if transpose_A:
+        dense_gemm(p, m, n, False, True, add_to_C, B, wp.adjoint[C], wp.adjoint[A])
+        dense_gemm(p, n, m, False, False, add_to_C, A, wp.adjoint[C], wp.adjoint[B])
+    else:
+        dense_gemm(m, p, n, False, not transpose_B, add_to_C, wp.adjoint[C], B, wp.adjoint[A])
+        dense_gemm(p, n, m, True, False, add_to_C, A, wp.adjoint[C], wp.adjoint[B])
+
+
 devices = get_test_devices()


@@ -169,6 +326,9 @@ class TestGradCustoms(unittest.TestCase):

 add_function_test(TestGradCustoms, "test_custom_replay_grad", test_custom_replay_grad, devices=devices)
 add_function_test(TestGradCustoms, "test_custom_overload_grad", test_custom_overload_grad, devices=devices)
+add_function_test(TestGradCustoms, "test_custom_import_grad", test_custom_import_grad, devices=devices)
+add_function_test(TestGradCustoms, "test_custom_grad_no_return", test_custom_grad_no_return, devices=devices)
+add_function_test(TestGradCustoms, "test_wrapped_docstring", test_wrapped_docstring, devices=devices)


 if __name__ == "__main__":
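Most hunks above thread device=device through array construction and launches, seed output gradients with the new wp.ones(), and add docstrings whose survival through Warp's decorators test_wrapped_docstring then verifies. As a minimal standalone sketch of the @wp.func_grad custom-adjoint mechanism these tests exercise (the kernel and values below are illustrative, not taken from the package):

import numpy as np
import warp as wp

wp.init()

@wp.func
def square(x: float):
    return x * x

# override the adjoint Warp would otherwise generate for square()
@wp.func_grad(square)
def adj_square(x: float, adj_ret: float):
    wp.adjoint[x] += 2.0 * x * adj_ret  # d(x^2)/dx = 2x

@wp.kernel
def eval_square(xs: wp.array(dtype=float), ys: wp.array(dtype=float)):
    i = wp.tid()
    ys[i] = square(xs[i])

xs = wp.array(np.arange(1.0, 5.0), dtype=wp.float32, requires_grad=True)
ys = wp.zeros_like(xs)

tape = wp.Tape()
with tape:
    wp.launch(eval_square, dim=len(xs), inputs=[xs], outputs=[ys])
tape.backward(grads={ys: wp.ones(len(xs), dtype=wp.float32)})

print(xs.grad.numpy())  # [2. 4. 6. 8.]

wp.adjoint[x] += ... accumulates into the tape's adjoint buffers, so custom gradients compose with the adjoints Warp generates for the rest of the kernel.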
warp/tests/test_jax.py
ADDED
@@ -0,0 +1,254 @@
+# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import numpy as np
+import os
+import unittest
+from typing import Any
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+wp.init()
+
+
+# basic kernel with one input and output
+@wp.kernel
+def triple_kernel(input: wp.array(dtype=float), output: wp.array(dtype=float)):
+    tid = wp.tid()
+    output[tid] = 3.0 * input[tid]
+
+
+# generic kernel with one scalar input and output
+@wp.kernel
+def triple_kernel_scalar(input: wp.array(dtype=Any), output: wp.array(dtype=Any)):
+    tid = wp.tid()
+    output[tid] = input.dtype(3) * input[tid]
+
+
+# generic kernel with one vector/matrix input and output
+@wp.kernel
+def triple_kernel_vecmat(input: wp.array(dtype=Any), output: wp.array(dtype=Any)):
+    tid = wp.tid()
+    output[tid] = input.dtype.dtype(3) * input[tid]
+
+
+# kernel with multiple inputs and outputs
+@wp.kernel
+def multiarg_kernel(
+    # inputs
+    a: wp.array(dtype=float),
+    b: wp.array(dtype=float),
+    c: wp.array(dtype=float),
+    # outputs
+    ab: wp.array(dtype=float),
+    bc: wp.array(dtype=float),
+):
+    tid = wp.tid()
+    ab[tid] = a[tid] + b[tid]
+    bc[tid] = b[tid] + c[tid]
+
+
+# various types for testing
+scalar_types = wp.types.scalar_types
+vector_types = []
+matrix_types = []
+for dim in [2, 3, 4]:
+    for T in scalar_types:
+        vector_types.append(wp.vec(dim, T))
+        matrix_types.append(wp.mat((dim, dim), T))
+
+# explicitly overload generic kernels to avoid module reloading during tests
+for T in scalar_types:
+    wp.overload(triple_kernel_scalar, [wp.array(dtype=T), wp.array(dtype=T)])
+for T in [*vector_types, *matrix_types]:
+    wp.overload(triple_kernel_vecmat, [wp.array(dtype=T), wp.array(dtype=T)])
+
+
+def _jax_version():
+    try:
+        import jax
+        return jax.__version_info__
+    except ImportError:
+        return (0, 0, 0)
+
+
+@unittest.skipUnless(_jax_version() >= (0, 4, 25), "Jax version too old")
+def test_jax_kernel_basic(test, device):
+    import jax.numpy as jp
+    from warp.jax_experimental import jax_kernel
+
+    n = 64
+
+    jax_triple = jax_kernel(triple_kernel)
+
+    @jax.jit
+    def f():
+        x = jp.arange(n, dtype=jp.float32)
+        return jax_triple(x)
+
+    # run on the given device
+    with jax.default_device(wp.device_to_jax(device)):
+        y = f()
+
+    result = np.asarray(y)
+    expected = 3 * np.arange(n, dtype=np.float32)
+
+    assert_np_equal(result, expected)
+
+
+@unittest.skipUnless(_jax_version() >= (0, 4, 25), "Jax version too old")
+def test_jax_kernel_scalar(test, device):
+    import jax.numpy as jp
+    from warp.jax_experimental import jax_kernel
+
+    n = 64
+
+    for T in scalar_types:
+
+        jp_dtype = wp.jax.dtype_to_jax(T)
+        np_dtype = wp.types.warp_type_to_np_dtype[T]
+
+        with test.subTest(msg=T.__name__):
+
+            # get the concrete overload
+            kernel_instance = triple_kernel_scalar.get_overload([wp.array(dtype=T), wp.array(dtype=T)])
+
+            jax_triple = jax_kernel(kernel_instance)
+
+            @jax.jit
+            def f():
+                x = jp.arange(n, dtype=jp_dtype)
+                return jax_triple(x)
+
+            # run on the given device
+            with jax.default_device(wp.device_to_jax(device)):
+                y = f()
+
+            result = np.asarray(y)
+            expected = 3 * np.arange(n, dtype=np_dtype)
+
+            assert_np_equal(result, expected)
+
+
+@unittest.skipUnless(_jax_version() >= (0, 4, 25), "Jax version too old")
+def test_jax_kernel_vecmat(test, device):
+    import jax.numpy as jp
+    from warp.jax_experimental import jax_kernel
+
+    for T in [*vector_types, *matrix_types]:
+
+        jp_dtype = wp.jax.dtype_to_jax(T._wp_scalar_type_)
+        np_dtype = wp.types.warp_type_to_np_dtype[T._wp_scalar_type_]
+
+        n = 64 // T._length_
+        scalar_shape = (n, *T._shape_)
+        scalar_len = n * T._length_
+
+        with test.subTest(msg=T.__name__):
+
+            # get the concrete overload
+            kernel_instance = triple_kernel_vecmat.get_overload([wp.array(dtype=T), wp.array(dtype=T)])
+
+            jax_triple = jax_kernel(kernel_instance)
+
+            @jax.jit
+            def f():
+                x = jp.arange(scalar_len, dtype=jp_dtype).reshape(scalar_shape)
+                return jax_triple(x)
+
+            # run on the given device
+            with jax.default_device(wp.device_to_jax(device)):
+                y = f()
+
+            result = np.asarray(y)
+            expected = 3 * np.arange(scalar_len, dtype=np_dtype).reshape(scalar_shape)
+
+            assert_np_equal(result, expected)
+
+
+@unittest.skipUnless(_jax_version() >= (0, 4, 25), "Jax version too old")
+def test_jax_kernel_multiarg(test, device):
+    import jax.numpy as jp
+    from warp.jax_experimental import jax_kernel
+
+    n = 64
+
+    jax_multiarg = jax_kernel(multiarg_kernel)
+
+    @jax.jit
+    def f():
+        a = jp.full(n, 1, dtype=jp.float32)
+        b = jp.full(n, 2, dtype=jp.float32)
+        c = jp.full(n, 3, dtype=jp.float32)
+        return jax_multiarg(a, b, c)
+
+    # run on the given device
+    with jax.default_device(wp.device_to_jax(device)):
+        x, y = f()
+
+    result_x, result_y = np.asarray(x), np.asarray(y)
+    expected_x = np.full(n, 3, dtype=np.float32)
+    expected_y = np.full(n, 5, dtype=np.float32)
+
+    assert_np_equal(result_x, expected_x)
+    assert_np_equal(result_y, expected_y)
+
+
+class TestJax(unittest.TestCase):
+    pass
+
+
+# try adding Jax tests if Jax is installed correctly
+try:
+    # prevent Jax from gobbling up GPU memory
+    os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
+    os.environ["XLA_PYTHON_CLIENT_ALLOCATOR"] = "platform"
+
+    import jax
+    import jax.dlpack
+
+    # NOTE: we must enable 64-bit types in Jax to test the full gamut of types
+    jax.config.update("jax_enable_x64", True)
+
+    # check which Warp devices work with Jax
+    # CUDA devices may fail if Jax cannot find a CUDA Toolkit
+    test_devices = get_test_devices()
+    jax_compatible_devices = []
+    jax_compatible_cuda_devices = []
+    for d in test_devices:
+        try:
+            with jax.default_device(wp.device_to_jax(d)):
+                j = jax.numpy.arange(10, dtype=jax.numpy.float32)
+                j += 1
+            jax_compatible_devices.append(d)
+            if d.is_cuda:
+                jax_compatible_cuda_devices.append(d)
+        except Exception as e:
+            print(f"Skipping Jax DLPack tests on device '{d}' due to exception: {e}")
+
+    if jax_compatible_cuda_devices:
+        add_function_test(
+            TestJax, "test_jax_kernel_basic", test_jax_kernel_basic, devices=jax_compatible_cuda_devices
+        )
+        add_function_test(
+            TestJax, "test_jax_kernel_scalar", test_jax_kernel_scalar, devices=jax_compatible_cuda_devices
+        )
+        add_function_test(
+            TestJax, "test_jax_kernel_vecmat", test_jax_kernel_vecmat, devices=jax_compatible_cuda_devices
+        )
+        add_function_test(
+            TestJax, "test_jax_kernel_multiarg", test_jax_kernel_multiarg, devices=jax_compatible_cuda_devices
+        )
+
+except Exception as e:
+    print(f"Skipping Jax tests due to exception: {e}")
+
+
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2)
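The pattern this new file exercises is: wrap a Warp kernel with warp.jax_experimental.jax_kernel, call the wrapper inside a jitted JAX function, and pin placement with jax.default_device(wp.device_to_jax(...)). A minimal standalone sketch under the same assumptions as the tests (a CUDA device visible to both libraries, jax >= 0.4.25; the kernel below is illustrative):

import jax
import jax.numpy as jp
import numpy as np
import warp as wp
from warp.jax_experimental import jax_kernel

wp.init()

@wp.kernel
def scale_kernel(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    tid = wp.tid()
    y[tid] = 3.0 * x[tid]

jax_scale = jax_kernel(scale_kernel)  # Warp kernel exposed as a JAX-callable op

@jax.jit
def f(x):
    return jax_scale(x)

with jax.default_device(wp.device_to_jax(wp.get_device("cuda:0"))):
    y = f(jp.arange(8, dtype=jp.float32))

print(np.asarray(y))  # [ 0.  3.  6.  9. 12. 15. 18. 21.]

Note that the test file only registers these tests for CUDA devices, so this interop path should be treated as CUDA-only and experimental.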
warp/tests/test_large.py
CHANGED
@@ -81,8 +81,8 @@ def test_large_arrays_slow(test, device):
     # without changes to support how frequently a test may be run
     total_elements = 2**31 + 8

-    #
-    for total_dims in range(
+    # 2-D to 4-D arrays: test zero_, fill_, then zero_ for scalar data types:
+    for total_dims in range(2, 5):
         dim_x = math.ceil(total_elements ** (1 / total_dims))
         shape_tuple = tuple([dim_x] * total_dims)

@@ -99,21 +99,42 @@ def test_large_arrays_slow(test, device):

 def test_large_arrays_fast(test, device):
     # A truncated version of test_large_arrays_slow meant to catch basic errors
-
+
+    # Make is so that a (dim_x, dim_x) array has more than 2**31 elements
+    dim_x = math.ceil(math.sqrt(2**31))

     nptype = np.dtype(np.int8)
     wptype = wp.types.np_dtype_to_warp_type[nptype]

-    a1 = wp.zeros((
-    assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
-
+    a1 = wp.zeros((dim_x, dim_x), dtype=wptype, device=device)
     a1.fill_(127)
+
     assert_np_equal(a1.numpy(), 127 * np.ones_like(a1.numpy()))

     a1.zero_()
     assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))


+def test_large_array_excessive_zeros(test, device):
+    # Tests the allocation of an array with length exceeding 2**31-1 in a dimension
+
+    with test.assertRaisesRegex(
+        ValueError, "Array shapes must not exceed the maximum representable value of a signed 32-bit integer"
+    ):
+        _ = wp.zeros((2**31), dtype=int, device=device)
+
+
+def test_large_array_excessive_numpy(test, device):
+    # Tests the allocation of an array from a numpy array with length exceeding 2**31-1 in a dimension
+
+    large_np_array = np.empty((2**31), dtype=int)
+
+    with test.assertRaisesRegex(
+        ValueError, "Array shapes must not exceed the maximum representable value of a signed 32-bit integer"
+    ):
+        _ = wp.array(large_np_array, device=device)
+
+
 devices = get_test_devices()


@@ -134,6 +155,8 @@ add_function_test(
 )

 add_function_test(TestLarge, "test_large_arrays_fast", test_large_arrays_fast, devices=devices)
+add_function_test(TestLarge, "test_large_array_excessive_zeros", test_large_array_excessive_zeros, devices=devices)
+add_function_test(TestLarge, "test_large_array_excessive_numpy", test_large_array_excessive_numpy, devices=devices)


 if __name__ == "__main__":
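The two new negative tests encode the constraint behind the shape math above: each array dimension must fit in a signed 32-bit integer, while the total element count may exceed 2**31 if spread across dimensions. A minimal sketch of both sides of the limit (CPU device assumed; note the second allocation is ~2 GiB of int8, mirroring test_large_arrays_fast):

import math
import warp as wp

wp.init()

# a single dimension of 2**31 elements is rejected outright
try:
    wp.zeros((2**31,), dtype=int, device="cpu")
except ValueError as e:
    print(e)  # Array shapes must not exceed the maximum representable value of a signed 32-bit integer

# the same element count is fine when no single dimension exceeds 2**31 - 1
dim_x = math.ceil(math.sqrt(2**31))
a = wp.zeros((dim_x, dim_x), dtype=wp.int8, device="cpu")
print(a.shape, a.size)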
warp/tests/test_launch.py
CHANGED
@@ -301,7 +301,30 @@ def test_launch_tuple_args(test, device):
         outputs=(out,),
         device=device,
     )
+    assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))

+    wp.launch(
+        kernel_mul,
+        dim=len(values),
+        inputs=(
+            values,
+            coeff,
+            out,
+        ),
+        device=device,
+    )
+    assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
+
+    wp.launch(
+        kernel_mul,
+        dim=len(values),
+        outputs=(
+            values,
+            coeff,
+            out,
+        ),
+        device=device,
+    )
     assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))


@@ -323,6 +346,8 @@ add_function_test(TestLaunch, "test_launch_cmd_set_ctype", test_launch_cmd_set_c
 add_function_test(TestLaunch, "test_launch_cmd_set_dim", test_launch_cmd_set_dim, devices=devices)
 add_function_test(TestLaunch, "test_launch_cmd_empty", test_launch_cmd_empty, devices=devices)

+add_function_test(TestLaunch, "test_launch_tuple_args", test_launch_tuple_args, devices=devices)
+

 if __name__ == "__main__":
     wp.build.clear_kernel_cache()
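The added assertions confirm that wp.launch treats tuples and lists interchangeably, and that positional kernel arguments may be supplied entirely through inputs, entirely through outputs, or split across both. A minimal sketch with a hypothetical kernel mirroring the test's kernel_mul:

import numpy as np
import warp as wp

wp.init()

@wp.kernel
def mul(values: wp.array(dtype=int), coeff: int, out: wp.array(dtype=int)):
    tid = wp.tid()
    out[tid] = coeff * values[tid]

values = wp.array(np.arange(4), dtype=int)
out = wp.zeros(4, dtype=int)

# tuples work the same as lists, and all args may go through either parameter
wp.launch(mul, dim=len(values), inputs=(values, 3), outputs=(out,))
wp.launch(mul, dim=len(values), inputs=(values, 3, out))
print(out.numpy())  # [0 3 6 9]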
warp/tests/test_linear_solvers.py
CHANGED
@@ -7,9 +7,10 @@ import unittest
 from warp.optim.linear import preconditioner, cg, bicgstab, gmres
 from warp.tests.unittest_utils import *

-
 wp.init()

+from warp.context import runtime  # noqa: E402
+

 def _check_linear_solve(test, A, b, func, *args, **kwargs):
     # test from zero
@@ -75,6 +76,15 @@ def _make_indefinite_system(n: int, seed: int, dtype, device, spd=False):
     return wp.array(A, dtype=dtype, device=device), wp.array(b, dtype=dtype, device=device)


+def _make_identity_system(n: int, seed: int, dtype, device):
+    rng = np.random.default_rng(seed)
+
+    A = np.eye(n)
+    b = rng.uniform(low=-1.0, high=1.0, size=(n,))
+
+    return wp.array(A, dtype=dtype, device=device), wp.array(b, dtype=dtype, device=device)
+
+
 def test_cg(test, device):
     A, b = _make_spd_system(n=64, seed=123, device=device, dtype=wp.float64)
     M = preconditioner(A, "diag")
@@ -88,6 +98,9 @@ def test_cg(test, device):
     _check_linear_solve(test, A, b, cg, maxiter=1000)
     _check_linear_solve(test, A, b, cg, M=M, maxiter=1000)

+    A, b = _make_identity_system(n=5, seed=321, device=device, dtype=wp.float32)
+    _check_linear_solve(test, A, b, cg, maxiter=30)
+

 def test_bicgstab(test, device):
     A, b = _make_nonsymmetric_system(n=64, seed=123, device=device, dtype=wp.float64)
@@ -111,6 +124,9 @@ def test_bicgstab(test, device):
     _check_linear_solve(test, A, b, bicgstab, M=M, maxiter=1000)
     _check_linear_solve(test, A, b, bicgstab, M=M, maxiter=1000, is_left_preconditioner=True)

+    A, b = _make_identity_system(n=5, seed=321, device=device, dtype=wp.float32)
+    _check_linear_solve(test, A, b, bicgstab, maxiter=30)
+

 def test_gmres(test, device):
     A, b = _make_nonsymmetric_system(n=64, seed=456, device=device, dtype=wp.float64)
@@ -127,6 +143,9 @@ def test_gmres(test, device):
     _check_linear_solve(test, A, b, gmres, M=M, maxiter=1000, tol=1.0e-5)
     _check_linear_solve(test, A, b, gmres, M=M, maxiter=1000, tol=1.0e-5, is_left_preconditioner=True)

+    A, b = _make_identity_system(n=5, seed=123, device=device, dtype=wp.float32)
+    _check_linear_solve(test, A, b, gmres, maxiter=120)
+

 class TestLinearSolvers(unittest.TestCase):
     pass
@@ -134,8 +153,6 @@ class TestLinearSolvers(unittest.TestCase):

 devices = get_test_devices()

-from warp.context import runtime
-
 if not runtime.core.is_cutlass_enabled():
     devices = [d for d in devices if not d.is_cuda]
     print("Skipping CUDA linear solver tests because CUTLASS is not supported in this build")