PyPI - warp-lang - Versions diffs - 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.1__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (191)

warp/__init__.py +7 -1
warp/autograd.py +12 -2
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +410 -0
warp/build_dll.py +6 -14
warp/builtins.py +463 -372
warp/codegen.py +196 -124
warp/config.py +42 -6
warp/context.py +496 -271
warp/dlpack.py +8 -6
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/benchmarks/benchmark_cloth.py +1 -1
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/distributed/example_jacobi_mpi.py +507 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +2 -2
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_magnetostatics.py +6 -6
warp/examples/fem/utils.py +9 -3
warp/examples/interop/example_jax_callable.py +116 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +205 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_matmul.py +2 -4
warp/fem/__init__.py +11 -1
warp/fem/adaptivity.py +4 -4
warp/fem/field/field.py +11 -1
warp/fem/field/nodal_field.py +56 -88
warp/fem/field/virtual.py +62 -23
warp/fem/geometry/adaptive_nanogrid.py +16 -13
warp/fem/geometry/closest_point.py +1 -1
warp/fem/geometry/deformed_geometry.py +5 -2
warp/fem/geometry/geometry.py +5 -0
warp/fem/geometry/grid_2d.py +12 -12
warp/fem/geometry/grid_3d.py +12 -15
warp/fem/geometry/hexmesh.py +5 -7
warp/fem/geometry/nanogrid.py +9 -11
warp/fem/geometry/quadmesh.py +13 -13
warp/fem/geometry/tetmesh.py +3 -4
warp/fem/geometry/trimesh.py +7 -20
warp/fem/integrate.py +262 -93
warp/fem/linalg.py +5 -5
warp/fem/quadrature/pic_quadrature.py +37 -22
warp/fem/quadrature/quadrature.py +194 -25
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +4 -2
warp/fem/space/basis_space.py +25 -18
warp/fem/space/hexmesh_function_space.py +2 -2
warp/fem/space/partition.py +6 -2
warp/fem/space/quadmesh_function_space.py +8 -8
warp/fem/space/shape/cube_shape_function.py +23 -23
warp/fem/space/shape/square_shape_function.py +12 -12
warp/fem/space/shape/triangle_shape_function.py +1 -1
warp/fem/space/tetmesh_function_space.py +3 -3
warp/fem/space/trimesh_function_space.py +2 -2
warp/fem/utils.py +12 -6
warp/jax.py +14 -1
warp/jax_experimental/__init__.py +16 -0
warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -29
warp/jax_experimental/ffi.py +702 -0
warp/jax_experimental/xla_ffi.py +602 -0
warp/math.py +89 -0
warp/native/array.h +13 -0
warp/native/builtin.h +29 -3
warp/native/bvh.cpp +3 -1
warp/native/bvh.cu +42 -14
warp/native/bvh.h +2 -1
warp/native/clang/clang.cpp +30 -3
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/exports.h +68 -63
warp/native/intersect.h +26 -26
warp/native/intersect_adj.h +33 -33
warp/native/marching.cu +1 -1
warp/native/mat.h +513 -9
warp/native/mesh.h +10 -10
warp/native/quat.h +99 -11
warp/native/rand.h +6 -0
warp/native/sort.cpp +122 -59
warp/native/sort.cu +152 -15
warp/native/sort.h +8 -1
warp/native/sparse.cpp +43 -22
warp/native/sparse.cu +52 -17
warp/native/svd.h +116 -0
warp/native/tile.h +312 -116
warp/native/tile_reduce.h +46 -3
warp/native/vec.h +68 -7
warp/native/volume.cpp +85 -113
warp/native/volume_builder.cu +25 -10
warp/native/volume_builder.h +6 -0
warp/native/warp.cpp +5 -6
warp/native/warp.cu +100 -11
warp/native/warp.h +19 -10
warp/optim/linear.py +10 -10
warp/render/render_opengl.py +19 -17
warp/render/render_usd.py +93 -3
warp/sim/articulation.py +4 -4
warp/sim/collide.py +32 -19
warp/sim/import_mjcf.py +449 -155
warp/sim/import_urdf.py +32 -12
warp/sim/inertia.py +189 -156
warp/sim/integrator_euler.py +8 -5
warp/sim/integrator_featherstone.py +3 -10
warp/sim/integrator_vbd.py +207 -2
warp/sim/integrator_xpbd.py +8 -5
warp/sim/model.py +71 -25
warp/sim/render.py +4 -0
warp/sim/utils.py +2 -2
warp/sparse.py +642 -555
warp/stubs.py +217 -20
warp/tests/__main__.py +0 -15
warp/tests/assets/torus.usda +1 -1
warp/tests/cuda/__init__.py +0 -0
warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
warp/tests/interop/__init__.py +0 -0
warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
warp/tests/sim/__init__.py +0 -0
warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
warp/tests/{test_collision.py → sim/test_collision.py} +236 -205
warp/tests/sim/test_inertia.py +161 -0
warp/tests/{test_model.py → sim/test_model.py} +40 -0
warp/tests/{flaky_test_sim_grad.py → sim/test_sim_grad.py} +4 -0
warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
warp/tests/sim/test_vbd.py +597 -0
warp/tests/sim/test_xpbd.py +399 -0
warp/tests/test_bool.py +1 -1
warp/tests/test_codegen.py +24 -3
warp/tests/test_examples.py +40 -38
warp/tests/test_fem.py +98 -14
warp/tests/test_linear_solvers.py +0 -11
warp/tests/test_mat.py +577 -156
warp/tests/test_mat_scalar_ops.py +4 -4
warp/tests/test_overwrite.py +0 -60
warp/tests/test_quat.py +356 -151
warp/tests/test_rand.py +44 -37
warp/tests/test_sparse.py +47 -6
warp/tests/test_spatial.py +75 -0
warp/tests/test_static.py +1 -1
warp/tests/test_utils.py +84 -4
warp/tests/test_vec.py +336 -178
warp/tests/tile/__init__.py +0 -0
warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
warp/tests/{test_tile_load.py → tile/test_tile_load.py} +98 -1
warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
warp/tests/unittest_serial.py +1 -0
warp/tests/unittest_suites.py +45 -62
warp/tests/unittest_utils.py +2 -1
warp/thirdparty/unittest_parallel.py +3 -1
warp/types.py +175 -666
warp/utils.py +137 -72
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/METADATA +46 -12
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/RECORD +184 -171
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info/licenses}/LICENSE.md +0 -26
warp/examples/optim/example_walker.py +0 -317
warp/native/cutlass_gemm.cpp +0 -43
warp/native/cutlass_gemm.cu +0 -382
warp/tests/test_matmul.py +0 -511
warp/tests/test_matmul_lite.py +0 -411
warp/tests/test_vbd.py +0 -386
warp/tests/unused_test_misc.py +0 -77
/warp/tests/{test_async.py → cuda/test_async.py} +0 -0
/warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
/warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
/warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
/warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
/warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
/warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
/warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
/warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
/warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
/warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
/warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
/warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
/warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
/warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
/warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
/warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0

warp/examples/interop/example_jax_callable.py ADDED

@@ -0,0 +1,116 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+# Example jax_callable()
+#
+# Examples of calling annotated Python functions from JAX.
+###########################################################################
+from functools import partial
+import jax
+import jax.numpy as jnp
+import warp as wp
+from warp.jax_experimental.ffi import jax_callable
+@wp.kernel
+def scale_kernel(a: wp.array(dtype=float), s: float, output: wp.array(dtype=float)):
+    tid = wp.tid()
+    output[tid] = a[tid] * s
+@wp.kernel
+def scale_vec_kernel(a: wp.array(dtype=wp.vec2), s: float, output: wp.array(dtype=wp.vec2)):
+    tid = wp.tid()
+    output[tid] = a[tid] * s
+# The Python function to call.
+# Note the argument annotations, just like Warp kernels.
+def example_func(
+    # inputs
+    a: wp.array(dtype=float),
+    b: wp.array(dtype=wp.vec2),
+    s: float,
+    # outputs
+    c: wp.array(dtype=float),
+    d: wp.array(dtype=wp.vec2),
+):
+    wp.launch(scale_kernel, dim=a.shape, inputs=[a, s], outputs=[c])
+    wp.launch(scale_vec_kernel, dim=b.shape, inputs=[b, s], outputs=[d])
+def example1():
+    jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
+    @jax.jit
+    def f():
+        # inputs
+        a = jnp.arange(10, dtype=jnp.float32)
+        b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2))  # wp.vec2
+        s = 2.0
+        # output shapes
+        output_dims = {"c": a.shape, "d": b.shape}
+        c, d = jax_func(a, b, s, output_dims=output_dims)
+        return c, d
+    r1, r2 = f()
+    print(r1)
+    print(r2)
+def example2():
+    jax_func = jax_callable(example_func, num_outputs=2, vmap_method="broadcast_all")
+    # NOTE: scalar arguments must be static compile-time constants
+    @partial(jax.jit, static_argnames=["s"])
+    def f(a, b, s):
+        # output shapes
+        output_dims = {"c": a.shape, "d": b.shape}
+        c, d = jax_func(a, b, s, output_dims=output_dims)
+        return c, d
+    # inputs
+    a = jnp.arange(10, dtype=jnp.float32)
+    b = jnp.arange(10, dtype=jnp.float32).reshape((5, 2))  # wp.vec2
+    s = 3.0
+    r1, r2 = f(a, b, s)
+    print(r1)
+    print(r2)
+def main():
+    wp.init()
+    wp.load_module(device=wp.get_device())
+    examples = [example1, example2]
+    for example in examples:
+        print("\n===========================================================================")
+        print(f"{example.__name__}:")
+        example()
+if __name__ == "__main__":
+    main()

warp/examples/interop/example_jax_ffi_callback.py ADDED

@@ -0,0 +1,132 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+# Example register_ffi_callback()
+#
+# Examples of calling Python functions from JAX.
+# Target functions must have the form func(inputs, outputs, attrs, ctx).
+###########################################################################
+import jax
+import jax.numpy as jnp
+import numpy as np
+import warp as wp
+from warp.jax import get_jax_device
+from warp.jax_experimental.ffi import register_ffi_callback
+@wp.kernel
+def scale_kernel(a: wp.array(dtype=float), s: float, output: wp.array(dtype=float)):
+    tid = wp.tid()
+    output[tid] = a[tid] * s
+@wp.kernel
+def scale_vec_kernel(a: wp.array(dtype=wp.vec2), s: float, output: wp.array(dtype=wp.vec2)):
+    tid = wp.tid()
+    output[tid] = a[tid] * s
+def example1():
+    # the Python function to call
+    def print_args(inputs, outputs, attrs, ctx):
+        def buffer_to_string(b):
+            return str(b.dtype) + str(list(b.shape)) + " @%x" % b.data
+        print("Inputs:     ", ", ".join([buffer_to_string(b) for b in inputs]))
+        print("Outputs:    ", ", ".join([buffer_to_string(b) for b in outputs]))
+        print("Attributes: ", "".join(["\n  %s: %s" % (k, str(v)) for k, v in attrs.items()]))
+    # register callback
+    register_ffi_callback("print_args", print_args)
+    # set up call
+    call = jax.ffi.ffi_call("print_args", jax.ShapeDtypeStruct((1, 2, 3), jnp.int8))
+    # call it
+    call(
+        jnp.arange(16),
+        jnp.arange(32.0).reshape((4, 8)),
+        str_attr="hi",
+        f32_attr=np.float32(4.2),
+        dict_attr={"a": 1, "b": 6.4},
+    )
+def example2():
+    # the Python function to call
+    def warp_func(inputs, outputs, attrs, ctx):
+        # input arrays
+        a = inputs[0]
+        b = inputs[1]
+        # scalar attributes
+        s = attrs["scale"]
+        # output arrays
+        c = outputs[0]
+        d = outputs[1]
+        device = wp.device_from_jax(get_jax_device())
+        stream = wp.Stream(device, cuda_stream=ctx.stream)
+        with wp.ScopedStream(stream):
+            # launch with arrays of scalars
+            wp.launch(scale_kernel, dim=a.shape, inputs=[a, s], outputs=[c])
+            # launch with arrays of vec2
+            # NOTE: the input shapes are from JAX arrays, we need to strip the inner dimension for vec2 arrays
+            wp.launch(scale_vec_kernel, dim=b.shape[0], inputs=[b, s], outputs=[d])
+    # register callback
+    register_ffi_callback("warp_func", warp_func)
+    n = 10
+    # inputs
+    a = jnp.arange(n, dtype=jnp.float32)
+    b = jnp.arange(n, dtype=jnp.float32).reshape((n // 2, 2))  # array of wp.vec2
+    s = 2.0
+    # set up call
+    out_types = [
+        jax.ShapeDtypeStruct(a.shape, jnp.float32),
+        jax.ShapeDtypeStruct(b.shape, jnp.float32),  # array of wp.vec2
+    ]
+    call = jax.ffi.ffi_call("warp_func", out_types)
+    # call it
+    c, d = call(a, b, scale=s)
+    print(c)
+    print(d)
+def main():
+    wp.init()
+    wp.load_module(device=wp.get_device())
+    examples = [example1, example2]
+    for example in examples:
+        print("\n===========================================================================")
+        print(f"{example.__name__}:")
+        example()
+if __name__ == "__main__":
+    main()

warp/examples/interop/example_jax_kernel.py ADDED

@@ -0,0 +1,205 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+# Example jax_kernel()
+#
+# Examples of calling a Warp kernel from JAX.
+###########################################################################
+import math
+from functools import partial
+import jax
+import jax.numpy as jnp
+import warp as wp
+from warp.jax_experimental.ffi import jax_kernel
+@wp.kernel
+def add_kernel(a: wp.array(dtype=int), b: wp.array(dtype=int), output: wp.array(dtype=int)):
+    tid = wp.tid()
+    output[tid] = a[tid] + b[tid]
+@wp.kernel
+def sincos_kernel(angle: wp.array(dtype=float), sin_out: wp.array(dtype=float), cos_out: wp.array(dtype=float)):
+    tid = wp.tid()
+    sin_out[tid] = wp.sin(angle[tid])
+    cos_out[tid] = wp.cos(angle[tid])
+@wp.kernel
+def diagonal_kernel(output: wp.array(dtype=wp.mat33)):
+    tid = wp.tid()
+    output[tid] = wp.mat33(1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0)
+@wp.kernel
+def matmul_kernel(
+    a: wp.array2d(dtype=float),  # NxK
+    b: wp.array2d(dtype=float),  # KxM
+    c: wp.array2d(dtype=float),  # NxM
+):
+    # launch dims should be (N, M)
+    i, j = wp.tid()
+    N = a.shape[0]
+    K = a.shape[1]
+    M = b.shape[1]
+    if i < N and j < M:
+        s = wp.float32(0)
+        for k in range(K):
+            s += a[i, k] * b[k, j]
+        c[i, j] = s
+@wp.kernel
+def scale_vec_kernel(a: wp.array(dtype=wp.vec2), s: float, output: wp.array(dtype=wp.vec2)):
+    tid = wp.tid()
+    output[tid] = a[tid] * s
+def example1():
+    # two inputs and one output
+    jax_add = jax_kernel(add_kernel)
+    @jax.jit
+    def f():
+        n = 10
+        a = jnp.arange(n, dtype=jnp.int32)
+        b = jnp.ones(n, dtype=jnp.int32)
+        return jax_add(a, b)
+    print(f())
+def example2():
+    # one input and two outputs
+    jax_sincos = jax_kernel(sincos_kernel, num_outputs=2)
+    @jax.jit
+    def f():
+        n = 32
+        a = jnp.linspace(0, 2 * math.pi, n)
+        return jax_sincos(a)
+    s, c = f()
+    print(s)
+    print()
+    print(c)
+def example3():
+    # multiply vectors by scalar
+    jax_scale_vec = jax_kernel(scale_vec_kernel)
+    @jax.jit
+    def f():
+        a = jnp.arange(10, dtype=jnp.float32).reshape((5, 2))  # array of vec2
+        s = 2.0
+        return jax_scale_vec(a, s)
+    b = f()
+    print(b)
+def example4():
+    # multiply vectors by scalar (static arg)
+    jax_scale_vec = jax_kernel(scale_vec_kernel)
+    # NOTE: scalar arguments must be static compile-time constants
+    @partial(jax.jit, static_argnames=["s"])
+    def f(a, s):
+        return jax_scale_vec(a, s)
+    a = jnp.arange(10, dtype=jnp.float32).reshape((5, 2))  # array of vec2
+    s = 3.0
+    b = f(a, s)
+    print(b)
+def example5():
+    N, M, K = 3, 4, 2
+    # specify default launch dims
+    jax_matmul = jax_kernel(matmul_kernel, launch_dims=(N, M))
+    @jax.jit
+    def f():
+        a = jnp.full((N, K), 2, dtype=jnp.float32)
+        b = jnp.full((K, M), 3, dtype=jnp.float32)
+        # use default launch dims
+        return jax_matmul(a, b)
+    print(f())
+def example6():
+    # don't specify default launch dims
+    jax_matmul = jax_kernel(matmul_kernel)
+    @jax.jit
+    def f():
+        N1, M1, K1 = 3, 4, 2
+        a1 = jnp.full((N1, K1), 2, dtype=jnp.float32)
+        b1 = jnp.full((K1, M1), 3, dtype=jnp.float32)
+        # use custom launch dims
+        result1 = jax_matmul(a1, b1, launch_dims=(N1, M1))
+        N2, M2, K2 = 4, 3, 2
+        a2 = jnp.full((N2, K2), 2, dtype=jnp.float32)
+        b2 = jnp.full((K2, M2), 3, dtype=jnp.float32)
+        # use custom launch dims
+        result2 = jax_matmul(a2, b2, launch_dims=(N2, M2))
+        return result1, result2
+    r1, r2 = f()
+    print(r1)
+    print()
+    print(r2)
+def example7():
+    # no inputs and one output
+    jax_diagonal = jax_kernel(diagonal_kernel)
+    @jax.jit
+    def f():
+        # launch dimensions determine output size
+        return jax_diagonal(launch_dims=4)
+    print(f())
+def main():
+    wp.init()
+    wp.load_module(device=wp.get_device())
+    examples = [example1, example2, example3, example4, example5, example6, example7]
+    for example in examples:
+        print("\n===========================================================================")
+        print(f"{example.__name__}:")
+        example()
+if __name__ == "__main__":
+    main()