PyPI - warp-lang - Versions diffs - 1.4.1__py3-none-macosx_10_13_universal2.whl → 1.5.0__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.4.1__py3-none-macosx_10_13_universal2.whl → 1.5.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (164)

warp/__init__.py +4 -0
warp/autograd.py +43 -8
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +21 -2
warp/build_dll.py +23 -6
warp/builtins.py +1920 -111
warp/codegen.py +186 -62
warp/config.py +2 -2
warp/context.py +322 -73
warp/examples/assets/pixel.jpg +0 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
warp/examples/benchmarks/benchmark_gemm.py +121 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
warp/examples/benchmarks/benchmark_tile.py +179 -0
warp/examples/core/example_dem.py +2 -1
warp/examples/core/example_mesh_intersect.py +3 -3
warp/examples/fem/example_adaptive_grid.py +37 -10
warp/examples/fem/example_apic_fluid.py +3 -2
warp/examples/fem/example_convection_diffusion_dg.py +4 -5
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion_3d.py +47 -4
warp/examples/fem/example_distortion_energy.py +220 -0
warp/examples/fem/example_magnetostatics.py +127 -85
warp/examples/fem/example_nonconforming_contact.py +5 -5
warp/examples/fem/example_stokes.py +3 -1
warp/examples/fem/example_streamlines.py +12 -19
warp/examples/fem/utils.py +38 -15
warp/examples/optim/example_walker.py +2 -2
warp/examples/sim/example_cloth.py +2 -25
warp/examples/sim/example_jacobian_ik.py +6 -2
warp/examples/sim/example_quadruped.py +2 -1
warp/examples/tile/example_tile_convolution.py +58 -0
warp/examples/tile/example_tile_fft.py +47 -0
warp/examples/tile/example_tile_filtering.py +105 -0
warp/examples/tile/example_tile_matmul.py +79 -0
warp/examples/tile/example_tile_mlp.py +375 -0
warp/fem/__init__.py +8 -0
warp/fem/cache.py +16 -12
warp/fem/dirichlet.py +1 -1
warp/fem/domain.py +44 -1
warp/fem/field/__init__.py +1 -2
warp/fem/field/field.py +31 -19
warp/fem/field/nodal_field.py +101 -49
warp/fem/field/virtual.py +794 -0
warp/fem/geometry/__init__.py +2 -2
warp/fem/geometry/deformed_geometry.py +3 -105
warp/fem/geometry/element.py +13 -0
warp/fem/geometry/geometry.py +165 -5
warp/fem/geometry/grid_2d.py +3 -6
warp/fem/geometry/grid_3d.py +31 -28
warp/fem/geometry/hexmesh.py +3 -46
warp/fem/geometry/nanogrid.py +3 -2
warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
warp/fem/geometry/tetmesh.py +2 -43
warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
warp/fem/integrate.py +683 -261
warp/fem/linalg.py +404 -0
warp/fem/operator.py +101 -18
warp/fem/polynomial.py +5 -5
warp/fem/quadrature/quadrature.py +45 -21
warp/fem/space/__init__.py +45 -11
warp/fem/space/basis_function_space.py +451 -0
warp/fem/space/basis_space.py +58 -11
warp/fem/space/function_space.py +146 -5
warp/fem/space/grid_2d_function_space.py +80 -66
warp/fem/space/grid_3d_function_space.py +113 -68
warp/fem/space/hexmesh_function_space.py +96 -108
warp/fem/space/nanogrid_function_space.py +62 -110
warp/fem/space/quadmesh_function_space.py +208 -0
warp/fem/space/shape/__init__.py +45 -7
warp/fem/space/shape/cube_shape_function.py +328 -54
warp/fem/space/shape/shape_function.py +10 -1
warp/fem/space/shape/square_shape_function.py +328 -60
warp/fem/space/shape/tet_shape_function.py +269 -19
warp/fem/space/shape/triangle_shape_function.py +238 -19
warp/fem/space/tetmesh_function_space.py +69 -37
warp/fem/space/topology.py +38 -0
warp/fem/space/trimesh_function_space.py +179 -0
warp/fem/utils.py +6 -331
warp/jax_experimental.py +3 -1
warp/native/array.h +55 -40
warp/native/builtin.h +124 -43
warp/native/bvh.h +4 -0
warp/native/coloring.cpp +600 -0
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -1
warp/native/fabric.h +8 -0
warp/native/hashgrid.h +4 -0
warp/native/marching.cu +8 -0
warp/native/mat.h +14 -3
warp/native/mathdx.cpp +59 -0
warp/native/mesh.h +4 -0
warp/native/range.h +13 -1
warp/native/reduce.cpp +9 -1
warp/native/reduce.cu +7 -0
warp/native/runlength_encode.cpp +9 -1
warp/native/runlength_encode.cu +7 -1
warp/native/scan.cpp +8 -0
warp/native/scan.cu +8 -0
warp/native/scan.h +8 -1
warp/native/sparse.cpp +8 -0
warp/native/sparse.cu +8 -0
warp/native/temp_buffer.h +7 -0
warp/native/tile.h +1857 -0
warp/native/tile_gemm.h +341 -0
warp/native/tile_reduce.h +210 -0
warp/native/volume_builder.cu +8 -0
warp/native/volume_builder.h +8 -0
warp/native/warp.cpp +10 -2
warp/native/warp.cu +369 -15
warp/native/warp.h +12 -2
warp/optim/adam.py +39 -4
warp/paddle.py +29 -12
warp/render/render_opengl.py +137 -65
warp/sim/graph_coloring.py +292 -0
warp/sim/integrator_euler.py +4 -2
warp/sim/integrator_featherstone.py +115 -44
warp/sim/integrator_vbd.py +6 -0
warp/sim/model.py +90 -17
warp/stubs.py +651 -85
warp/tape.py +12 -7
warp/tests/assets/pixel.npy +0 -0
warp/tests/aux_test_instancing_gc.py +18 -0
warp/tests/test_array.py +207 -48
warp/tests/test_closest_point_edge_edge.py +8 -8
warp/tests/test_codegen.py +120 -1
warp/tests/test_codegen_instancing.py +30 -0
warp/tests/test_collision.py +110 -0
warp/tests/test_coloring.py +241 -0
warp/tests/test_context.py +34 -0
warp/tests/test_examples.py +18 -4
warp/tests/test_fabricarray.py +33 -0
warp/tests/test_fem.py +453 -113
warp/tests/test_func.py +48 -1
warp/tests/test_generics.py +52 -0
warp/tests/test_iter.py +68 -0
warp/tests/test_mat_scalar_ops.py +1 -1
warp/tests/test_mesh_query_point.py +5 -4
warp/tests/test_module_hashing.py +23 -0
warp/tests/test_paddle.py +27 -87
warp/tests/test_print.py +191 -1
warp/tests/test_spatial.py +1 -1
warp/tests/test_tile.py +700 -0
warp/tests/test_tile_mathdx.py +144 -0
warp/tests/test_tile_mlp.py +383 -0
warp/tests/test_tile_reduce.py +374 -0
warp/tests/test_tile_shared_memory.py +190 -0
warp/tests/test_vbd.py +12 -20
warp/tests/test_volume.py +43 -0
warp/tests/unittest_suites.py +23 -2
warp/tests/unittest_utils.py +4 -0
warp/types.py +339 -73
warp/utils.py +22 -1
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
warp/fem/field/test.py +0 -180
warp/fem/field/trial.py +0 -183
warp/fem/space/collocated_function_space.py +0 -102
warp/fem/space/quadmesh_2d_function_space.py +0 -261
warp/fem/space/trimesh_2d_function_space.py +0 -153
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0

warp/tape.py CHANGED

@@ -15,7 +15,7 @@ class Tape:
     """
     Record kernel launches within a Tape scope to enable automatic differentiation.
     Gradients can be computed after the operations have been recorded on the tape via
-    ``tape.backward()``.
+    :meth:`Tape.backward()`.
     Example
     -------
@@ -131,6 +131,7 @@ class Tape:
                 inputs = launch[3]
                 outputs = launch[4]
                 device = launch[5]
+                block_dim = launch[6]
                 adj_inputs = []
                 adj_outputs = []
@@ -153,13 +154,14 @@ class Tape:
                     device=device,
                     adjoint=True,
                     max_blocks=max_blocks,
+                    block_dim=block_dim,
                 )
     # record a kernel launch on the tape
-    def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, metadata=None):
+    def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, block_dim=0, metadata=None):
         if metadata is None:
             metadata = {}
-        self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, metadata])
+        self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, block_dim, metadata])
     def record_func(self, backward, arrays):
         """
@@ -614,7 +616,9 @@ class ArrayStatsVisitor(TapeVisitor):
         self.array_grad_stats.insert(0, grad_stats)
-Launch = namedtuple("Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "metadata"])
+Launch = namedtuple(
+    "Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "block_dim", "metadata"]
+)
 RepeatedSequence = namedtuple("RepeatedSequence", ["start", "end", "repetitions"])
@@ -645,8 +649,8 @@ def visit_tape(
     def get_launch_id(launch):
         kernel = launch[0]
         suffix = ""
-        if len(launch) > 6:
-            metadata = launch[6]
+        if len(launch) > 7:
+            metadata = launch[7]
             # calling function helps to identify unique launches
             if "caller" in metadata:
                 caller = metadata["caller"]
@@ -680,7 +684,8 @@ def visit_tape(
             inputs=launch[3],
             outputs=launch[4],
             device=launch[5],
-            metadata=launch[6] if len(launch) > 6 else {},
+            block_dim=launch[6],
+            metadata=launch[7] if len(launch) > 7 else {},
         )
         for launch in kernel_launches
     ]

warp/tests/assets/pixel.npy ADDED

Binary file

warp/tests/aux_test_instancing_gc.py ADDED

@@ -0,0 +1,18 @@
+# Copyright (c) 2024 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+"""Helper module used in test_codegen_instancing.py"""
+import warp as wp
+def create_kernel_closure(value: int):
+    @wp.kernel
+    def k(a: wp.array(dtype=int)):
+        a[0] = value
+    return k

warp/tests/test_array.py CHANGED

@@ -6,6 +6,7 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 import unittest
+from typing import Any
 import numpy as np
@@ -2361,64 +2362,85 @@ def test_array_from_cai(test, device):
     assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))
-def test_array_inplace_ops(test, device):
-    @wp.kernel
-    def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] += y[i]
-    @wp.kernel
-    def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] += y[i, j]
-    @wp.kernel
-    def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] += y[i, j, k]
+@wp.kernel
+def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] += y[i, j]
-    @wp.kernel
-    def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] += y[i, j, k, l]
-    @wp.kernel
-    def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
-        i = wp.tid()
-        x[i] -= y[i]
+@wp.kernel
+def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] += y[i, j, k]
-    @wp.kernel
-    def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
-        i, j = wp.tid()
-        x[i, j] -= y[i, j]
-    @wp.kernel
-    def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
-        i, j, k = wp.tid()
-        x[i, j, k] -= y[i, j, k]
+@wp.kernel
+def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] += y[i, j, k, l]
-    @wp.kernel
-    def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
-        i, j, k, l = wp.tid()
-        x[i, j, k, l] -= y[i, j, k, l]
-    @wp.kernel
-    def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
-        i = wp.tid()
-        x[i] += y[i]
+@wp.kernel
+def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] -= y[i]
-    @wp.kernel
-    def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
-        i = wp.tid()
-        x[i] += y[i]
-    @wp.kernel
-    def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
-        i = wp.tid()
-        a = y[i]
-        a += x[i]
-        wp.atomic_add(z, 0, a)
+@wp.kernel
+def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
+    i, j = wp.tid()
+    x[i, j] -= y[i, j]
+@wp.kernel
+def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
+    i, j, k = wp.tid()
+    x[i, j, k] -= y[i, j, k]
+@wp.kernel
+def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
+    i, j, k, l = wp.tid()
+    x[i, j, k, l] -= y[i, j, k, l]
+@wp.kernel
+def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
+    i = wp.tid()
+    x[i] += y[i]
+@wp.kernel
+def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
+    i = wp.tid()
+    x[i] += y[i]
+@wp.kernel
+def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+    i = wp.tid()
+    a = y[i]
+    a += x[i]
+    wp.atomic_add(z, 0, a)
+vec9 = wp.vec(length=9, dtype=float)
+@wp.kernel
+def inplace_add_custom_vec(x: wp.array(dtype=vec9), y: wp.array(dtype=vec9)):
+    i = wp.tid()
+    x[i] += y[i]
+    x[i] += y[i]
+def test_array_inplace_diff_ops(test, device):
     N = 3
     x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
     x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
@@ -2526,6 +2548,60 @@ def test_array_inplace_ops(test, device):
     assert_np_equal(x.grad.numpy(), np.ones(1, dtype=float))
     assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))
+    tape.reset()
+    x = wp.zeros(1, dtype=vec9, requires_grad=True, device=device)
+    y = wp.ones(1, dtype=vec9, requires_grad=True, device=device)
+    with tape:
+        wp.launch(inplace_add_custom_vec, 1, inputs=[x, y], device=device)
+    tape.backward(grads={x: wp.ones_like(x)})
+    assert_np_equal(x.numpy(), np.full((1, 9), 2.0, dtype=float))
+    assert_np_equal(y.grad.numpy(), np.full((1, 9), 2.0, dtype=float))
+@wp.kernel
+def inplace_mul_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] *= y[i]
+@wp.kernel
+def inplace_div_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
+    i = wp.tid()
+    x[i] /= y[i]
+@wp.kernel
+def inplace_add_non_atomic_types(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
+    i = wp.tid()
+    x[i] += y[i]
+uint16vec3 = wp.vec(length=3, dtype=wp.uint16)
+def test_array_inplace_non_diff_ops(test, device):
+    N = 3
+    x1 = wp.full(N, value=10.0, dtype=float, device=device)
+    y1 = wp.full(N, value=5.0, dtype=float, device=device)
+    wp.launch(inplace_mul_1d, N, inputs=[x1, y1], device=device)
+    assert_np_equal(x1.numpy(), np.full(N, fill_value=50.0, dtype=float))
+    x1.fill_(10.0)
+    y1.fill_(5.0)
+    wp.launch(inplace_div_1d, N, inputs=[x1, y1], device=device)
+    assert_np_equal(x1.numpy(), np.full(N, fill_value=2.0, dtype=float))
+    for dtype in wp.types.non_atomic_types + (wp.vec2b, wp.vec2ub, wp.vec2s, wp.vec2us, uint16vec3):
+        x = wp.full(N, value=0, dtype=dtype, device=device)
+        y = wp.full(N, value=1, dtype=dtype, device=device)
+        wp.launch(inplace_add_non_atomic_types, N, inputs=[x, y], device=device)
+        assert_np_equal(x.numpy(), y.numpy())
 @wp.kernel
@@ -2609,6 +2685,87 @@ def test_numpy_array_interface(test, device):
         assert a1.strides == a2.strides
+@wp.kernel
+def kernel_indexing_types(
+    arr_1d: wp.array(dtype=wp.int32, ndim=1),
+    arr_2d: wp.array(dtype=wp.int32, ndim=2),
+    arr_3d: wp.array(dtype=wp.int32, ndim=3),
+    arr_4d: wp.array(dtype=wp.int32, ndim=4),
+):
+    x = arr_1d[wp.uint8(0)]
+    y = arr_1d[wp.int16(1)]
+    z = arr_1d[wp.uint32(2)]
+    w = arr_1d[wp.int64(3)]
+    x = arr_2d[wp.uint8(0), wp.uint8(0)]
+    y = arr_2d[wp.int16(1), wp.int16(1)]
+    z = arr_2d[wp.uint32(2), wp.uint32(2)]
+    w = arr_2d[wp.int64(3), wp.int64(3)]
+    x = arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)]
+    y = arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)]
+    z = arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)]
+    w = arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)]
+    x = arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)]
+    y = arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)]
+    z = arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)]
+    w = arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)]
+    arr_1d[wp.uint8(0)] = 123
+    arr_1d[wp.int16(1)] = 123
+    arr_1d[wp.uint32(2)] = 123
+    arr_1d[wp.int64(3)] = 123
+    arr_2d[wp.uint8(0), wp.uint8(0)] = 123
+    arr_2d[wp.int16(1), wp.int16(1)] = 123
+    arr_2d[wp.uint32(2), wp.uint32(2)] = 123
+    arr_2d[wp.int64(3), wp.int64(3)] = 123
+    arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
+    arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)] = 123
+    arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
+    arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)] = 123
+    arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
+    arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)] = 123
+    arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
+    arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)] = 123
+    wp.atomic_add(arr_1d, wp.uint8(0), 123)
+    wp.atomic_sub(arr_1d, wp.int16(1), 123)
+    wp.atomic_min(arr_1d, wp.uint32(2), 123)
+    wp.atomic_max(arr_1d, wp.int64(3), 123)
+    wp.atomic_add(arr_2d, wp.uint8(0), wp.uint8(0), 123)
+    wp.atomic_sub(arr_2d, wp.int16(1), wp.int16(1), 123)
+    wp.atomic_min(arr_2d, wp.uint32(2), wp.uint32(2), 123)
+    wp.atomic_max(arr_2d, wp.int64(3), wp.int64(3), 123)
+    wp.atomic_add(arr_3d, wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
+    wp.atomic_sub(arr_3d, wp.int16(1), wp.int16(1), wp.int16(1), 123)
+    wp.atomic_min(arr_3d, wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
+    wp.atomic_max(arr_3d, wp.int64(3), wp.int64(3), wp.int64(3), 123)
+    wp.atomic_add(arr_4d, wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
+    wp.atomic_sub(arr_4d, wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1), 123)
+    wp.atomic_min(arr_4d, wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
+    wp.atomic_max(arr_4d, wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3), 123)
+def test_indexing_types(test, device):
+    arr_1d = wp.zeros(shape=(4,), dtype=wp.int32, device=device)
+    arr_2d = wp.zeros(shape=(4, 4), dtype=wp.int32, device=device)
+    arr_3d = wp.zeros(shape=(4, 4, 4), dtype=wp.int32, device=device)
+    arr_4d = wp.zeros(shape=(4, 4, 4, 4), dtype=wp.int32, device=device)
+    wp.launch(
+        kernel=kernel_indexing_types,
+        dim=1,
+        inputs=(arr_1d, arr_2d, arr_3d, arr_4d),
+        device=device,
+    )
 devices = get_test_devices()
@@ -2669,12 +2826,14 @@ add_function_test(TestArray, "test_array_from_numpy", test_array_from_numpy, dev
 add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
 add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])
-add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_diff_ops", test_array_inplace_diff_ops, devices=devices)
+add_function_test(TestArray, "test_array_inplace_non_diff_ops", test_array_inplace_non_diff_ops, devices=devices)
 add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
 add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)
 add_function_test(TestArray, "test_array_from_int32_domain", test_array_from_int32_domain, devices=devices)
 add_function_test(TestArray, "test_array_from_int64_domain", test_array_from_int64_domain, devices=devices)
+add_function_test(TestArray, "test_indexing_types", test_indexing_types, devices=devices)
 try:
     import torch

warp/tests/test_closest_point_edge_edge.py CHANGED

@@ -220,12 +220,12 @@ def check_edge_closest_point_sufficient_necessary_kernel(
 def check_edge_closest_point_random(test, device):
     num_tests = 100000
-    np.random.seed(12345)
-    p1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
-    q1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    rng = np.random.default_rng(123)
+    p1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
+    q1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
-    p2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
-    q2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
+    p2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
+    q2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
     wp.launch(
         kernel=check_edge_closest_point_sufficient_necessary_kernel,
@@ -235,10 +235,10 @@ def check_edge_closest_point_random(test, device):
     )
     # parallel edges
-    p1 = np.random.randn(num_tests, 3)
-    q1 = np.random.randn(num_tests, 3)
+    p1 = rng.standard_normal(size=(num_tests, 3))
+    q1 = rng.standard_normal(size=(num_tests, 3))
-    shifts = np.random.randn(num_tests, 3)
+    shifts = rng.standard_normal(size=(num_tests, 3))
     p2 = p1 + shifts
     q2 = q1 + shifts

warp/tests/test_codegen.py CHANGED

@@ -7,11 +7,27 @@
 import sys
 import unittest
+from typing import Tuple
 import warp as wp
 from warp.tests.unittest_utils import *
+@wp.kernel
+def test_expect():
+    a = 1.0
+    a += 2.0
+    wp.expect_eq(123, 123)
+    wp.expect_neq(123, 234)
+    wp.expect_eq(wp.vec2(1.0, 2.0), wp.vec2(1.0, 2.0))
+    wp.expect_neq(wp.vec2(1.0, 2.0), wp.vec2(2.0, 3.0))
+    wp.expect_eq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(1.0, 2.0, 3.0, 4.0))
+    wp.expect_neq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(2.0, 3.0, 4.0, 5.0))
 @wp.kernel
 def test_rename():
     a = 0
@@ -534,6 +550,103 @@ def test_error_mutating_constant_in_dynamic_loop(test, device):
     )
     assert_np_equal(output.numpy(), np.ones([num_threads, const_a + const_b + dyn_a + dyn_b + dyn_c + 1]))
+    @wp.kernel
+    def static_then_dynamic_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
+        tid = wp.tid()
+        mat = wp.mat33d()
+        for i in range(3):
+            for j in range(3):
+                mat[i, j] = wp.float64(0.0)
+        dim = 2
+        for i in range(dim + 1):
+            for j in range(dim + 1):
+                mat[i, j] = wp.float64(1.0)
+        mats[tid] = mat
+    mats = wp.empty(1, dtype=wp.mat33d, device=device)
+    wp.launch(static_then_dynamic_loop_kernel, dim=1, inputs=[mats], device=device)
+    assert_np_equal(mats.numpy(), np.ones((1, 3, 3)))
+    @wp.kernel
+    def dynamic_then_static_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
+        tid = wp.tid()
+        mat = wp.mat33d()
+        dim = 2
+        for i in range(dim + 1):
+            for j in range(dim + 1):
+                mat[i, j] = wp.float64(1.0)
+        for i in range(3):
+            for j in range(3):
+                mat[i, j] = wp.float64(0.0)
+        mats[tid] = mat
+    mats = wp.empty(1, dtype=wp.mat33d, device=device)
+    wp.launch(dynamic_then_static_loop_kernel, dim=1, inputs=[mats], device=device)
+    assert_np_equal(mats.numpy(), np.zeros((1, 3, 3)))
+def test_error_return_annotation_mismatch(test, device):
+    @wp.func
+    def foo_1(x: wp.int32) -> wp.int16:
+        return wp.int8(x)
+    def kernel_1_fn():
+        x = foo_1(123)
+    @wp.func
+    def foo_2(x: int) -> int:
+        return (x + x, x * x)
+    def kernel_2_fn():
+        x = foo_2(123)
+    @wp.func
+    def foo_3(x: int) -> Tuple[int, int]:
+        return (x, 1.23)
+    def kernel_3_fn():
+        x, y = foo_3(123)
+    @wp.func
+    def foo_4(x: int) -> Tuple[int, int, int]:
+        return (x + x, x * x)
+    def kernel_4_fn():
+        x, y, z = foo_4(123)
+    kernel = wp.Kernel(func=kernel_1_fn)
+    with test.assertRaisesRegex(
+        wp.codegen.WarpCodegenError,
+        r"The function `foo_1` has its return type annotated as `int16` but the code returns a value of type `int8`.",
+    ):
+        wp.launch(kernel, dim=1, device=device)
+    kernel = wp.Kernel(func=kernel_2_fn)
+    with test.assertRaisesRegex(
+        wp.codegen.WarpCodegenError,
+        r"The function `foo_2` has its return type annotated as `int` but the code returns 2 values.",
+    ):
+        wp.launch(kernel, dim=1, device=device)
+    kernel = wp.Kernel(func=kernel_3_fn)
+    with test.assertRaisesRegex(
+        wp.codegen.WarpCodegenError,
+        r"The function `foo_3` has its return type annotated as `Tuple\[int, int\]` but the code returns a tuple with types `\(int32, float32\)`.",
+    ):
+        wp.launch(kernel, dim=1, device=device)
+    kernel = wp.Kernel(func=kernel_4_fn)
+    with test.assertRaisesRegex(
+        wp.codegen.WarpCodegenError,
+        r"The function `foo_4` has its return type annotated as a tuple of 3 elements but the code returns 2 values.",
+    ):
+        wp.launch(kernel, dim=1, device=device)
 @wp.kernel
 def test_call_syntax():
@@ -583,6 +696,7 @@ class TestCodeGen(unittest.TestCase):
 devices = get_test_devices()
+add_kernel_test(TestCodeGen, name="test_expect", kernel=test_expect, dim=1, devices=devices)
 add_kernel_test(TestCodeGen, name="test_inplace", kernel=test_inplace, dim=1, devices=devices)
 add_kernel_test(TestCodeGen, name="test_rename", kernel=test_rename, dim=1, devices=devices)
 add_kernel_test(TestCodeGen, name="test_constant", kernel=test_constant, inputs=[1.0], dim=1, devices=devices)
@@ -719,7 +833,12 @@ add_function_test(
     name="test_error_mutating_constant_in_dynamic_loop",
     devices=devices,
 )
+add_function_test(
+    TestCodeGen,
+    func=test_error_return_annotation_mismatch,
+    name="test_error_return_annotation_mismatch",
+    devices=devices,
+)
 add_kernel_test(TestCodeGen, name="test_call_syntax", kernel=test_call_syntax, dim=1, devices=devices)
 add_kernel_test(TestCodeGen, name="test_shadow_builtin", kernel=test_shadow_builtin, dim=1, devices=devices)
 add_kernel_test(TestCodeGen, name="test_while_condition_eval", kernel=test_while_condition_eval, dim=1, devices=devices)

warp/tests/test_codegen_instancing.py CHANGED

@@ -1287,6 +1287,35 @@ def test_module_mark_modified(test, device):
 # =======================================================================
+def test_garbage_collection(test, device):
+    """Test that dynamically generated kernels without user references are not retained in the module."""
+    # use a helper module with a known kernel count
+    import warp.tests.aux_test_instancing_gc as gc_test_module
+    with wp.ScopedDevice(device):
+        a = wp.zeros(1, dtype=int)
+        for i in range(10):
+            # create a unique kernel on each iteration
+            k = gc_test_module.create_kernel_closure(i)
+            # import gc
+            # gc.collect()
+            # since we don't keep references to the previous kernels,
+            # they should be garbage-collected and not appear in the module
+            k.module.load(device=device)
+            test.assertEqual(len(k.module.live_kernels), 1)
+            # test the kernel
+            wp.launch(k, dim=1, inputs=[a])
+            test.assertEqual(a.numpy()[0], i)
+# =======================================================================
 class TestCodeGenInstancing(unittest.TestCase):
     pass
@@ -1450,6 +1479,7 @@ add_function_test(TestCodeGenInstancing, func=test_create_kernel_loop, name="tes
 add_function_test(
     TestCodeGenInstancing, func=test_module_mark_modified, name="test_module_mark_modified", devices=devices
 )
+add_function_test(TestCodeGenInstancing, func=test_garbage_collection, name="test_garbage_collection", devices=devices)
 if __name__ == "__main__":