warp_lang-1.4.2-py3-none-manylinux2014_x86_64.whl → warp_lang-1.5.0-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang was flagged as potentially problematic.
Files changed (158)
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1783 -2
  8. warp/codegen.py +177 -45
  9. warp/config.py +2 -2
  10. warp/context.py +321 -73
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/fem/example_adaptive_grid.py +37 -10
  17. warp/examples/fem/example_apic_fluid.py +3 -2
  18. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  19. warp/examples/fem/example_deformed_geometry.py +1 -1
  20. warp/examples/fem/example_diffusion_3d.py +47 -4
  21. warp/examples/fem/example_distortion_energy.py +220 -0
  22. warp/examples/fem/example_magnetostatics.py +127 -85
  23. warp/examples/fem/example_nonconforming_contact.py +5 -5
  24. warp/examples/fem/example_stokes.py +3 -1
  25. warp/examples/fem/example_streamlines.py +12 -19
  26. warp/examples/fem/utils.py +38 -15
  27. warp/examples/sim/example_cloth.py +2 -25
  28. warp/examples/sim/example_quadruped.py +2 -1
  29. warp/examples/tile/example_tile_convolution.py +58 -0
  30. warp/examples/tile/example_tile_fft.py +47 -0
  31. warp/examples/tile/example_tile_filtering.py +105 -0
  32. warp/examples/tile/example_tile_matmul.py +79 -0
  33. warp/examples/tile/example_tile_mlp.py +375 -0
  34. warp/fem/__init__.py +8 -0
  35. warp/fem/cache.py +16 -12
  36. warp/fem/dirichlet.py +1 -1
  37. warp/fem/domain.py +44 -1
  38. warp/fem/field/__init__.py +1 -2
  39. warp/fem/field/field.py +31 -19
  40. warp/fem/field/nodal_field.py +101 -49
  41. warp/fem/field/virtual.py +794 -0
  42. warp/fem/geometry/__init__.py +2 -2
  43. warp/fem/geometry/deformed_geometry.py +3 -105
  44. warp/fem/geometry/element.py +13 -0
  45. warp/fem/geometry/geometry.py +165 -5
  46. warp/fem/geometry/grid_2d.py +3 -6
  47. warp/fem/geometry/grid_3d.py +31 -28
  48. warp/fem/geometry/hexmesh.py +3 -46
  49. warp/fem/geometry/nanogrid.py +3 -2
  50. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  51. warp/fem/geometry/tetmesh.py +2 -43
  52. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  53. warp/fem/integrate.py +683 -261
  54. warp/fem/linalg.py +404 -0
  55. warp/fem/operator.py +101 -18
  56. warp/fem/polynomial.py +5 -5
  57. warp/fem/quadrature/quadrature.py +45 -21
  58. warp/fem/space/__init__.py +45 -11
  59. warp/fem/space/basis_function_space.py +451 -0
  60. warp/fem/space/basis_space.py +58 -11
  61. warp/fem/space/function_space.py +146 -5
  62. warp/fem/space/grid_2d_function_space.py +80 -66
  63. warp/fem/space/grid_3d_function_space.py +113 -68
  64. warp/fem/space/hexmesh_function_space.py +96 -108
  65. warp/fem/space/nanogrid_function_space.py +62 -110
  66. warp/fem/space/quadmesh_function_space.py +208 -0
  67. warp/fem/space/shape/__init__.py +45 -7
  68. warp/fem/space/shape/cube_shape_function.py +328 -54
  69. warp/fem/space/shape/shape_function.py +10 -1
  70. warp/fem/space/shape/square_shape_function.py +328 -60
  71. warp/fem/space/shape/tet_shape_function.py +269 -19
  72. warp/fem/space/shape/triangle_shape_function.py +238 -19
  73. warp/fem/space/tetmesh_function_space.py +69 -37
  74. warp/fem/space/topology.py +38 -0
  75. warp/fem/space/trimesh_function_space.py +179 -0
  76. warp/fem/utils.py +6 -331
  77. warp/jax_experimental.py +3 -1
  78. warp/native/array.h +15 -0
  79. warp/native/builtin.h +66 -26
  80. warp/native/bvh.h +4 -0
  81. warp/native/coloring.cpp +600 -0
  82. warp/native/cuda_util.cpp +14 -0
  83. warp/native/cuda_util.h +2 -1
  84. warp/native/fabric.h +8 -0
  85. warp/native/hashgrid.h +4 -0
  86. warp/native/marching.cu +8 -0
  87. warp/native/mat.h +14 -3
  88. warp/native/mathdx.cpp +59 -0
  89. warp/native/mesh.h +4 -0
  90. warp/native/range.h +13 -1
  91. warp/native/reduce.cpp +9 -1
  92. warp/native/reduce.cu +7 -0
  93. warp/native/runlength_encode.cpp +9 -1
  94. warp/native/runlength_encode.cu +7 -1
  95. warp/native/scan.cpp +8 -0
  96. warp/native/scan.cu +8 -0
  97. warp/native/scan.h +8 -1
  98. warp/native/sparse.cpp +8 -0
  99. warp/native/sparse.cu +8 -0
  100. warp/native/temp_buffer.h +7 -0
  101. warp/native/tile.h +1857 -0
  102. warp/native/tile_gemm.h +341 -0
  103. warp/native/tile_reduce.h +210 -0
  104. warp/native/volume_builder.cu +8 -0
  105. warp/native/volume_builder.h +8 -0
  106. warp/native/warp.cpp +10 -2
  107. warp/native/warp.cu +369 -15
  108. warp/native/warp.h +12 -2
  109. warp/optim/adam.py +39 -4
  110. warp/paddle.py +29 -12
  111. warp/render/render_opengl.py +137 -65
  112. warp/sim/graph_coloring.py +292 -0
  113. warp/sim/integrator_euler.py +4 -2
  114. warp/sim/integrator_featherstone.py +115 -44
  115. warp/sim/integrator_vbd.py +6 -0
  116. warp/sim/model.py +88 -15
  117. warp/stubs.py +569 -4
  118. warp/tape.py +12 -7
  119. warp/tests/assets/pixel.npy +0 -0
  120. warp/tests/aux_test_instancing_gc.py +18 -0
  121. warp/tests/test_array.py +39 -0
  122. warp/tests/test_codegen.py +81 -1
  123. warp/tests/test_codegen_instancing.py +30 -0
  124. warp/tests/test_collision.py +110 -0
  125. warp/tests/test_coloring.py +241 -0
  126. warp/tests/test_context.py +34 -0
  127. warp/tests/test_examples.py +18 -4
  128. warp/tests/test_fem.py +453 -113
  129. warp/tests/test_func.py +13 -0
  130. warp/tests/test_generics.py +52 -0
  131. warp/tests/test_iter.py +68 -0
  132. warp/tests/test_mat_scalar_ops.py +1 -1
  133. warp/tests/test_mesh_query_point.py +1 -1
  134. warp/tests/test_module_hashing.py +23 -0
  135. warp/tests/test_paddle.py +27 -87
  136. warp/tests/test_print.py +56 -1
  137. warp/tests/test_spatial.py +1 -1
  138. warp/tests/test_tile.py +700 -0
  139. warp/tests/test_tile_mathdx.py +144 -0
  140. warp/tests/test_tile_mlp.py +383 -0
  141. warp/tests/test_tile_reduce.py +374 -0
  142. warp/tests/test_tile_shared_memory.py +190 -0
  143. warp/tests/test_vbd.py +12 -20
  144. warp/tests/test_volume.py +43 -0
  145. warp/tests/unittest_suites.py +19 -2
  146. warp/tests/unittest_utils.py +4 -0
  147. warp/types.py +338 -72
  148. warp/utils.py +22 -1
  149. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
  150. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/RECORD +153 -126
  151. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
  152. warp/fem/field/test.py +0 -180
  153. warp/fem/field/trial.py +0 -183
  154. warp/fem/space/collocated_function_space.py +0 -102
  155. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  156. warp/fem/space/trimesh_2d_function_space.py +0 -153
  157. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
  158. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
warp/tests/test_func.py CHANGED
@@ -222,6 +222,16 @@ def test_user_func_overload_resolution(test, device):
     assert a1.numpy()[0] == 12
 
 
+@wp.func
+def user_func_return_none() -> None:
+    pass
+
+
+@wp.kernel
+def test_return_annotation_none() -> None:
+    user_func_return_none()
+
+
 devices = get_test_devices()
 
 
@@ -409,6 +419,9 @@ add_kernel_test(
 add_function_test(
     TestFunc, func=test_user_func_overload_resolution, name="test_user_func_overload_resolution", devices=devices
 )
+add_kernel_test(
+    TestFunc, kernel=test_return_annotation_none, name="test_return_annotation_none", dim=1, devices=devices
+)
 
 
 if __name__ == "__main__":
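
The hunk above adds coverage for explicit `-> None` return annotations on both `@wp.func` and `@wp.kernel` definitions. A minimal standalone sketch of the behavior being tested (function and array names are illustrative, assuming warp-lang 1.5.0):

    import warp as wp

    @wp.func
    def side_effect_only(x: float) -> None:
        # explicitly annotated as returning nothing
        wp.printf("x = %f\n", x)

    @wp.kernel
    def run(values: wp.array(dtype=float)) -> None:
        i = wp.tid()
        side_effect_only(values[i])

    values = wp.array([1.0, 2.0], dtype=float)
    wp.launch(run, dim=values.shape[0], inputs=[values])
    wp.synchronize()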
warp/tests/test_generics.py CHANGED
@@ -522,6 +522,57 @@ def test_type_attribute_error(test, device):
     )
 
 
+@wp.func
+def vec_int_annotation_func(v: wp.vec(3, wp.Int)) -> wp.Int:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_float_annotation_func(v: wp.vec(3, wp.Float)) -> wp.Float:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_scalar_annotation_func(v: wp.vec(3, wp.Scalar)) -> wp.Scalar:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def mat_int_annotation_func(m: wp.mat((2, 2), wp.Int)) -> wp.Int:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_float_annotation_func(m: wp.mat((2, 2), wp.Float)) -> wp.Float:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_scalar_annotation_func(m: wp.mat((2, 2), wp.Scalar)) -> wp.Scalar:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+mat22s = wp.mat((2, 2), wp.int16)
+mat22d = wp.mat((2, 2), wp.float64)
+
+
+@wp.kernel
+def test_annotations_kernel():
+    vi16 = wp.vec3s(wp.int16(1), wp.int16(2), wp.int16(3))
+    vf64 = wp.vec3d(wp.float64(1), wp.float64(2), wp.float64(3))
+    wp.expect_eq(vec_int_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_float_annotation_func(vf64), wp.float64(6))
+    wp.expect_eq(vec_scalar_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_scalar_annotation_func(vf64), wp.float64(6))
+
+    mi16 = mat22s(wp.int16(1), wp.int16(2), wp.int16(3), wp.int16(4))
+    mf64 = mat22d(wp.float64(1), wp.float64(2), wp.float64(3), wp.float64(4))
+    wp.expect_eq(mat_int_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_float_annotation_func(mf64), wp.float64(10))
+    wp.expect_eq(mat_scalar_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_scalar_annotation_func(mf64), wp.float64(10))
+
+
 class TestGenerics(unittest.TestCase):
     pass
 
@@ -590,6 +641,7 @@ add_kernel_test(
 )
 add_function_test(TestGenerics, "test_type_operator_misspell", test_type_operator_misspell, devices=devices)
 add_function_test(TestGenerics, "test_type_attribute_error", test_type_attribute_error, devices=devices)
+add_kernel_test(TestGenerics, name="test_annotations_kernel", kernel=test_annotations_kernel, dim=1, devices=devices)
 
 if __name__ == "__main__":
     wp.clear_kernel_cache()
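
The annotations exercised above are the generic type hints `wp.Int`, `wp.Float`, and `wp.Scalar`, which can parameterize `wp.vec`/`wp.mat` so that a single `@wp.func` resolves against any matching concrete scalar type. A condensed sketch of the idea (names are illustrative, assuming warp-lang 1.5.0):

    import warp as wp

    @wp.func
    def sum3(v: wp.vec(3, wp.Scalar)) -> wp.Scalar:
        # resolves for any length-3 vector, whatever its scalar element type
        return v[0] + v[1] + v[2]

    @wp.kernel
    def check():
        wp.expect_eq(sum3(wp.vec3f(1.0, 2.0, 3.0)), 6.0)
        wp.expect_eq(sum3(wp.vec3i(1, 2, 3)), 6)

    wp.launch(check, dim=1)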
warp/tests/test_iter.py ADDED
@@ -0,0 +1,68 @@
+# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+
+@wp.kernel
+def reversed_kernel(
+    start: wp.int32,
+    end: wp.int32,
+    step: wp.int32,
+    out_count: wp.array(dtype=wp.int32),
+    out_values: wp.array(dtype=wp.int32),
+):
+    count = wp.int32(0)
+    for i in reversed(range(start, end, step)):
+        out_values[count] = i
+        count += 1
+
+    out_count[0] = count
+
+
+def test_reversed(test, device):
+    count = wp.empty(1, dtype=wp.int32)
+    values = wp.empty(32, dtype=wp.int32)
+
+    start, end, step = (-2, 8, 3)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+    start, end, step = (9, -3, -2)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+
+devices = get_test_devices()
+
+
+class TestIter(unittest.TestCase):
+    pass
+
+
+add_function_test(TestIter, "test_reversed", test_reversed, devices=devices)
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2)
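
This new file covers `reversed()` over `range()` iterators inside kernels, mirroring Python's iteration order. For reference, the two cases it checks reduce to the following in plain Python:

    >>> tuple(reversed(range(-2, 8, 3)))
    (7, 4, 1, -2)
    >>> tuple(reversed(range(9, -3, -2)))
    (-1, 1, 3, 5, 7, 9)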
warp/tests/test_mat_scalar_ops.py CHANGED
@@ -1501,7 +1501,7 @@ def test_matmat_multiplication(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 2.0e-2,
         np.float32: 5.0e-6,
-        np.float64: 1.0e-8,
+        np.float64: 5.0e-7,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
warp/tests/test_mesh_query_point.py CHANGED
@@ -805,7 +805,7 @@ def test_set_mesh_points(test, device):
         device=device,
     )
 
-    shift = np.random.randn(3)
+    shift = rng.standard_normal(size=3)
 
     vs_higher = vs + shift
     vertices2 = wp.array(vs_higher, dtype=wp.vec3, device=device)
warp/tests/test_module_hashing.py CHANGED
@@ -214,12 +214,35 @@ def test_function_generic_overload_hashing(test, device):
     test.assertNotEqual(hash4, hash1)
 
 
+SIMPLE_MODULE = """# -*- coding: utf-8 -*-
+import warp as wp
+
+@wp.kernel
+def k():
+    pass
+"""
+
+
+def test_module_load(test, device):
+    """Ensure that loading a module does not change its hash"""
+    m = load_code_as_module(SIMPLE_MODULE, "simple_module")
+
+    hash1 = m.hash_module()
+    m.load(device)
+    hash2 = m.hash_module()
+
+    test.assertEqual(hash1, hash2)
+
+
 class TestModuleHashing(unittest.TestCase):
     pass
 
 
+devices = get_test_devices()
+
 add_function_test(TestModuleHashing, "test_function_overload_hashing", test_function_overload_hashing)
 add_function_test(TestModuleHashing, "test_function_generic_overload_hashing", test_function_generic_overload_hashing)
+add_function_test(TestModuleHashing, "test_module_load", test_module_load, devices=devices)
 
 
 if __name__ == "__main__":
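
The new `test_module_load` pins down an invariant rather than a feature: compiling and loading a module must not perturb its content hash. A sketch of the same check outside the test harness, reusing the `load_code_as_module` helper the test star-imports from `warp.tests.unittest_utils` (assuming that helper is importable by name; module name and device string are illustrative):

    import warp as wp
    from warp.tests.unittest_utils import load_code_as_module

    code = "import warp as wp\n\n@wp.kernel\ndef k():\n    pass\n"
    m = load_code_as_module(code, "hash_demo")

    h_before = m.hash_module()
    m.load("cpu")  # building/loading should leave the hash unchanged
    assert m.hash_module() == h_before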
warp/tests/test_paddle.py CHANGED
@@ -7,8 +7,6 @@
 
 import unittest
 
-import numpy as np
-
 import warp as wp
 from warp.tests.unittest_utils import *
 
@@ -444,7 +442,7 @@ def test_from_paddle_slices(test, device):
     assert a.ptr == t.data_ptr()
     assert a.is_contiguous
     assert a.shape == tuple(t.shape)
-    assert_np_equal(a.numpy(), t.cpu().numpy())
+    assert_np_equal(a.numpy(), t.numpy())
 
     # 1D slice with non-contiguous stride
     t_base = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
@@ -456,7 +454,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy1d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices (non-contiguous)
     t_base = paddle.arange(24, dtype=paddle.float32).to(device=paddle_device).reshape((4, 6))
@@ -468,7 +466,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 3D slices (non-contiguous)
     t_base = paddle.arange(36, dtype=paddle.float32).to(device=paddle_device).reshape((4, 3, 3))
@@ -480,7 +478,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of vec3 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(150, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 3))
@@ -492,7 +490,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_vec3_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of mat22 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(200, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 2, 2))
@@ -504,7 +502,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_mat22_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_from_paddle_zero_strides(test, device):
@@ -522,7 +520,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand middle dimension
     t = t_base.unsqueeze(1).expand([-1, 3, -1])
@@ -532,7 +530,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand innermost dimension
     t = t_base.unsqueeze(2).expand([-1, -1, 3])
@@ -542,77 +540,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
-
-
-def test_paddle_mgpu_from_paddle(test, device):
-    import paddle
-
-    n = 32
-
-    t0 = paddle.arange(0, n, 1, dtype=paddle.int32).to(device="gpu:0")
-    t1 = paddle.arange(0, n * 2, 2, dtype=paddle.int32).to(device="gpu:1")
-
-    a0 = wp.from_paddle(t0, dtype=wp.int32)
-    a1 = wp.from_paddle(t1, dtype=wp.int32)
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected0 = np.arange(0, n, 1)
-    expected1 = np.arange(0, n * 2, 2)
-
-    assert_np_equal(a0.numpy(), expected0)
-    assert_np_equal(a1.numpy(), expected1)
-
-
-def test_paddle_mgpu_to_paddle(test, device):
-    n = 32
-
-    with wp.ScopedDevice("gpu:0"):
-        a0 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
-
-    with wp.ScopedDevice("gpu:1"):
-        a1 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a1.size, inputs=[0, 2, a1])
-
-    t0 = wp.to_paddle(a0)
-    t1 = wp.to_paddle(a1)
-
-    assert str(t0.device) == "gpu:0"
-    assert str(t1.device) == "gpu:1"
-
-    expected0 = np.arange(0, n, 1, dtype=np.int32)
-    expected1 = np.arange(0, n * 2, 2, dtype=np.int32)
-
-    assert_np_equal(t0.cpu().numpy(), expected0)
-    assert_np_equal(t1.cpu().numpy(), expected1)
-
-
-def test_paddle_mgpu_interop(test, device):
-    import paddle
-
-    n = 1024 * 1024
-
-    with paddle.cuda.device(0):
-        t0 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a0 = wp.from_paddle(t0)
-        wp.launch(inc, dim=a0.size, inputs=[a0], stream=wp.stream_from_paddle())
-
-    with paddle.cuda.device(1):
-        t1 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a1 = wp.from_paddle(t1)
-        wp.launch(inc, dim=a1.size, inputs=[a1], stream=wp.stream_from_paddle())
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected = np.arange(n, dtype=int) + 1
-
-    # ensure the paddle tensors were modified by warp
-    assert_np_equal(t0.cpu().numpy(), expected)
-    assert_np_equal(t1.cpu().numpy(), expected)
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_paddle_autograd(test, device):
@@ -624,6 +552,9 @@ def test_paddle_autograd(test, device):
     class TestFunc(paddle.autograd.PyLayer):
         @staticmethod
         def forward(ctx, x):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # allocate output array
             y = paddle.empty_like(x)
 
@@ -632,10 +563,16 @@ def test_paddle_autograd(test, device):
 
             wp.launch(kernel=op_kernel, dim=len(x), inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return y
 
         @staticmethod
         def backward(ctx, adj_y):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # adjoints should be allocated as zero initialized
             adj_x = paddle.zeros_like(ctx.x).contiguous()
             adj_y = adj_y.contiguous()
@@ -655,6 +592,9 @@ def test_paddle_autograd(test, device):
                 adjoint=True,
             )
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return adj_x
 
     # run autograd on given device
@@ -691,7 +631,7 @@ def test_warp_graph_warp_stream(test, device):
     paddle_stream = wp.stream_to_paddle(device)
 
     # capture graph
-    with wp.ScopedDevice(device), paddle.device.stream(paddle_stream):
+    with wp.ScopedDevice(device), paddle.device.stream_guard(paddle.device.Stream(paddle_stream)):
         wp.capture_begin(force_module_load=False)
         try:
             t += 1.0
@@ -837,11 +777,11 @@ try:
     #     devices=paddle_compatible_cuda_devices,
     # )
 
-    # multi-GPU tests
-    if len(paddle_compatible_cuda_devices) > 1:
-        add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
+    # multi-GPU not supported yet.
+    # if len(paddle_compatible_cuda_devices) > 1:
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
 
 except Exception as e:
     print(f"Skipping Paddle tests due to exception: {e}")
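
The recurring edit in `test_paddle_autograd` is a synchronization pattern: Warp and Paddle may issue work on different CUDA streams, so each framework's pending work is flushed before the other touches shared memory. Stripped of the test scaffolding, the pattern looks roughly like this (kernel and tensor names are illustrative):

    import paddle
    import warp as wp

    @wp.kernel
    def scale(x: wp.array(dtype=float), y: wp.array(dtype=float)):
        i = wp.tid()
        y[i] = 2.0 * x[i]

    x = paddle.arange(8, dtype=paddle.float32)
    y = paddle.empty_like(x)

    wp.synchronize_device()  # let pending Paddle work finish before Warp reads x
    wp.launch(scale, dim=8, inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
    wp.synchronize_device()  # let Warp work finish before Paddle consumes y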
warp/tests/test_print.py CHANGED
@@ -17,8 +17,22 @@ from warp.tests.unittest_utils import *
 def test_print_kernel():
     wp.print(1.0)
     wp.print("this is a string")
+    wp.printf("this is another string\n")
     wp.printf("this is a float %f\n", 457.5)
     wp.printf("this is an int %d\n", 123)
+    # fmt: off
+    wp.printf(
+        "0=%d, 1=%d, 2=%d, 3=%d, 4=%d, 5=%d, 6=%d, 7=%d, "
+        "8=%d, 9=%d, 10=%d, 11=%d, 12=%d, 13=%d, 14=%d, 15=%d, "
+        "16=%d, 17=%d, 18=%d, 19=%d, 20=%d, 21=%d, 22=%d, 23=%d, "
+        "24=%d, 25=%d, 26=%d, 27=%d, 28=%d, 29=%d, 30=%d, 31=%d"
+        "\n",
+        0, 1, 2, 3, 4, 5, 6, 7,
+        8, 9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23,
+        24, 25, 26, 27, 28, 29, 30, 31,
+    )
+    # fmt: on
 
 
 @wp.kernel
@@ -59,8 +73,13 @@ def test_print(test, device):
         s,
         rf"1{os.linesep}"
         rf"this is a string{os.linesep}"
+        rf"this is another string{os.linesep}"
         rf"this is a float 457\.500000{os.linesep}"
-        rf"this is an int 123",
+        rf"this is an int 123{os.linesep}"
+        rf"0=0, 1=1, 2=2, 3=3, 4=4, 5=5, 6=6, 7=7, "
+        rf"8=8, 9=9, 10=10, 11=11, 12=12, 13=13, 14=14, 15=15, "
+        rf"16=16, 17=17, 18=18, 19=19, 20=20, 21=21, 22=22, 23=23, "
+        rf"24=24, 25=25, 26=26, 27=27, 28=28, 29=29, 30=30, 31=31{os.linesep}",
     )
 
 
@@ -260,6 +279,35 @@ def test_print_adjoint(test, device):
     )
 
 
+def test_print_error_variadic_arg_count(test, device):
+    @wp.kernel
+    def kernel():
+        # fmt: off
+        wp.printf(
+            "0=%d, 1=%d, 2=%d, 3=%d, 4=%d, 5=%d, 6=%d, 7=%d, "
+            "8=%d, 9=%d, 10=%d, 11=%d, 12=%d, 13=%d, 14=%d, 15=%d, "
+            "16=%d, 17=%d, 18=%d, 19=%d, 20=%d, 21=%d, 22=%d, 23=%d, "
+            "24=%d, 25=%d, 26=%d, 27=%d, 28=%d, 29=%d, 30=%d, 31=%d, "
+            "32=%d\n",
+            0, 1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 11, 12, 13, 14, 15,
+            16, 17, 18, 19, 20, 21, 22, 23,
+            24, 25, 26, 27, 28, 29, 30, 31,
+            32,
+        )
+        # fmt: on
+
+    with test.assertRaisesRegex(
+        RuntimeError,
+        r"the maximum number of variadic arguments that can be passed to `printf` is 32$",
+    ):
+        wp.launch(
+            kernel,
+            dim=1,
+            device=device,
+        )
+
+
 class TestPrint(unittest.TestCase):
     pass
 
@@ -269,6 +317,13 @@ add_function_test(TestPrint, "test_print", test_print, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_numeric", test_print_numeric, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_boolean", test_print_boolean, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_adjoint", test_print_adjoint, devices=devices, check_output=False)
+add_function_test(
+    TestPrint,
+    "test_print_error_variadic_arg_count",
+    test_print_error_variadic_arg_count,
+    devices=devices,
+    check_output=False,
+)
 
 
 if __name__ == "__main__":
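
Two `wp.printf` behaviors are covered above: a format string may now be passed with no variadic arguments at all, and at most 32 variadic arguments are accepted, with a `RuntimeError` raised at launch time for more. A minimal sketch (assuming warp-lang 1.5.0; kernel name is illustrative):

    import warp as wp

    @wp.kernel
    def report():
        wp.printf("no arguments needed\n")             # format-only call
        wp.printf("tid=%d half=%f\n", wp.tid(), 0.5)   # up to 32 variadic args

    wp.launch(report, dim=2)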
warp/tests/test_spatial.py CHANGED
@@ -1611,7 +1611,7 @@ def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 2.0e-2,
         np.float32: 5.0e-6,
-        np.float64: 1.0e-8,
+        np.float64: 5.0e-7,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]