warp-lang 1.4.1-py3-none-manylinux2014_x86_64.whl → 1.5.0-py3-none-manylinux2014_x86_64.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic.

Files changed (164)
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1920 -111
  8. warp/codegen.py +186 -62
  9. warp/config.py +2 -2
  10. warp/context.py +322 -73
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/core/example_dem.py +2 -1
  17. warp/examples/core/example_mesh_intersect.py +3 -3
  18. warp/examples/fem/example_adaptive_grid.py +37 -10
  19. warp/examples/fem/example_apic_fluid.py +3 -2
  20. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  21. warp/examples/fem/example_deformed_geometry.py +1 -1
  22. warp/examples/fem/example_diffusion_3d.py +47 -4
  23. warp/examples/fem/example_distortion_energy.py +220 -0
  24. warp/examples/fem/example_magnetostatics.py +127 -85
  25. warp/examples/fem/example_nonconforming_contact.py +5 -5
  26. warp/examples/fem/example_stokes.py +3 -1
  27. warp/examples/fem/example_streamlines.py +12 -19
  28. warp/examples/fem/utils.py +38 -15
  29. warp/examples/optim/example_walker.py +2 -2
  30. warp/examples/sim/example_cloth.py +2 -25
  31. warp/examples/sim/example_jacobian_ik.py +6 -2
  32. warp/examples/sim/example_quadruped.py +2 -1
  33. warp/examples/tile/example_tile_convolution.py +58 -0
  34. warp/examples/tile/example_tile_fft.py +47 -0
  35. warp/examples/tile/example_tile_filtering.py +105 -0
  36. warp/examples/tile/example_tile_matmul.py +79 -0
  37. warp/examples/tile/example_tile_mlp.py +375 -0
  38. warp/fem/__init__.py +8 -0
  39. warp/fem/cache.py +16 -12
  40. warp/fem/dirichlet.py +1 -1
  41. warp/fem/domain.py +44 -1
  42. warp/fem/field/__init__.py +1 -2
  43. warp/fem/field/field.py +31 -19
  44. warp/fem/field/nodal_field.py +101 -49
  45. warp/fem/field/virtual.py +794 -0
  46. warp/fem/geometry/__init__.py +2 -2
  47. warp/fem/geometry/deformed_geometry.py +3 -105
  48. warp/fem/geometry/element.py +13 -0
  49. warp/fem/geometry/geometry.py +165 -5
  50. warp/fem/geometry/grid_2d.py +3 -6
  51. warp/fem/geometry/grid_3d.py +31 -28
  52. warp/fem/geometry/hexmesh.py +3 -46
  53. warp/fem/geometry/nanogrid.py +3 -2
  54. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  55. warp/fem/geometry/tetmesh.py +2 -43
  56. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  57. warp/fem/integrate.py +683 -261
  58. warp/fem/linalg.py +404 -0
  59. warp/fem/operator.py +101 -18
  60. warp/fem/polynomial.py +5 -5
  61. warp/fem/quadrature/quadrature.py +45 -21
  62. warp/fem/space/__init__.py +45 -11
  63. warp/fem/space/basis_function_space.py +451 -0
  64. warp/fem/space/basis_space.py +58 -11
  65. warp/fem/space/function_space.py +146 -5
  66. warp/fem/space/grid_2d_function_space.py +80 -66
  67. warp/fem/space/grid_3d_function_space.py +113 -68
  68. warp/fem/space/hexmesh_function_space.py +96 -108
  69. warp/fem/space/nanogrid_function_space.py +62 -110
  70. warp/fem/space/quadmesh_function_space.py +208 -0
  71. warp/fem/space/shape/__init__.py +45 -7
  72. warp/fem/space/shape/cube_shape_function.py +328 -54
  73. warp/fem/space/shape/shape_function.py +10 -1
  74. warp/fem/space/shape/square_shape_function.py +328 -60
  75. warp/fem/space/shape/tet_shape_function.py +269 -19
  76. warp/fem/space/shape/triangle_shape_function.py +238 -19
  77. warp/fem/space/tetmesh_function_space.py +69 -37
  78. warp/fem/space/topology.py +38 -0
  79. warp/fem/space/trimesh_function_space.py +179 -0
  80. warp/fem/utils.py +6 -331
  81. warp/jax_experimental.py +3 -1
  82. warp/native/array.h +55 -40
  83. warp/native/builtin.h +124 -43
  84. warp/native/bvh.h +4 -0
  85. warp/native/coloring.cpp +600 -0
  86. warp/native/cuda_util.cpp +14 -0
  87. warp/native/cuda_util.h +2 -1
  88. warp/native/fabric.h +8 -0
  89. warp/native/hashgrid.h +4 -0
  90. warp/native/marching.cu +8 -0
  91. warp/native/mat.h +14 -3
  92. warp/native/mathdx.cpp +59 -0
  93. warp/native/mesh.h +4 -0
  94. warp/native/range.h +13 -1
  95. warp/native/reduce.cpp +9 -1
  96. warp/native/reduce.cu +7 -0
  97. warp/native/runlength_encode.cpp +9 -1
  98. warp/native/runlength_encode.cu +7 -1
  99. warp/native/scan.cpp +8 -0
  100. warp/native/scan.cu +8 -0
  101. warp/native/scan.h +8 -1
  102. warp/native/sparse.cpp +8 -0
  103. warp/native/sparse.cu +8 -0
  104. warp/native/temp_buffer.h +7 -0
  105. warp/native/tile.h +1857 -0
  106. warp/native/tile_gemm.h +341 -0
  107. warp/native/tile_reduce.h +210 -0
  108. warp/native/volume_builder.cu +8 -0
  109. warp/native/volume_builder.h +8 -0
  110. warp/native/warp.cpp +10 -2
  111. warp/native/warp.cu +369 -15
  112. warp/native/warp.h +12 -2
  113. warp/optim/adam.py +39 -4
  114. warp/paddle.py +29 -12
  115. warp/render/render_opengl.py +137 -65
  116. warp/sim/graph_coloring.py +292 -0
  117. warp/sim/integrator_euler.py +4 -2
  118. warp/sim/integrator_featherstone.py +115 -44
  119. warp/sim/integrator_vbd.py +6 -0
  120. warp/sim/model.py +90 -17
  121. warp/stubs.py +651 -85
  122. warp/tape.py +12 -7
  123. warp/tests/assets/pixel.npy +0 -0
  124. warp/tests/aux_test_instancing_gc.py +18 -0
  125. warp/tests/test_array.py +207 -48
  126. warp/tests/test_closest_point_edge_edge.py +8 -8
  127. warp/tests/test_codegen.py +120 -1
  128. warp/tests/test_codegen_instancing.py +30 -0
  129. warp/tests/test_collision.py +110 -0
  130. warp/tests/test_coloring.py +241 -0
  131. warp/tests/test_context.py +34 -0
  132. warp/tests/test_examples.py +18 -4
  133. warp/tests/test_fabricarray.py +33 -0
  134. warp/tests/test_fem.py +453 -113
  135. warp/tests/test_func.py +48 -1
  136. warp/tests/test_generics.py +52 -0
  137. warp/tests/test_iter.py +68 -0
  138. warp/tests/test_mat_scalar_ops.py +1 -1
  139. warp/tests/test_mesh_query_point.py +5 -4
  140. warp/tests/test_module_hashing.py +23 -0
  141. warp/tests/test_paddle.py +27 -87
  142. warp/tests/test_print.py +191 -1
  143. warp/tests/test_spatial.py +1 -1
  144. warp/tests/test_tile.py +700 -0
  145. warp/tests/test_tile_mathdx.py +144 -0
  146. warp/tests/test_tile_mlp.py +383 -0
  147. warp/tests/test_tile_reduce.py +374 -0
  148. warp/tests/test_tile_shared_memory.py +190 -0
  149. warp/tests/test_vbd.py +12 -20
  150. warp/tests/test_volume.py +43 -0
  151. warp/tests/unittest_suites.py +23 -2
  152. warp/tests/unittest_utils.py +4 -0
  153. warp/types.py +339 -73
  154. warp/utils.py +22 -1
  155. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
  156. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
  157. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
  158. warp/fem/field/test.py +0 -180
  159. warp/fem/field/trial.py +0 -183
  160. warp/fem/space/collocated_function_space.py +0 -102
  161. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  162. warp/fem/space/trimesh_2d_function_space.py +0 -153
  163. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
  164. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
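Note on the headline change: most of the new surface area in 1.5.0 is the tile-programming preview (warp/native/tile.h, tile_gemm.h, tile_reduce.h, the warp/examples/tile/ directory, and the test_tile*.py suites). As a rough, non-authoritative sketch of what that API looks like, here is a tiled GEMM in the spirit of example_tile_matmul.py; the exact names and signatures (wp.tile_load, wp.tile_matmul, wp.tile_store, wp.launch_tiled, block_dim) are assumptions inferred from the new file names and the 1.5.0 documentation, not copied from this diff:

    import numpy as np
    import warp as wp

    TILE_M, TILE_N, TILE_K = 8, 8, 8
    TILE_THREADS = 64  # cooperating threads per output tile (assumed launch parameter)

    @wp.kernel
    def tile_gemm(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float), C: wp.array2d(dtype=float)):
        i, j = wp.tid()
        acc = wp.tile_zeros(m=TILE_M, n=TILE_N, dtype=wp.float32)
        count = A.shape[1] // TILE_K
        for k in range(count):
            a = wp.tile_load(A, i, k, m=TILE_M, n=TILE_K)  # cooperative load of one input tile
            b = wp.tile_load(B, k, j, m=TILE_K, n=TILE_N)
            wp.tile_matmul(a, b, acc)                      # accumulate a @ b into acc
        wp.tile_store(C, i, j, acc)

    M = N = K = 32
    A = wp.array(np.random.rand(M, K), dtype=float)
    B = wp.array(np.random.rand(K, N), dtype=float)
    C = wp.zeros((M, N), dtype=float)

    # one block of TILE_THREADS threads cooperates on each output tile
    wp.launch_tiled(tile_gemm, dim=(M // TILE_M, N // TILE_N), inputs=[A, B, C], block_dim=TILE_THREADS)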
warp/tests/test_func.py CHANGED
@@ -7,7 +7,7 @@
 
 import math
 import unittest
-from typing import Tuple
+from typing import Any, Tuple
 
 import numpy as np
 
@@ -191,6 +191,47 @@ def test_user_func_return_multiple_values():
     wp.expect_eq(b, 54756.0)
 
 
+@wp.func
+def user_func_overload(
+    b: wp.array(dtype=Any),
+    i: int,
+):
+    return b[i] * 2.0
+
+
+@wp.kernel
+def user_func_overload_resolution_kernel(
+    a: wp.array(dtype=Any),
+    b: wp.array(dtype=Any),
+):
+    i = wp.tid()
+    a[i] = user_func_overload(b, i)
+
+
+def test_user_func_overload_resolution(test, device):
+    a0 = wp.array((1, 2, 3), dtype=wp.vec3)
+    b0 = wp.array((2, 3, 4), dtype=wp.vec3)
+
+    a1 = wp.array((5,), dtype=float)
+    b1 = wp.array((6,), dtype=float)
+
+    wp.launch(user_func_overload_resolution_kernel, a0.shape, (a0, b0))
+    wp.launch(user_func_overload_resolution_kernel, a1.shape, (a1, b1))
+
+    assert_np_equal(a0.numpy()[0], (4, 6, 8))
+    assert a1.numpy()[0] == 12
+
+
+@wp.func
+def user_func_return_none() -> None:
+    pass
+
+
+@wp.kernel
+def test_return_annotation_none() -> None:
+    user_func_return_none()
+
+
 devices = get_test_devices()
 
 
@@ -375,6 +416,12 @@ add_kernel_test(
     dim=1,
     devices=devices,
 )
+add_function_test(
+    TestFunc, func=test_user_func_overload_resolution, name="test_user_func_overload_resolution", devices=devices
+)
+add_kernel_test(
+    TestFunc, kernel=test_return_annotation_none, name="test_return_annotation_none", dim=1, devices=devices
+)
 
 
 if __name__ == "__main__":
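Context for the test_func.py additions: Warp resolves user-function overloads at module build time, and the new test checks that resolution still picks the right concrete instantiation when the arguments are generic (wp.array(dtype=Any)). For comparison, a minimal sketch of explicit overloading with the public @wp.func decorator (hypothetical names, not part of the diff):

    import warp as wp

    @wp.func
    def double(x: float):
        return x * 2.0

    @wp.func
    def double(x: wp.vec3):
        # same name, different argument type: an explicit overload
        return x * 2.0

    @wp.kernel
    def double_kernel(a: wp.array(dtype=float), v: wp.array(dtype=wp.vec3)):
        i = wp.tid()
        a[i] = double(a[i])  # resolves to the float overload
        v[i] = double(v[i])  # resolves to the vec3 overload

    a = wp.array([1.0, 2.0], dtype=float)
    v = wp.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)], dtype=wp.vec3)
    wp.launch(double_kernel, dim=2, inputs=[a, v])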
warp/tests/test_generics.py CHANGED
@@ -522,6 +522,57 @@ def test_type_attribute_error(test, device):
     )
 
 
+@wp.func
+def vec_int_annotation_func(v: wp.vec(3, wp.Int)) -> wp.Int:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_float_annotation_func(v: wp.vec(3, wp.Float)) -> wp.Float:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_scalar_annotation_func(v: wp.vec(3, wp.Scalar)) -> wp.Scalar:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def mat_int_annotation_func(m: wp.mat((2, 2), wp.Int)) -> wp.Int:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_float_annotation_func(m: wp.mat((2, 2), wp.Float)) -> wp.Float:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_scalar_annotation_func(m: wp.mat((2, 2), wp.Scalar)) -> wp.Scalar:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+mat22s = wp.mat((2, 2), wp.int16)
+mat22d = wp.mat((2, 2), wp.float64)
+
+
+@wp.kernel
+def test_annotations_kernel():
+    vi16 = wp.vec3s(wp.int16(1), wp.int16(2), wp.int16(3))
+    vf64 = wp.vec3d(wp.float64(1), wp.float64(2), wp.float64(3))
+    wp.expect_eq(vec_int_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_float_annotation_func(vf64), wp.float64(6))
+    wp.expect_eq(vec_scalar_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_scalar_annotation_func(vf64), wp.float64(6))
+
+    mi16 = mat22s(wp.int16(1), wp.int16(2), wp.int16(3), wp.int16(4))
+    mf64 = mat22d(wp.float64(1), wp.float64(2), wp.float64(3), wp.float64(4))
+    wp.expect_eq(mat_int_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_float_annotation_func(mf64), wp.float64(10))
+    wp.expect_eq(mat_scalar_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_scalar_annotation_func(mf64), wp.float64(10))
+
+
 class TestGenerics(unittest.TestCase):
     pass
 
@@ -590,6 +641,7 @@ add_kernel_test(
 )
 add_function_test(TestGenerics, "test_type_operator_misspell", test_type_operator_misspell, devices=devices)
 add_function_test(TestGenerics, "test_type_attribute_error", test_type_attribute_error, devices=devices)
+add_kernel_test(TestGenerics, name="test_annotations_kernel", kernel=test_annotations_kernel, dim=1, devices=devices)
 
 if __name__ == "__main__":
     wp.clear_kernel_cache()
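Context for the test_generics.py additions: wp.vec(length, wp.Int | wp.Float | wp.Scalar) and the matching wp.mat annotations let a user function constrain a generic parameter by scalar category rather than by a concrete dtype, with the return annotation resolving to the caller's scalar type. A minimal sketch (hypothetical function, assuming the usual wp.abs builtin):

    import warp as wp

    @wp.func
    def norm1(v: wp.vec(3, wp.Float)) -> wp.Float:
        # accepts vec3h/vec3f/vec3d; the return type matches the input's scalar type
        return wp.abs(v[0]) + wp.abs(v[1]) + wp.abs(v[2])

    @wp.kernel
    def norm1_kernel(a: wp.array(dtype=wp.vec3d), out: wp.array(dtype=wp.float64)):
        i = wp.tid()
        out[i] = norm1(a[i])

    a = wp.array([(1.0, -2.0, 3.0)], dtype=wp.vec3d)
    out = wp.zeros(1, dtype=wp.float64)
    wp.launch(norm1_kernel, dim=1, inputs=[a, out])
    print(out.numpy())  # [6.]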
warp/tests/test_iter.py ADDED
@@ -0,0 +1,68 @@
+# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+
+@wp.kernel
+def reversed_kernel(
+    start: wp.int32,
+    end: wp.int32,
+    step: wp.int32,
+    out_count: wp.array(dtype=wp.int32),
+    out_values: wp.array(dtype=wp.int32),
+):
+    count = wp.int32(0)
+    for i in reversed(range(start, end, step)):
+        out_values[count] = i
+        count += 1
+
+    out_count[0] = count
+
+
+def test_reversed(test, device):
+    count = wp.empty(1, dtype=wp.int32)
+    values = wp.empty(32, dtype=wp.int32)
+
+    start, end, step = (-2, 8, 3)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+    start, end, step = (9, -3, -2)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+
+devices = get_test_devices()
+
+
+class TestIter(unittest.TestCase):
+    pass
+
+
+add_function_test(TestIter, "test_reversed", test_reversed, devices=devices)
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2)
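The new test_iter.py covers reversed() over range() iterators inside kernels, including negative steps, mirroring Python semantics. A minimal usage sketch (hypothetical kernel, not from the diff):

    import warp as wp

    @wp.kernel
    def countdown(out: wp.array(dtype=wp.int32)):
        n = wp.int32(0)
        # Python-style reversed(range(...)) now works inside kernel code
        for i in reversed(range(0, 10, 2)):  # yields 8, 6, 4, 2, 0
            out[n] = i
            n += 1

    out = wp.zeros(5, dtype=wp.int32)
    wp.launch(countdown, dim=1, inputs=[out])
    print(out.numpy())  # [8 6 4 2 0]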
warp/tests/test_mat_scalar_ops.py CHANGED
@@ -1501,7 +1501,7 @@ def test_matmat_multiplication(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 2.0e-2,
         np.float32: 5.0e-6,
-        np.float64: 1.0e-8,
+        np.float64: 5.0e-7,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
warp/tests/test_mesh_query_point.py CHANGED
@@ -769,20 +769,21 @@ def point_query_aabb_and_closest(
 
 @unittest.skipUnless(USD_AVAILABLE, "Requires usd-core")
 def test_set_mesh_points(test, device):
+    rng = np.random.default_rng(123)
+
     vs, fs = load_mesh()
 
     vertices1 = wp.array(vs, dtype=wp.vec3, device=device)
-    velocities1_np = np.random.randn(vertices1.shape[0], 3)
+    velocities1_np = rng.standard_normal(size=(vertices1.shape[0], 3))
     velocities1 = wp.array(velocities1_np, dtype=wp.vec3, device=device)
 
     faces = wp.array(fs, dtype=wp.int32, device=device)
     mesh = wp.Mesh(vertices1, faces, velocities=velocities1)
     fs_2D = faces.reshape((-1, 3))
-    np.random.seed(12345)
     n = 1000
     query_radius = 0.2
 
-    pts1 = wp.array(np.random.randn(n, 3), dtype=wp.vec3, device=device)
+    pts1 = wp.array(rng.standard_normal(size=(n, 3)), dtype=wp.vec3, device=device)
 
     query_results_num_cols1 = wp.zeros(n, dtype=wp.int32, device=device)
     query_results_min_dist1 = wp.zeros(n, dtype=float, device=device)
@@ -804,7 +805,7 @@ def test_set_mesh_points(test, device):
         device=device,
     )
 
-    shift = np.random.randn(3)
+    shift = rng.standard_normal(size=3)
 
     vs_higher = vs + shift
     vertices2 = wp.array(vs_higher, dtype=wp.vec3, device=device)
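The test_mesh_query_point.py change is a NumPy RNG modernization: the legacy global-state API (np.random.seed / np.random.randn) is replaced with a local Generator, which keeps the test deterministic without mutating global state. The two patterns side by side (plain NumPy, nothing Warp-specific):

    import numpy as np

    # legacy pattern (global state, removed by the diff):
    np.random.seed(12345)
    x = np.random.randn(3)

    # Generator pattern (local, order-independent), as adopted above:
    rng = np.random.default_rng(123)
    y = rng.standard_normal(size=3)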
warp/tests/test_module_hashing.py CHANGED
@@ -214,12 +214,35 @@ def test_function_generic_overload_hashing(test, device):
     test.assertNotEqual(hash4, hash1)
 
 
+SIMPLE_MODULE = """# -*- coding: utf-8 -*-
+import warp as wp
+
+@wp.kernel
+def k():
+    pass
+"""
+
+
+def test_module_load(test, device):
+    """Ensure that loading a module does not change its hash"""
+    m = load_code_as_module(SIMPLE_MODULE, "simple_module")
+
+    hash1 = m.hash_module()
+    m.load(device)
+    hash2 = m.hash_module()
+
+    test.assertEqual(hash1, hash2)
+
+
 class TestModuleHashing(unittest.TestCase):
     pass
 
 
+devices = get_test_devices()
+
 add_function_test(TestModuleHashing, "test_function_overload_hashing", test_function_overload_hashing)
 add_function_test(TestModuleHashing, "test_function_generic_overload_hashing", test_function_generic_overload_hashing)
+add_function_test(TestModuleHashing, "test_module_load", test_module_load, devices=devices)
 
 
 if __name__ == "__main__":
warp/tests/test_paddle.py CHANGED
@@ -7,8 +7,6 @@
 
 import unittest
 
-import numpy as np
-
 import warp as wp
 from warp.tests.unittest_utils import *
 
@@ -444,7 +442,7 @@ def test_from_paddle_slices(test, device):
     assert a.ptr == t.data_ptr()
     assert a.is_contiguous
     assert a.shape == tuple(t.shape)
-    assert_np_equal(a.numpy(), t.cpu().numpy())
+    assert_np_equal(a.numpy(), t.numpy())
 
     # 1D slice with non-contiguous stride
     t_base = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
@@ -456,7 +454,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy1d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices (non-contiguous)
     t_base = paddle.arange(24, dtype=paddle.float32).to(device=paddle_device).reshape((4, 6))
@@ -468,7 +466,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 3D slices (non-contiguous)
     t_base = paddle.arange(36, dtype=paddle.float32).to(device=paddle_device).reshape((4, 3, 3))
@@ -480,7 +478,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of vec3 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(150, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 3))
@@ -492,7 +490,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_vec3_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of mat22 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(200, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 2, 2))
@@ -504,7 +502,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_mat22_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_from_paddle_zero_strides(test, device):
@@ -522,7 +520,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand middle dimension
     t = t_base.unsqueeze(1).expand([-1, 3, -1])
@@ -532,7 +530,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand innermost dimension
     t = t_base.unsqueeze(2).expand([-1, -1, 3])
@@ -542,77 +540,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
-
-
-def test_paddle_mgpu_from_paddle(test, device):
-    import paddle
-
-    n = 32
-
-    t0 = paddle.arange(0, n, 1, dtype=paddle.int32).to(device="gpu:0")
-    t1 = paddle.arange(0, n * 2, 2, dtype=paddle.int32).to(device="gpu:1")
-
-    a0 = wp.from_paddle(t0, dtype=wp.int32)
-    a1 = wp.from_paddle(t1, dtype=wp.int32)
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected0 = np.arange(0, n, 1)
-    expected1 = np.arange(0, n * 2, 2)
-
-    assert_np_equal(a0.numpy(), expected0)
-    assert_np_equal(a1.numpy(), expected1)
-
-
-def test_paddle_mgpu_to_paddle(test, device):
-    n = 32
-
-    with wp.ScopedDevice("gpu:0"):
-        a0 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
-
-    with wp.ScopedDevice("gpu:1"):
-        a1 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a1.size, inputs=[0, 2, a1])
-
-    t0 = wp.to_paddle(a0)
-    t1 = wp.to_paddle(a1)
-
-    assert str(t0.device) == "gpu:0"
-    assert str(t1.device) == "gpu:1"
-
-    expected0 = np.arange(0, n, 1, dtype=np.int32)
-    expected1 = np.arange(0, n * 2, 2, dtype=np.int32)
-
-    assert_np_equal(t0.cpu().numpy(), expected0)
-    assert_np_equal(t1.cpu().numpy(), expected1)
-
-
-def test_paddle_mgpu_interop(test, device):
-    import paddle
-
-    n = 1024 * 1024
-
-    with paddle.cuda.device(0):
-        t0 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a0 = wp.from_paddle(t0)
-        wp.launch(inc, dim=a0.size, inputs=[a0], stream=wp.stream_from_paddle())
-
-    with paddle.cuda.device(1):
-        t1 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a1 = wp.from_paddle(t1)
-        wp.launch(inc, dim=a1.size, inputs=[a1], stream=wp.stream_from_paddle())
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected = np.arange(n, dtype=int) + 1
-
-    # ensure the paddle tensors were modified by warp
-    assert_np_equal(t0.cpu().numpy(), expected)
-    assert_np_equal(t1.cpu().numpy(), expected)
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_paddle_autograd(test, device):
@@ -624,6 +552,9 @@ def test_paddle_autograd(test, device):
     class TestFunc(paddle.autograd.PyLayer):
         @staticmethod
         def forward(ctx, x):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # allocate output array
             y = paddle.empty_like(x)
 
@@ -632,10 +563,16 @@ def test_paddle_autograd(test, device):
 
             wp.launch(kernel=op_kernel, dim=len(x), inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return y
 
         @staticmethod
        def backward(ctx, adj_y):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # adjoints should be allocated as zero initialized
             adj_x = paddle.zeros_like(ctx.x).contiguous()
             adj_y = adj_y.contiguous()
@@ -655,6 +592,9 @@ def test_paddle_autograd(test, device):
                 adjoint=True,
             )
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return adj_x
 
     # run autograd on given device
@@ -691,7 +631,7 @@ def test_warp_graph_warp_stream(test, device):
     paddle_stream = wp.stream_to_paddle(device)
 
     # capture graph
-    with wp.ScopedDevice(device), paddle.device.stream(paddle_stream):
+    with wp.ScopedDevice(device), paddle.device.stream_guard(paddle.device.Stream(paddle_stream)):
         wp.capture_begin(force_module_load=False)
         try:
             t += 1.0
@@ -837,11 +777,11 @@ try:
    #     devices=paddle_compatible_cuda_devices,
    # )
 
-    # multi-GPU tests
-    if len(paddle_compatible_cuda_devices) > 1:
-        add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
+    # multi-GPU not supported yet.
+    # if len(paddle_compatible_cuda_devices) > 1:
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
 
 except Exception as e:
     print(f"Skipping Paddle tests due to exception: {e}")