warp_lang-1.4.2-py3-none-manylinux2014_x86_64.whl → warp_lang-1.5.0-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang was flagged as potentially problematic.
Files changed (158)
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1783 -2
  8. warp/codegen.py +177 -45
  9. warp/config.py +2 -2
  10. warp/context.py +321 -73
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/fem/example_adaptive_grid.py +37 -10
  17. warp/examples/fem/example_apic_fluid.py +3 -2
  18. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  19. warp/examples/fem/example_deformed_geometry.py +1 -1
  20. warp/examples/fem/example_diffusion_3d.py +47 -4
  21. warp/examples/fem/example_distortion_energy.py +220 -0
  22. warp/examples/fem/example_magnetostatics.py +127 -85
  23. warp/examples/fem/example_nonconforming_contact.py +5 -5
  24. warp/examples/fem/example_stokes.py +3 -1
  25. warp/examples/fem/example_streamlines.py +12 -19
  26. warp/examples/fem/utils.py +38 -15
  27. warp/examples/sim/example_cloth.py +2 -25
  28. warp/examples/sim/example_quadruped.py +2 -1
  29. warp/examples/tile/example_tile_convolution.py +58 -0
  30. warp/examples/tile/example_tile_fft.py +47 -0
  31. warp/examples/tile/example_tile_filtering.py +105 -0
  32. warp/examples/tile/example_tile_matmul.py +79 -0
  33. warp/examples/tile/example_tile_mlp.py +375 -0
  34. warp/fem/__init__.py +8 -0
  35. warp/fem/cache.py +16 -12
  36. warp/fem/dirichlet.py +1 -1
  37. warp/fem/domain.py +44 -1
  38. warp/fem/field/__init__.py +1 -2
  39. warp/fem/field/field.py +31 -19
  40. warp/fem/field/nodal_field.py +101 -49
  41. warp/fem/field/virtual.py +794 -0
  42. warp/fem/geometry/__init__.py +2 -2
  43. warp/fem/geometry/deformed_geometry.py +3 -105
  44. warp/fem/geometry/element.py +13 -0
  45. warp/fem/geometry/geometry.py +165 -5
  46. warp/fem/geometry/grid_2d.py +3 -6
  47. warp/fem/geometry/grid_3d.py +31 -28
  48. warp/fem/geometry/hexmesh.py +3 -46
  49. warp/fem/geometry/nanogrid.py +3 -2
  50. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  51. warp/fem/geometry/tetmesh.py +2 -43
  52. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  53. warp/fem/integrate.py +683 -261
  54. warp/fem/linalg.py +404 -0
  55. warp/fem/operator.py +101 -18
  56. warp/fem/polynomial.py +5 -5
  57. warp/fem/quadrature/quadrature.py +45 -21
  58. warp/fem/space/__init__.py +45 -11
  59. warp/fem/space/basis_function_space.py +451 -0
  60. warp/fem/space/basis_space.py +58 -11
  61. warp/fem/space/function_space.py +146 -5
  62. warp/fem/space/grid_2d_function_space.py +80 -66
  63. warp/fem/space/grid_3d_function_space.py +113 -68
  64. warp/fem/space/hexmesh_function_space.py +96 -108
  65. warp/fem/space/nanogrid_function_space.py +62 -110
  66. warp/fem/space/quadmesh_function_space.py +208 -0
  67. warp/fem/space/shape/__init__.py +45 -7
  68. warp/fem/space/shape/cube_shape_function.py +328 -54
  69. warp/fem/space/shape/shape_function.py +10 -1
  70. warp/fem/space/shape/square_shape_function.py +328 -60
  71. warp/fem/space/shape/tet_shape_function.py +269 -19
  72. warp/fem/space/shape/triangle_shape_function.py +238 -19
  73. warp/fem/space/tetmesh_function_space.py +69 -37
  74. warp/fem/space/topology.py +38 -0
  75. warp/fem/space/trimesh_function_space.py +179 -0
  76. warp/fem/utils.py +6 -331
  77. warp/jax_experimental.py +3 -1
  78. warp/native/array.h +15 -0
  79. warp/native/builtin.h +66 -26
  80. warp/native/bvh.h +4 -0
  81. warp/native/coloring.cpp +600 -0
  82. warp/native/cuda_util.cpp +14 -0
  83. warp/native/cuda_util.h +2 -1
  84. warp/native/fabric.h +8 -0
  85. warp/native/hashgrid.h +4 -0
  86. warp/native/marching.cu +8 -0
  87. warp/native/mat.h +14 -3
  88. warp/native/mathdx.cpp +59 -0
  89. warp/native/mesh.h +4 -0
  90. warp/native/range.h +13 -1
  91. warp/native/reduce.cpp +9 -1
  92. warp/native/reduce.cu +7 -0
  93. warp/native/runlength_encode.cpp +9 -1
  94. warp/native/runlength_encode.cu +7 -1
  95. warp/native/scan.cpp +8 -0
  96. warp/native/scan.cu +8 -0
  97. warp/native/scan.h +8 -1
  98. warp/native/sparse.cpp +8 -0
  99. warp/native/sparse.cu +8 -0
  100. warp/native/temp_buffer.h +7 -0
  101. warp/native/tile.h +1857 -0
  102. warp/native/tile_gemm.h +341 -0
  103. warp/native/tile_reduce.h +210 -0
  104. warp/native/volume_builder.cu +8 -0
  105. warp/native/volume_builder.h +8 -0
  106. warp/native/warp.cpp +10 -2
  107. warp/native/warp.cu +369 -15
  108. warp/native/warp.h +12 -2
  109. warp/optim/adam.py +39 -4
  110. warp/paddle.py +29 -12
  111. warp/render/render_opengl.py +137 -65
  112. warp/sim/graph_coloring.py +292 -0
  113. warp/sim/integrator_euler.py +4 -2
  114. warp/sim/integrator_featherstone.py +115 -44
  115. warp/sim/integrator_vbd.py +6 -0
  116. warp/sim/model.py +88 -15
  117. warp/stubs.py +569 -4
  118. warp/tape.py +12 -7
  119. warp/tests/assets/pixel.npy +0 -0
  120. warp/tests/aux_test_instancing_gc.py +18 -0
  121. warp/tests/test_array.py +39 -0
  122. warp/tests/test_codegen.py +81 -1
  123. warp/tests/test_codegen_instancing.py +30 -0
  124. warp/tests/test_collision.py +110 -0
  125. warp/tests/test_coloring.py +241 -0
  126. warp/tests/test_context.py +34 -0
  127. warp/tests/test_examples.py +18 -4
  128. warp/tests/test_fem.py +453 -113
  129. warp/tests/test_func.py +13 -0
  130. warp/tests/test_generics.py +52 -0
  131. warp/tests/test_iter.py +68 -0
  132. warp/tests/test_mat_scalar_ops.py +1 -1
  133. warp/tests/test_mesh_query_point.py +1 -1
  134. warp/tests/test_module_hashing.py +23 -0
  135. warp/tests/test_paddle.py +27 -87
  136. warp/tests/test_print.py +56 -1
  137. warp/tests/test_spatial.py +1 -1
  138. warp/tests/test_tile.py +700 -0
  139. warp/tests/test_tile_mathdx.py +144 -0
  140. warp/tests/test_tile_mlp.py +383 -0
  141. warp/tests/test_tile_reduce.py +374 -0
  142. warp/tests/test_tile_shared_memory.py +190 -0
  143. warp/tests/test_vbd.py +12 -20
  144. warp/tests/test_volume.py +43 -0
  145. warp/tests/unittest_suites.py +19 -2
  146. warp/tests/unittest_utils.py +4 -0
  147. warp/types.py +338 -72
  148. warp/utils.py +22 -1
  149. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
  150. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/RECORD +153 -126
  151. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
  152. warp/fem/field/test.py +0 -180
  153. warp/fem/field/trial.py +0 -183
  154. warp/fem/space/collocated_function_space.py +0 -102
  155. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  156. warp/fem/space/trimesh_2d_function_space.py +0 -153
  157. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
  158. {warp_lang-1.4.2.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
warp/tests/test_func.py CHANGED
@@ -222,6 +222,16 @@ def test_user_func_overload_resolution(test, device):
     assert a1.numpy()[0] == 12
 
 
+@wp.func
+def user_func_return_none() -> None:
+    pass
+
+
+@wp.kernel
+def test_return_annotation_none() -> None:
+    user_func_return_none()
+
+
 devices = get_test_devices()
 
 
@@ -409,6 +419,9 @@ add_kernel_test(
 add_function_test(
     TestFunc, func=test_user_func_overload_resolution, name="test_user_func_overload_resolution", devices=devices
 )
+add_kernel_test(
+    TestFunc, kernel=test_return_annotation_none, name="test_return_annotation_none", dim=1, devices=devices
+)
 
 
 if __name__ == "__main__":
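
The hunk above adds coverage for explicit `-> None` return annotations on both `@wp.func` and `@wp.kernel` definitions. A minimal standalone sketch of the behavior being tested (function and array names are illustrative, assuming warp-lang 1.5.0):

    import warp as wp

    @wp.func
    def side_effect_only(x: float) -> None:
        # explicitly annotated as returning nothing
        wp.printf("x = %f\n", x)

    @wp.kernel
    def run(values: wp.array(dtype=float)) -> None:
        i = wp.tid()
        side_effect_only(values[i])

    values = wp.array([1.0, 2.0], dtype=float)
    wp.launch(run, dim=values.shape[0], inputs=[values])
    wp.synchronize()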
warp/tests/test_generics.py CHANGED
@@ -522,6 +522,57 @@ def test_type_attribute_error(test, device):
     )
 
 
+@wp.func
+def vec_int_annotation_func(v: wp.vec(3, wp.Int)) -> wp.Int:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_float_annotation_func(v: wp.vec(3, wp.Float)) -> wp.Float:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def vec_scalar_annotation_func(v: wp.vec(3, wp.Scalar)) -> wp.Scalar:
+    return v[0] + v[1] + v[2]
+
+
+@wp.func
+def mat_int_annotation_func(m: wp.mat((2, 2), wp.Int)) -> wp.Int:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_float_annotation_func(m: wp.mat((2, 2), wp.Float)) -> wp.Float:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+@wp.func
+def mat_scalar_annotation_func(m: wp.mat((2, 2), wp.Scalar)) -> wp.Scalar:
+    return m[0, 0] + m[0, 1] + m[1, 0] + m[1, 1]
+
+
+mat22s = wp.mat((2, 2), wp.int16)
+mat22d = wp.mat((2, 2), wp.float64)
+
+
+@wp.kernel
+def test_annotations_kernel():
+    vi16 = wp.vec3s(wp.int16(1), wp.int16(2), wp.int16(3))
+    vf64 = wp.vec3d(wp.float64(1), wp.float64(2), wp.float64(3))
+    wp.expect_eq(vec_int_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_float_annotation_func(vf64), wp.float64(6))
+    wp.expect_eq(vec_scalar_annotation_func(vi16), wp.int16(6))
+    wp.expect_eq(vec_scalar_annotation_func(vf64), wp.float64(6))
+
+    mi16 = mat22s(wp.int16(1), wp.int16(2), wp.int16(3), wp.int16(4))
+    mf64 = mat22d(wp.float64(1), wp.float64(2), wp.float64(3), wp.float64(4))
+    wp.expect_eq(mat_int_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_float_annotation_func(mf64), wp.float64(10))
+    wp.expect_eq(mat_scalar_annotation_func(mi16), wp.int16(10))
+    wp.expect_eq(mat_scalar_annotation_func(mf64), wp.float64(10))
+
+
 class TestGenerics(unittest.TestCase):
     pass
 
@@ -590,6 +641,7 @@ add_kernel_test(
 )
 add_function_test(TestGenerics, "test_type_operator_misspell", test_type_operator_misspell, devices=devices)
 add_function_test(TestGenerics, "test_type_attribute_error", test_type_attribute_error, devices=devices)
+add_kernel_test(TestGenerics, name="test_annotations_kernel", kernel=test_annotations_kernel, dim=1, devices=devices)
 
 if __name__ == "__main__":
     wp.clear_kernel_cache()
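
The annotations exercised above are the generic type hints `wp.Int`, `wp.Float`, and `wp.Scalar`, which can parameterize `wp.vec`/`wp.mat` so that a single `@wp.func` resolves against any matching concrete scalar type. A condensed sketch of the idea (names are illustrative, assuming warp-lang 1.5.0):

    import warp as wp

    @wp.func
    def sum3(v: wp.vec(3, wp.Scalar)) -> wp.Scalar:
        # resolves for any length-3 vector, whatever its scalar element type
        return v[0] + v[1] + v[2]

    @wp.kernel
    def check():
        wp.expect_eq(sum3(wp.vec3f(1.0, 2.0, 3.0)), 6.0)
        wp.expect_eq(sum3(wp.vec3i(1, 2, 3)), 6)

    wp.launch(check, dim=1)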
warp/tests/test_iter.py ADDED
@@ -0,0 +1,68 @@
+# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+
+@wp.kernel
+def reversed_kernel(
+    start: wp.int32,
+    end: wp.int32,
+    step: wp.int32,
+    out_count: wp.array(dtype=wp.int32),
+    out_values: wp.array(dtype=wp.int32),
+):
+    count = wp.int32(0)
+    for i in reversed(range(start, end, step)):
+        out_values[count] = i
+        count += 1
+
+    out_count[0] = count
+
+
+def test_reversed(test, device):
+    count = wp.empty(1, dtype=wp.int32)
+    values = wp.empty(32, dtype=wp.int32)
+
+    start, end, step = (-2, 8, 3)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+    start, end, step = (9, -3, -2)
+    wp.launch(
+        reversed_kernel,
+        dim=1,
+        inputs=(start, end, step),
+        outputs=(count, values),
+    )
+    expected = tuple(reversed(range(start, end, step)))
+    assert count.numpy()[0] == len(expected)
+    assert_np_equal(values.numpy()[: len(expected)], expected)
+
+
+devices = get_test_devices()
+
+
+class TestIter(unittest.TestCase):
+    pass
+
+
+add_function_test(TestIter, "test_reversed", test_reversed, devices=devices)
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2)
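
This new file covers `reversed()` over `range()` iterators inside kernels, mirroring Python's iteration order. For reference, the two cases it checks reduce to the following in plain Python:

    >>> tuple(reversed(range(-2, 8, 3)))
    (7, 4, 1, -2)
    >>> tuple(reversed(range(9, -3, -2)))
    (-1, 1, 3, 5, 7, 9)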
warp/tests/test_mat_scalar_ops.py CHANGED
@@ -1501,7 +1501,7 @@ def test_matmat_multiplication(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 2.0e-2,
         np.float32: 5.0e-6,
-        np.float64: 1.0e-8,
+        np.float64: 5.0e-7,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
warp/tests/test_mesh_query_point.py CHANGED
@@ -805,7 +805,7 @@ def test_set_mesh_points(test, device):
         device=device,
     )
 
-    shift = np.random.randn(3)
+    shift = rng.standard_normal(size=3)
 
     vs_higher = vs + shift
     vertices2 = wp.array(vs_higher, dtype=wp.vec3, device=device)
warp/tests/test_module_hashing.py CHANGED
@@ -214,12 +214,35 @@ def test_function_generic_overload_hashing(test, device):
     test.assertNotEqual(hash4, hash1)
 
 
+SIMPLE_MODULE = """# -*- coding: utf-8 -*-
+import warp as wp
+
+@wp.kernel
+def k():
+    pass
+"""
+
+
+def test_module_load(test, device):
+    """Ensure that loading a module does not change its hash"""
+    m = load_code_as_module(SIMPLE_MODULE, "simple_module")
+
+    hash1 = m.hash_module()
+    m.load(device)
+    hash2 = m.hash_module()
+
+    test.assertEqual(hash1, hash2)
+
+
 class TestModuleHashing(unittest.TestCase):
     pass
 
 
+devices = get_test_devices()
+
 add_function_test(TestModuleHashing, "test_function_overload_hashing", test_function_overload_hashing)
 add_function_test(TestModuleHashing, "test_function_generic_overload_hashing", test_function_generic_overload_hashing)
+add_function_test(TestModuleHashing, "test_module_load", test_module_load, devices=devices)
 
 
 if __name__ == "__main__":
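
The new `test_module_load` pins down an invariant rather than a feature: compiling and loading a module must not perturb its content hash. A sketch of the same check outside the test harness, reusing the `load_code_as_module` helper the test star-imports from `warp.tests.unittest_utils` (assuming that helper is importable by name; module name and device string are illustrative):

    import warp as wp
    from warp.tests.unittest_utils import load_code_as_module

    code = "import warp as wp\n\n@wp.kernel\ndef k():\n    pass\n"
    m = load_code_as_module(code, "hash_demo")

    h_before = m.hash_module()
    m.load("cpu")  # building/loading should leave the hash unchanged
    assert m.hash_module() == h_before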
warp/tests/test_paddle.py CHANGED
@@ -7,8 +7,6 @@
 
 import unittest
 
-import numpy as np
-
 import warp as wp
 from warp.tests.unittest_utils import *
 
@@ -444,7 +442,7 @@ def test_from_paddle_slices(test, device):
     assert a.ptr == t.data_ptr()
     assert a.is_contiguous
     assert a.shape == tuple(t.shape)
-    assert_np_equal(a.numpy(), t.cpu().numpy())
+    assert_np_equal(a.numpy(), t.numpy())
 
     # 1D slice with non-contiguous stride
     t_base = paddle.arange(10, dtype=paddle.float32).to(device=paddle_device)
@@ -456,7 +454,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy1d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices (non-contiguous)
     t_base = paddle.arange(24, dtype=paddle.float32).to(device=paddle_device).reshape((4, 6))
@@ -468,7 +466,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 3D slices (non-contiguous)
     t_base = paddle.arange(36, dtype=paddle.float32).to(device=paddle_device).reshape((4, 3, 3))
@@ -480,7 +478,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of vec3 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(150, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 3))
@@ -492,7 +490,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_vec3_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # 2D slices of mat22 (inner contiguous, outer non-contiguous)
     t_base = paddle.arange(200, dtype=paddle.float32).to(device=paddle_device).reshape((10, 5, 2, 2))
@@ -504,7 +502,7 @@ def test_from_paddle_slices(test, device):
     # copy contents to contiguous array
     a_contiguous = wp.empty_like(a)
     wp.launch(copy2d_mat22_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_from_paddle_zero_strides(test, device):
@@ -522,7 +520,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand middle dimension
     t = t_base.unsqueeze(1).expand([-1, 3, -1])
@@ -532,7 +530,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
     # expand innermost dimension
     t = t_base.unsqueeze(2).expand([-1, -1, 3])
@@ -542,77 +540,7 @@ def test_from_paddle_zero_strides(test, device):
     assert a.shape == tuple(t.shape)
     a_contiguous = wp.empty_like(a)
     wp.launch(copy3d_float_kernel, dim=a.shape, inputs=[a_contiguous, a], device=device)
-    assert_np_equal(a_contiguous.numpy(), t.cpu().numpy())
-
-
-def test_paddle_mgpu_from_paddle(test, device):
-    import paddle
-
-    n = 32
-
-    t0 = paddle.arange(0, n, 1, dtype=paddle.int32).to(device="gpu:0")
-    t1 = paddle.arange(0, n * 2, 2, dtype=paddle.int32).to(device="gpu:1")
-
-    a0 = wp.from_paddle(t0, dtype=wp.int32)
-    a1 = wp.from_paddle(t1, dtype=wp.int32)
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected0 = np.arange(0, n, 1)
-    expected1 = np.arange(0, n * 2, 2)
-
-    assert_np_equal(a0.numpy(), expected0)
-    assert_np_equal(a1.numpy(), expected1)
-
-
-def test_paddle_mgpu_to_paddle(test, device):
-    n = 32
-
-    with wp.ScopedDevice("gpu:0"):
-        a0 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
-
-    with wp.ScopedDevice("gpu:1"):
-        a1 = wp.empty(n, dtype=wp.int32)
-        wp.launch(arange, dim=a1.size, inputs=[0, 2, a1])
-
-    t0 = wp.to_paddle(a0)
-    t1 = wp.to_paddle(a1)
-
-    assert str(t0.device) == "gpu:0"
-    assert str(t1.device) == "gpu:1"
-
-    expected0 = np.arange(0, n, 1, dtype=np.int32)
-    expected1 = np.arange(0, n * 2, 2, dtype=np.int32)
-
-    assert_np_equal(t0.cpu().numpy(), expected0)
-    assert_np_equal(t1.cpu().numpy(), expected1)
-
-
-def test_paddle_mgpu_interop(test, device):
-    import paddle
-
-    n = 1024 * 1024
-
-    with paddle.cuda.device(0):
-        t0 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a0 = wp.from_paddle(t0)
-        wp.launch(inc, dim=a0.size, inputs=[a0], stream=wp.stream_from_paddle())
-
-    with paddle.cuda.device(1):
-        t1 = paddle.arange(n, dtype=paddle.float32).to(device="gpu")
-        a1 = wp.from_paddle(t1)
-        wp.launch(inc, dim=a1.size, inputs=[a1], stream=wp.stream_from_paddle())
-
-    assert a0.device == "gpu:0"
-    assert a1.device == "gpu:1"
-
-    expected = np.arange(n, dtype=int) + 1
-
-    # ensure the paddle tensors were modified by warp
-    assert_np_equal(t0.cpu().numpy(), expected)
-    assert_np_equal(t1.cpu().numpy(), expected)
+    assert_np_equal(a_contiguous.numpy(), t.numpy())
 
 
 def test_paddle_autograd(test, device):
@@ -624,6 +552,9 @@ def test_paddle_autograd(test, device):
     class TestFunc(paddle.autograd.PyLayer):
         @staticmethod
         def forward(ctx, x):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # allocate output array
             y = paddle.empty_like(x)
 
@@ -632,10 +563,16 @@ def test_paddle_autograd(test, device):
 
             wp.launch(kernel=op_kernel, dim=len(x), inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return y
 
         @staticmethod
         def backward(ctx, adj_y):
+            # ensure Paddle operations complete before running Warp
+            wp.synchronize_device()
+
             # adjoints should be allocated as zero initialized
             adj_x = paddle.zeros_like(ctx.x).contiguous()
             adj_y = adj_y.contiguous()
@@ -655,6 +592,9 @@ def test_paddle_autograd(test, device):
                 adjoint=True,
             )
 
+            # ensure Warp operations complete before returning data to Paddle
+            wp.synchronize_device()
+
             return adj_x
 
     # run autograd on given device
@@ -691,7 +631,7 @@ def test_warp_graph_warp_stream(test, device):
     paddle_stream = wp.stream_to_paddle(device)
 
     # capture graph
-    with wp.ScopedDevice(device), paddle.device.stream(paddle_stream):
+    with wp.ScopedDevice(device), paddle.device.stream_guard(paddle.device.Stream(paddle_stream)):
         wp.capture_begin(force_module_load=False)
         try:
             t += 1.0
@@ -837,11 +777,11 @@ try:
     #     devices=paddle_compatible_cuda_devices,
     # )
 
-    # multi-GPU tests
-    if len(paddle_compatible_cuda_devices) > 1:
-        add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
-        add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
+    # multi-GPU not supported yet.
+    # if len(paddle_compatible_cuda_devices) > 1:
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_from_paddle", test_paddle_mgpu_from_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_to_paddle", test_paddle_mgpu_to_paddle)
+    #     add_function_test(TestPaddle, "test_paddle_mgpu_interop", test_paddle_mgpu_interop)
 
 except Exception as e:
     print(f"Skipping Paddle tests due to exception: {e}")
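
The recurring edit in `test_paddle_autograd` is a synchronization pattern: Warp and Paddle may issue work on different CUDA streams, so each framework's pending work is flushed before the other touches shared memory. Stripped of the test scaffolding, the pattern looks roughly like this (kernel and tensor names are illustrative):

    import paddle
    import warp as wp

    @wp.kernel
    def scale(x: wp.array(dtype=float), y: wp.array(dtype=float)):
        i = wp.tid()
        y[i] = 2.0 * x[i]

    x = paddle.arange(8, dtype=paddle.float32)
    y = paddle.empty_like(x)

    wp.synchronize_device()  # let pending Paddle work finish before Warp reads x
    wp.launch(scale, dim=8, inputs=[wp.from_paddle(x)], outputs=[wp.from_paddle(y)])
    wp.synchronize_device()  # let Warp work finish before Paddle consumes y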
warp/tests/test_print.py CHANGED
@@ -17,8 +17,22 @@ from warp.tests.unittest_utils import *
 def test_print_kernel():
     wp.print(1.0)
     wp.print("this is a string")
+    wp.printf("this is another string\n")
     wp.printf("this is a float %f\n", 457.5)
     wp.printf("this is an int %d\n", 123)
+    # fmt: off
+    wp.printf(
+        "0=%d, 1=%d, 2=%d, 3=%d, 4=%d, 5=%d, 6=%d, 7=%d, "
+        "8=%d, 9=%d, 10=%d, 11=%d, 12=%d, 13=%d, 14=%d, 15=%d, "
+        "16=%d, 17=%d, 18=%d, 19=%d, 20=%d, 21=%d, 22=%d, 23=%d, "
+        "24=%d, 25=%d, 26=%d, 27=%d, 28=%d, 29=%d, 30=%d, 31=%d"
+        "\n",
+        0, 1, 2, 3, 4, 5, 6, 7,
+        8, 9, 10, 11, 12, 13, 14, 15,
+        16, 17, 18, 19, 20, 21, 22, 23,
+        24, 25, 26, 27, 28, 29, 30, 31,
+    )
+    # fmt: on
 
 
 @wp.kernel
@@ -59,8 +73,13 @@ def test_print(test, device):
         s,
         rf"1{os.linesep}"
         rf"this is a string{os.linesep}"
+        rf"this is another string{os.linesep}"
         rf"this is a float 457\.500000{os.linesep}"
-        rf"this is an int 123",
+        rf"this is an int 123{os.linesep}"
+        rf"0=0, 1=1, 2=2, 3=3, 4=4, 5=5, 6=6, 7=7, "
+        rf"8=8, 9=9, 10=10, 11=11, 12=12, 13=13, 14=14, 15=15, "
+        rf"16=16, 17=17, 18=18, 19=19, 20=20, 21=21, 22=22, 23=23, "
+        rf"24=24, 25=25, 26=26, 27=27, 28=28, 29=29, 30=30, 31=31{os.linesep}",
     )
 
 
@@ -260,6 +279,35 @@ def test_print_adjoint(test, device):
     )
 
 
+def test_print_error_variadic_arg_count(test, device):
+    @wp.kernel
+    def kernel():
+        # fmt: off
+        wp.printf(
+            "0=%d, 1=%d, 2=%d, 3=%d, 4=%d, 5=%d, 6=%d, 7=%d, "
+            "8=%d, 9=%d, 10=%d, 11=%d, 12=%d, 13=%d, 14=%d, 15=%d, "
+            "16=%d, 17=%d, 18=%d, 19=%d, 20=%d, 21=%d, 22=%d, 23=%d, "
+            "24=%d, 25=%d, 26=%d, 27=%d, 28=%d, 29=%d, 30=%d, 31=%d, "
+            "32=%d\n",
+            0, 1, 2, 3, 4, 5, 6, 7,
+            8, 9, 10, 11, 12, 13, 14, 15,
+            16, 17, 18, 19, 20, 21, 22, 23,
+            24, 25, 26, 27, 28, 29, 30, 31,
+            32,
+        )
+        # fmt: on
+
+    with test.assertRaisesRegex(
+        RuntimeError,
+        r"the maximum number of variadic arguments that can be passed to `printf` is 32$",
+    ):
+        wp.launch(
+            kernel,
+            dim=1,
+            device=device,
+        )
+
+
 class TestPrint(unittest.TestCase):
     pass
 
@@ -269,6 +317,13 @@ add_function_test(TestPrint, "test_print", test_print, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_numeric", test_print_numeric, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_boolean", test_print_boolean, devices=devices, check_output=False)
 add_function_test(TestPrint, "test_print_adjoint", test_print_adjoint, devices=devices, check_output=False)
+add_function_test(
+    TestPrint,
+    "test_print_error_variadic_arg_count",
+    test_print_error_variadic_arg_count,
+    devices=devices,
+    check_output=False,
+)
 
 
 if __name__ == "__main__":
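
Two `wp.printf` behaviors are covered above: a format string may now be passed with no variadic arguments at all, and at most 32 variadic arguments are accepted, with a `RuntimeError` raised at launch time for more. A minimal sketch (assuming warp-lang 1.5.0; kernel name is illustrative):

    import warp as wp

    @wp.kernel
    def report():
        wp.printf("no arguments needed\n")             # format-only call
        wp.printf("tid=%d half=%f\n", wp.tid(), 0.5)   # up to 32 variadic args

    wp.launch(report, dim=2)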
warp/tests/test_spatial.py CHANGED
@@ -1611,7 +1611,7 @@ def test_spatial_matmat_multiplication(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 2.0e-2,
         np.float32: 5.0e-6,
-        np.float64: 1.0e-8,
+        np.float64: 5.0e-7,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]