warp-lang 0.11.0-py3-none-manylinux2014_x86_64.whl → 1.0.0-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (170)
  1. warp/__init__.py +8 -0
  2. warp/bin/warp-clang.so +0 -0
  3. warp/bin/warp.so +0 -0
  4. warp/build.py +7 -6
  5. warp/build_dll.py +70 -79
  6. warp/builtins.py +10 -6
  7. warp/codegen.py +51 -19
  8. warp/config.py +7 -8
  9. warp/constants.py +3 -0
  10. warp/context.py +948 -245
  11. warp/dlpack.py +198 -113
  12. warp/examples/assets/bunny.usd +0 -0
  13. warp/examples/assets/cartpole.urdf +110 -0
  14. warp/examples/assets/crazyflie.usd +0 -0
  15. warp/examples/assets/cube.usda +42 -0
  16. warp/examples/assets/nv_ant.xml +92 -0
  17. warp/examples/assets/nv_humanoid.xml +183 -0
  18. warp/examples/assets/quadruped.urdf +268 -0
  19. warp/examples/assets/rocks.nvdb +0 -0
  20. warp/examples/assets/rocks.usd +0 -0
  21. warp/examples/assets/sphere.usda +56 -0
  22. warp/examples/assets/torus.usda +105 -0
  23. warp/examples/benchmarks/benchmark_api.py +383 -0
  24. warp/examples/benchmarks/benchmark_cloth.py +279 -0
  25. warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -0
  26. warp/examples/benchmarks/benchmark_cloth_jax.py +100 -0
  27. warp/examples/benchmarks/benchmark_cloth_numba.py +142 -0
  28. warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -0
  29. warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -0
  30. warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -0
  31. warp/examples/benchmarks/benchmark_cloth_warp.py +146 -0
  32. warp/examples/benchmarks/benchmark_launches.py +295 -0
  33. warp/examples/core/example_dem.py +221 -0
  34. warp/examples/core/example_fluid.py +267 -0
  35. warp/examples/core/example_graph_capture.py +129 -0
  36. warp/examples/core/example_marching_cubes.py +177 -0
  37. warp/examples/core/example_mesh.py +154 -0
  38. warp/examples/core/example_mesh_intersect.py +193 -0
  39. warp/examples/core/example_nvdb.py +169 -0
  40. warp/examples/core/example_raycast.py +89 -0
  41. warp/examples/core/example_raymarch.py +178 -0
  42. warp/examples/core/example_render_opengl.py +141 -0
  43. warp/examples/core/example_sph.py +389 -0
  44. warp/examples/core/example_torch.py +181 -0
  45. warp/examples/core/example_wave.py +249 -0
  46. warp/examples/fem/bsr_utils.py +380 -0
  47. warp/examples/fem/example_apic_fluid.py +391 -0
  48. warp/examples/fem/example_convection_diffusion.py +168 -0
  49. warp/examples/fem/example_convection_diffusion_dg.py +209 -0
  50. warp/examples/fem/example_convection_diffusion_dg0.py +194 -0
  51. warp/examples/fem/example_deformed_geometry.py +159 -0
  52. warp/examples/fem/example_diffusion.py +173 -0
  53. warp/examples/fem/example_diffusion_3d.py +152 -0
  54. warp/examples/fem/example_diffusion_mgpu.py +214 -0
  55. warp/examples/fem/example_mixed_elasticity.py +222 -0
  56. warp/examples/fem/example_navier_stokes.py +243 -0
  57. warp/examples/fem/example_stokes.py +192 -0
  58. warp/examples/fem/example_stokes_transfer.py +249 -0
  59. warp/examples/fem/mesh_utils.py +109 -0
  60. warp/examples/fem/plot_utils.py +287 -0
  61. warp/examples/optim/example_bounce.py +248 -0
  62. warp/examples/optim/example_cloth_throw.py +210 -0
  63. warp/examples/optim/example_diffray.py +535 -0
  64. warp/examples/optim/example_drone.py +850 -0
  65. warp/examples/optim/example_inverse_kinematics.py +169 -0
  66. warp/examples/optim/example_inverse_kinematics_torch.py +170 -0
  67. warp/examples/optim/example_spring_cage.py +234 -0
  68. warp/examples/optim/example_trajectory.py +201 -0
  69. warp/examples/sim/example_cartpole.py +128 -0
  70. warp/examples/sim/example_cloth.py +184 -0
  71. warp/examples/sim/example_granular.py +113 -0
  72. warp/examples/sim/example_granular_collision_sdf.py +185 -0
  73. warp/examples/sim/example_jacobian_ik.py +213 -0
  74. warp/examples/sim/example_particle_chain.py +106 -0
  75. warp/examples/sim/example_quadruped.py +179 -0
  76. warp/examples/sim/example_rigid_chain.py +191 -0
  77. warp/examples/sim/example_rigid_contact.py +176 -0
  78. warp/examples/sim/example_rigid_force.py +126 -0
  79. warp/examples/sim/example_rigid_gyroscopic.py +97 -0
  80. warp/examples/sim/example_rigid_soft_contact.py +124 -0
  81. warp/examples/sim/example_soft_body.py +178 -0
  82. warp/fabric.py +29 -20
  83. warp/fem/cache.py +0 -1
  84. warp/fem/dirichlet.py +0 -2
  85. warp/fem/integrate.py +0 -1
  86. warp/jax.py +45 -0
  87. warp/jax_experimental.py +339 -0
  88. warp/native/builtin.h +12 -0
  89. warp/native/bvh.cu +18 -18
  90. warp/native/clang/clang.cpp +8 -3
  91. warp/native/cuda_util.cpp +94 -5
  92. warp/native/cuda_util.h +35 -6
  93. warp/native/cutlass_gemm.cpp +1 -1
  94. warp/native/cutlass_gemm.cu +4 -1
  95. warp/native/error.cpp +66 -0
  96. warp/native/error.h +27 -0
  97. warp/native/mesh.cu +2 -2
  98. warp/native/reduce.cu +4 -4
  99. warp/native/runlength_encode.cu +2 -2
  100. warp/native/scan.cu +2 -2
  101. warp/native/sparse.cu +0 -1
  102. warp/native/temp_buffer.h +2 -2
  103. warp/native/warp.cpp +95 -60
  104. warp/native/warp.cu +1053 -218
  105. warp/native/warp.h +49 -32
  106. warp/optim/linear.py +33 -16
  107. warp/render/render_opengl.py +202 -101
  108. warp/render/render_usd.py +82 -40
  109. warp/sim/__init__.py +13 -4
  110. warp/sim/articulation.py +4 -5
  111. warp/sim/collide.py +320 -175
  112. warp/sim/import_mjcf.py +25 -30
  113. warp/sim/import_urdf.py +94 -63
  114. warp/sim/import_usd.py +51 -36
  115. warp/sim/inertia.py +3 -2
  116. warp/sim/integrator.py +233 -0
  117. warp/sim/integrator_euler.py +447 -469
  118. warp/sim/integrator_featherstone.py +1991 -0
  119. warp/sim/integrator_xpbd.py +1420 -640
  120. warp/sim/model.py +765 -487
  121. warp/sim/particles.py +2 -1
  122. warp/sim/render.py +35 -13
  123. warp/sim/utils.py +222 -11
  124. warp/stubs.py +8 -0
  125. warp/tape.py +16 -1
  126. warp/tests/aux_test_grad_customs.py +23 -0
  127. warp/tests/test_array.py +190 -1
  128. warp/tests/test_async.py +656 -0
  129. warp/tests/test_bool.py +50 -0
  130. warp/tests/test_dlpack.py +164 -11
  131. warp/tests/test_examples.py +166 -74
  132. warp/tests/test_fem.py +8 -1
  133. warp/tests/test_generics.py +15 -5
  134. warp/tests/test_grad.py +1 -1
  135. warp/tests/test_grad_customs.py +172 -12
  136. warp/tests/test_jax.py +254 -0
  137. warp/tests/test_large.py +29 -6
  138. warp/tests/test_launch.py +25 -0
  139. warp/tests/test_linear_solvers.py +20 -3
  140. warp/tests/test_matmul.py +61 -16
  141. warp/tests/test_matmul_lite.py +13 -13
  142. warp/tests/test_mempool.py +186 -0
  143. warp/tests/test_multigpu.py +3 -0
  144. warp/tests/test_options.py +16 -2
  145. warp/tests/test_peer.py +137 -0
  146. warp/tests/test_print.py +3 -1
  147. warp/tests/test_quat.py +23 -0
  148. warp/tests/test_sim_kinematics.py +97 -0
  149. warp/tests/test_snippet.py +126 -3
  150. warp/tests/test_streams.py +108 -79
  151. warp/tests/test_torch.py +16 -8
  152. warp/tests/test_utils.py +32 -27
  153. warp/tests/test_verify_fp.py +65 -0
  154. warp/tests/test_volume.py +1 -1
  155. warp/tests/unittest_serial.py +2 -0
  156. warp/tests/unittest_suites.py +12 -0
  157. warp/tests/unittest_utils.py +14 -7
  158. warp/thirdparty/unittest_parallel.py +15 -3
  159. warp/torch.py +10 -8
  160. warp/types.py +363 -246
  161. warp/utils.py +143 -19
  162. warp_lang-1.0.0.dist-info/LICENSE.md +126 -0
  163. warp_lang-1.0.0.dist-info/METADATA +394 -0
  164. {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/RECORD +167 -86
  165. warp/sim/optimizer.py +0 -138
  166. warp_lang-0.11.0.dist-info/LICENSE.md +0 -36
  167. warp_lang-0.11.0.dist-info/METADATA +0 -238
  168. /warp/tests/{walkthough_debug.py → walkthrough_debug.py} +0 -0
  169. {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/WHEEL +0 -0
  170. {warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/top_level.txt +0 -0
warp/tests/test_matmul.py CHANGED
@@ -10,6 +10,7 @@ import unittest
 import numpy as np
 
 import warp as wp
+from typing import Any
 from warp.tests.unittest_utils import *
 
 wp.init()
@@ -76,7 +77,7 @@ class gemm_test_bed_runner:
         if batch_count == 1:
             tape = wp.Tape()
             with tape:
-                wp.matmul(A, B, C, D, alpha, beta, False, self.device)
+                wp.matmul(A, B, C, D, alpha, beta, False)
             tape.backward(grads={D: ones})
 
             D_np = alpha * (A.numpy() @ B.numpy()) + beta * C.numpy()
@@ -89,7 +90,7 @@ class gemm_test_bed_runner:
         else:
             tape = wp.Tape()
             with tape:
-                wp.batched_matmul(A, B, C, D, alpha, beta, False, self.device)
+                wp.batched_matmul(A, B, C, D, alpha, beta, False)
             tape.backward(grads={D: ones})
 
             D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
@@ -196,9 +197,9 @@ class gemm_test_bed_runner_transpose:
         BTT2 = BT2.transpose([1, 0])
         tape = wp.Tape()
         with tape:
-            wp.matmul(A, BTT1, C1, D1, alpha, beta, False, self.device)
-            wp.matmul(ATT1, B, C2, D2, alpha, beta, False, self.device)
-            wp.matmul(ATT2, BTT2, C3, D3, alpha, beta, False, self.device)
+            wp.matmul(A, BTT1, C1, D1, alpha, beta, False)
+            wp.matmul(ATT1, B, C2, D2, alpha, beta, False)
+            wp.matmul(ATT2, BTT2, C3, D3, alpha, beta, False)
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C1.numpy()
@@ -217,9 +218,9 @@ class gemm_test_bed_runner_transpose:
         BTT2 = BT2.transpose([0, 2, 1])
         tape = wp.Tape()
         with tape:
-            wp.batched_matmul(A, BTT1, C1, D1, alpha, beta, False, self.device)
-            wp.batched_matmul(ATT1, B, C2, D2, alpha, beta, False, self.device)
-            wp.batched_matmul(ATT2, BTT2, C3, D3, alpha, beta, False, self.device)
+            wp.batched_matmul(A, BTT1, C1, D1, alpha, beta, False)
+            wp.batched_matmul(ATT1, B, C2, D2, alpha, beta, False)
+            wp.batched_matmul(ATT2, BTT2, C3, D3, alpha, beta, False)
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C1.numpy()
@@ -300,7 +301,7 @@ def test_tape(test, device):
     # test tape
     tape = wp.Tape()
     with tape:
-        wp.matmul(A, B, C, D, device=device)
+        wp.matmul(A, B, C, D)
         wp.launch(matrix_sum_kernel, dim=(m, n), inputs=[D, loss], device=device)
 
     tape.backward(loss=loss)
@@ -308,8 +309,8 @@ def test_tape(test, device):
     tape.reset()
 
     # test adjoint
-    D.grad = wp.array2d(np.ones((m, n)), dtype=float, device=device)
-    wp.adj_matmul(A, B, C, A.grad, B.grad, C.grad, D.grad, device=device)
+    D.grad = wp.ones((m, n), dtype=float, device=device)
+    wp.adj_matmul(A, B, C, A.grad, B.grad, C.grad, D.grad)
     assert_np_equal(A_grad, A.grad.numpy())
 
     # test zero
@@ -342,7 +343,7 @@ def test_operator(test, device):
     tape.backward(loss=loss)
 
     # test adjoint
-    D.grad = wp.array2d(np.ones((m, n)), dtype=float, device=device)
+    D.grad = wp.ones((m, n), dtype=float, device=device)
     B_transpose = wp.array2d(B.transpose().numpy(), dtype=float, device=device)
 
     adj_A = D.grad @ B_transpose
@@ -389,7 +390,7 @@ def test_large_batch_count(test, device):
 
     tape = wp.Tape()
     with tape:
-        wp.batched_matmul(A, B, C, D, alpha=alpha, beta=beta, allow_tf32x3_arith=False, device=device)
+        wp.batched_matmul(A, B, C, D, alpha=alpha, beta=beta, allow_tf32x3_arith=False)
     tape.backward(grads={D: ones})
 
     D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
@@ -420,8 +421,8 @@ def test_adjoint_accumulation(test, device):
     tape = wp.Tape()
 
     with tape:
-        wp.matmul(a_wp, b_wp, c_wp, d1_wp, alpha=1.0, beta=1.0, device=device)
-        wp.matmul(a_wp, b_wp, d1_wp, d2_wp, alpha=1.0, beta=1.0, device=device)
+        wp.matmul(a_wp, b_wp, c_wp, d1_wp, alpha=1.0, beta=1.0)
+        wp.matmul(a_wp, b_wp, d1_wp, d2_wp, alpha=1.0, beta=1.0)
 
     d_grad = wp.zeros_like(d2_wp, device=device)
     d_grad.fill_(1.0)
@@ -433,8 +434,51 @@ def test_adjoint_accumulation(test, device):
     assert np.array_equal(c_wp.grad.numpy(), np.ones(shape=(2, 2)))
 
 
-devices = get_test_devices()
+@unittest.skipUnless(runtime.core.is_cutlass_enabled(), "Warp was not built with CUTLASS support")
+def test_cuda_graph_capture(test, device):
+    @wp.kernel
+    def mat_sum(mat: wp.array2d(dtype=Any), loss: wp.array(dtype=Any)):
+        i, j = wp.tid()
+        e = mat[i, j]
+        wp.atomic_add(loss, 0, e)
+
+    for T in [wp.float16, wp.float32, wp.float64]:
+        wp.overload(mat_sum, [wp.array2d(dtype=T), wp.array(dtype=T)])
+
+    wp.load_module(device=device)
+    wp.load_module(module="warp.utils", device=device)
+
+    for T in [wp.float16, wp.float32, wp.float64]:
+        m = 8
+        n = 8
+        k = 8
+
+        A = wp.ones((m, n), dtype=T, device=device, requires_grad=True)
+        B = wp.ones((n, k), dtype=T, device=device, requires_grad=True)
+        C = wp.zeros((m, k), dtype=T, device=device, requires_grad=True)
+        D = wp.zeros((m, k), dtype=T, device=device, requires_grad=True)
+
+        loss = wp.zeros(1, dtype=T, device=device, requires_grad=True)
+
+        wp.capture_begin(device, force_module_load=False)
+        try:
+            tape = wp.Tape()
+
+            with tape:
+                wp.matmul(A, B, C, D)
+                wp.launch(mat_sum, dim=(m, k), inputs=[D, loss], device=device)
 
+            tape.backward(loss=loss)
+        finally:
+            graph = wp.capture_end(device)
+
+        wp.capture_launch(graph)
+
+        assert_np_equal(A.grad.numpy(), 8.0 * np.ones((m, n), dtype=T))
+
+
+devices = get_test_devices()
+cuda_devices = get_unique_cuda_test_devices()
 
 
 class TestMatmul(unittest.TestCase):
     pass
@@ -447,6 +491,7 @@ add_function_test(TestMatmul, "test_tape", test_tape, devices=devices)
 add_function_test(TestMatmul, "test_operator", test_operator, devices=devices)
 add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices)
 add_function_test(TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices)
+add_function_test(TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices)
 
 
 if __name__ == "__main__":
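
The pattern above repeats throughout the test suite: in 1.0.0, wp.matmul(), wp.batched_matmul(), and wp.adj_matmul() no longer take a trailing device argument, and the device is taken from the arrays that are passed in. A minimal sketch of the updated call pattern, using only calls that appear in this diff (the shapes and the device string are illustrative, not taken from the diff):

import numpy as np
import warp as wp

wp.init()

device = "cuda:0"  # illustrative; any device the arrays were allocated on works
m, n, k = 8, 8, 8

A = wp.ones((m, n), dtype=wp.float32, device=device, requires_grad=True)
B = wp.ones((n, k), dtype=wp.float32, device=device, requires_grad=True)
C = wp.zeros((m, k), dtype=wp.float32, device=device, requires_grad=True)
D = wp.zeros((m, k), dtype=wp.float32, device=device, requires_grad=True)

tape = wp.Tape()
with tape:
    # 1.0.0 style: no trailing device argument, the arrays' device is used
    wp.matmul(A, B, C, D, alpha=1.0, beta=1.0)

# seed the output gradient and propagate it back through the GEMM
tape.backward(grads={D: wp.ones((m, k), dtype=wp.float32, device=device)})
print(np.allclose(D.numpy(), A.numpy() @ B.numpy() + C.numpy()))
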
warp/tests/test_matmul_lite.py CHANGED
@@ -76,7 +76,7 @@ class gemm_test_bed_runner:
         if batch_count == 1:
             tape = wp.Tape()
             with tape:
-                wp.matmul(A, B, C, D, alpha, beta, False, self.device)
+                wp.matmul(A, B, C, D, alpha, beta, False)
             tape.backward(grads={D: ones})
 
             D_np = alpha * (A.numpy() @ B.numpy()) + beta * C.numpy()
@@ -89,7 +89,7 @@ class gemm_test_bed_runner:
         else:
             tape = wp.Tape()
             with tape:
-                wp.batched_matmul(A, B, C, D, alpha, beta, False, self.device)
+                wp.batched_matmul(A, B, C, D, alpha, beta, False)
             tape.backward(grads={D: ones})
 
             D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
@@ -196,9 +196,9 @@ class gemm_test_bed_runner_transpose:
         BTT2 = BT2.transpose([1, 0])
         tape = wp.Tape()
         with tape:
-            wp.matmul(A, BTT1, C1, D1, alpha, beta, False, self.device)
-            wp.matmul(ATT1, B, C2, D2, alpha, beta, False, self.device)
-            wp.matmul(ATT2, BTT2, C3, D3, alpha, beta, False, self.device)
+            wp.matmul(A, BTT1, C1, D1, alpha, beta, False)
+            wp.matmul(ATT1, B, C2, D2, alpha, beta, False)
+            wp.matmul(ATT2, BTT2, C3, D3, alpha, beta, False)
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * (A.numpy() @ B.numpy()) + beta * C1.numpy()
@@ -217,9 +217,9 @@ class gemm_test_bed_runner_transpose:
         BTT2 = BT2.transpose([0, 2, 1])
         tape = wp.Tape()
         with tape:
-            wp.batched_matmul(A, BTT1, C1, D1, alpha, beta, False, self.device)
-            wp.batched_matmul(ATT1, B, C2, D2, alpha, beta, False, self.device)
-            wp.batched_matmul(ATT2, BTT2, C3, D3, alpha, beta, False, self.device)
+            wp.batched_matmul(A, BTT1, C1, D1, alpha, beta, False)
+            wp.batched_matmul(ATT1, B, C2, D2, alpha, beta, False)
+            wp.batched_matmul(ATT2, BTT2, C3, D3, alpha, beta, False)
         tape.backward(grads={D1: ones1, D2: ones2, D3: ones3})
 
         D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C1.numpy()
@@ -288,7 +288,7 @@ def test_tape(test, device):
     # test tape
     tape = wp.Tape()
    with tape:
-        wp.matmul(A, B, C, D, device=device)
+        wp.matmul(A, B, C, D)
         wp.launch(matrix_sum_kernel, dim=(m, n), inputs=[D, loss], device=device)
 
     tape.backward(loss=loss)
@@ -296,8 +296,8 @@ def test_tape(test, device):
     tape.reset()
 
     # test adjoint
-    D.grad = wp.array2d(np.ones((m, n)), dtype=float, device=device)
-    wp.adj_matmul(A, B, C, A.grad, B.grad, C.grad, D.grad, device=device)
+    D.grad = wp.ones((m, n), dtype=float, device=device)
+    wp.adj_matmul(A, B, C, A.grad, B.grad, C.grad, D.grad)
     assert_np_equal(A_grad, A.grad.numpy())
 
     # test zero
@@ -330,7 +330,7 @@ def test_operator(test, device):
     tape.backward(loss=loss)
 
     # test adjoint
-    D.grad = wp.array2d(np.ones((m, n)), dtype=float, device=device)
+    D.grad = wp.ones((m, n), dtype=float, device=device)
     B_transpose = wp.array2d(B.transpose().numpy(), dtype=float, device=device)
 
     adj_A = D.grad @ B_transpose
@@ -377,7 +377,7 @@ def test_large_batch_count(test, device):
 
     tape = wp.Tape()
     with tape:
-        wp.batched_matmul(A, B, C, D, alpha=alpha, beta=beta, allow_tf32x3_arith=False, device=device)
+        wp.batched_matmul(A, B, C, D, alpha=alpha, beta=beta, allow_tf32x3_arith=False)
     tape.backward(grads={D: ones})
 
     D_np = alpha * np.matmul(A.numpy(), B.numpy()) + beta * C.numpy()
warp/tests/test_mempool.py ADDED
@@ -0,0 +1,186 @@
+# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+wp.init()
+
+
+def get_device_pair_with_mempool_access_support():
+    devices = wp.get_cuda_devices()
+    for target_device in devices:
+        for peer_device in devices:
+            if target_device != peer_device:
+                if wp.is_mempool_access_supported(target_device, peer_device):
+                    return (target_device, peer_device)
+    return None
+
+
+def get_device_pair_without_mempool_access_support():
+    devices = wp.get_cuda_devices()
+    for target_device in devices:
+        for peer_device in devices:
+            if target_device != peer_device:
+                if not wp.is_mempool_access_supported(target_device, peer_device):
+                    return (target_device, peer_device)
+    return None
+
+
+def test_mempool_release_threshold(test, device):
+
+    device = wp.get_device(device)
+
+    assert device.is_mempool_supported
+
+    test.assertEqual(wp.is_mempool_supported(device), device.is_mempool_supported)
+
+    was_enabled = wp.is_mempool_enabled(device)
+
+    # toggle
+    wp.set_mempool_enabled(device, not was_enabled)
+    test.assertEqual(wp.is_mempool_enabled(device), not was_enabled)
+
+    # restore
+    wp.set_mempool_enabled(device, was_enabled)
+    test.assertEqual(wp.is_mempool_enabled(device), was_enabled)
+
+    saved_threshold = wp.get_mempool_release_threshold(device)
+
+    # set new threshold
+    wp.set_mempool_release_threshold(device, 42000)
+    test.assertEqual(wp.get_mempool_release_threshold(device), 42000)
+
+    # restore threshold
+    wp.set_mempool_release_threshold(device, saved_threshold)
+    test.assertEqual(wp.get_mempool_release_threshold(device), saved_threshold)
+
+
+def test_mempool_exceptions(test, device):
+
+    device = wp.get_device(device)
+
+    assert not device.is_mempool_supported
+
+    if device.is_cuda:
+        expected_error = RuntimeError
+    else:
+        expected_error = ValueError
+
+    with test.assertRaises(expected_error):
+        wp.get_mempool_release_threshold(device)
+
+    with test.assertRaises(expected_error):
+        wp.set_mempool_release_threshold(device, 42000)
+
+
+def test_mempool_access_self(test, device):
+
+    device = wp.get_device(device)
+
+    assert device.is_mempool_supported
+
+    # setting mempool access to self is a no-op
+    wp.set_mempool_access_enabled(device, device, True)
+    wp.set_mempool_access_enabled(device, device, False)
+
+    # should always be enabled
+    enabled = wp.is_mempool_access_enabled(device, device)
+    test.assertTrue(enabled)
+
+
+@unittest.skipUnless(get_device_pair_with_mempool_access_support(), "Requires devices with mempool access support")
+def test_mempool_access(test, _):
+
+    target_device, peer_device = get_device_pair_with_mempool_access_support()
+
+    was_enabled = wp.is_mempool_access_enabled(target_device, peer_device)
+
+    if was_enabled:
+        # try disabling
+        wp.set_mempool_access_enabled(target_device, peer_device, False)
+        is_enabled = wp.is_mempool_access_enabled(target_device, peer_device)
+        test.assertFalse(is_enabled)
+
+        # try re-enabling
+        wp.set_mempool_access_enabled(target_device, peer_device, True)
+        is_enabled = wp.is_mempool_access_enabled(target_device, peer_device)
+        test.assertTrue(is_enabled)
+    else:
+        # try enabling
+        wp.set_mempool_access_enabled(target_device, peer_device, True)
+        is_enabled = wp.is_mempool_access_enabled(target_device, peer_device)
+        test.assertTrue(is_enabled)
+
+        # try re-disabling
+        wp.set_mempool_access_enabled(target_device, peer_device, False)
+        is_enabled = wp.is_mempool_access_enabled(target_device, peer_device)
+        test.assertFalse(is_enabled)
+
+
+@unittest.skipUnless(get_device_pair_without_mempool_access_support(), "Requires devices without mempool access support")
+def test_mempool_access_exceptions_unsupported(test, _):
+
+    # get a CUDA device pair without mempool access support
+    target_device, peer_device = get_device_pair_without_mempool_access_support()
+
+    # querying is ok, but must return False
+    test.assertFalse(wp.is_mempool_access_enabled(target_device, peer_device))
+
+    # enabling should raise RuntimeError
+    with test.assertRaises(RuntimeError):
+        wp.set_mempool_access_enabled(target_device, peer_device, True)
+
+    # disabling should not raise an error
+    wp.set_mempool_access_enabled(target_device, peer_device, False)
+
+
+@unittest.skipUnless(wp.is_cpu_available() and wp.is_cuda_available(), "Requires both CUDA and CPU devices")
+def test_mempool_access_exceptions_cpu(test, _):
+
+    # querying is ok, but must return False
+    test.assertFalse(wp.is_mempool_access_enabled("cuda:0", "cpu"))
+    test.assertFalse(wp.is_mempool_access_enabled("cpu", "cuda:0"))
+
+    # enabling should raise ValueError
+    with test.assertRaises(ValueError):
+        wp.set_mempool_access_enabled("cpu", "cuda:0", True)
+    with test.assertRaises(ValueError):
+        wp.set_mempool_access_enabled("cuda:0", "cpu", True)
+
+    # disabling should not raise an error
+    wp.set_mempool_access_enabled("cpu", "cuda:0", False)
+    wp.set_mempool_access_enabled("cuda:0", "cpu", False)
+
+
+class TestMempool(unittest.TestCase):
+    pass
+
+
+devices_with_mempools = [d for d in get_test_devices() if d.is_mempool_supported]
+devices_without_mempools = [d for d in get_test_devices() if not d.is_mempool_supported]
+
+# test devices with mempool support
+add_function_test(TestMempool, "test_mempool_release_threshold", test_mempool_release_threshold, devices=devices_with_mempools)
+add_function_test(TestMempool, "test_mempool_access_self", test_mempool_access_self, devices=devices_with_mempools)
+
+# test devices without mempool support
+add_function_test(TestMempool, "test_mempool_exceptions", test_mempool_exceptions, devices=devices_without_mempools)
+
+# mempool access tests
+add_function_test(TestMempool, "test_mempool_access", test_mempool_access)
+
+# mempool access exceptions
+add_function_test(TestMempool, "test_mempool_access_exceptions_unsupported", test_mempool_access_exceptions_unsupported)
+add_function_test(TestMempool, "test_mempool_access_exceptions_cpu", test_mempool_access_exceptions_cpu)
+
+
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2)
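
The new test file above exercises the CUDA memory-pool allocator API introduced in 1.0.0. A small usage sketch based only on the calls seen in the test; the 64 MiB threshold is an arbitrary example value, and the assumption is that the threshold is given in bytes (the test uses 42000):

import warp as wp

wp.init()

device = wp.get_device("cuda:0")  # assumes a CUDA device is present

if wp.is_mempool_supported(device):
    # enable pooled allocations on this device
    wp.set_mempool_enabled(device, True)

    # let the pool hold on to freed memory up to roughly this many bytes
    saved_threshold = wp.get_mempool_release_threshold(device)
    wp.set_mempool_release_threshold(device, 64 * 1024 * 1024)

    a = wp.zeros(1_000_000, dtype=wp.float32, device=device)
    del a  # freed blocks below the threshold can be reused by later allocations

    wp.set_mempool_release_threshold(device, saved_threshold)
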
warp/tests/test_multigpu.py CHANGED
@@ -10,6 +10,7 @@ import unittest
 import numpy as np
 
 import warp as wp
+from warp.utils import check_iommu
 from warp.tests.unittest_utils import *
 
 wp.init()
@@ -106,6 +107,7 @@ class TestMultiGPU(unittest.TestCase):
         assert wp.get_cuda_device() == initial_cuda_device
 
     @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    @unittest.skipUnless(check_iommu(), "IOMMU seems enabled")
    def test_multigpu_pingpong(self):
         n = 1024 * 1024
 
@@ -129,6 +131,7 @@ class TestMultiGPU(unittest.TestCase):
         assert_np_equal(a1.numpy(), expected)
 
     @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
+    @unittest.skipUnless(check_iommu(), "IOMMU seems enabled")
     def test_multigpu_pingpong_streams(self):
         n = 1024 * 1024
 
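
check_iommu() comes from warp.utils and is used above to skip the multi-GPU ping-pong tests when the IOMMU configuration looks likely to interfere with cross-device transfers. A hedged sketch of guarding a cross-device copy the same way; the device names and values are illustrative:

import warp as wp
from warp.utils import check_iommu

wp.init()

# check_iommu() is truthy when transfers between GPUs look safe;
# the tests above are skipped otherwise ("IOMMU seems enabled")
if len(wp.get_cuda_devices()) > 1 and check_iommu():
    src = wp.zeros(1024, dtype=wp.float32, device="cuda:0")
    src.fill_(42.0)
    dst = wp.zeros(1024, dtype=wp.float32, device="cuda:1")
    wp.copy(dst, src)  # copy between the two GPUs, as in the ping-pong tests
    print(dst.numpy()[:4])
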
warp/tests/test_options.py CHANGED
@@ -6,12 +6,16 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
+import contextlib
+import io
 
 import warp as wp
 from warp.tests.unittest_utils import *
 
 wp.init()
 
+from warp.context import runtime  # noqa: E402
+
 
 @wp.kernel
 def scale(
@@ -47,7 +51,12 @@ def test_options_1(test, device):
     with tape:
         wp.launch(scale, dim=1, inputs=[x, y], device=device)
 
-    tape.backward(y)
+    with contextlib.redirect_stdout(io.StringIO()) as f:
+        tape.backward(y)
+
+    expected = f"Warp UserWarning: Running the tape backwards may produce incorrect gradients because recorded kernel {scale.key} is defined in a module with the option 'enable_backward=False' set.\n"
+
+    assert f.getvalue() == expected
     assert_np_equal(tape.gradients[x].numpy(), np.array(0.0))
 
 
@@ -89,7 +98,12 @@ def test_options_4(test, device):
     with tape:
         wp.launch(scale_2, dim=1, inputs=[x, y], device=device)
 
-    tape.backward(y)
+    with contextlib.redirect_stdout(io.StringIO()) as f:
+        tape.backward(y)
+
+    expected = f"Warp UserWarning: Running the tape backwards may produce incorrect gradients because recorded kernel {scale_2.key} is configured with the option 'enable_backward=False'.\n"
+
+    assert f.getvalue() == expected
     assert_np_equal(tape.gradients[x].numpy(), np.array(0.0))
 
 
warp/tests/test_peer.py ADDED
@@ -0,0 +1,137 @@
+# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+wp.init()
+
+
+def get_device_pair_with_peer_access_support():
+    devices = wp.get_cuda_devices()
+    for target_device in devices:
+        for peer_device in devices:
+            if target_device != peer_device:
+                if wp.is_peer_access_supported(target_device, peer_device):
+                    return (target_device, peer_device)
+    return None
+
+
+def get_device_pair_without_peer_access_support():
+    devices = wp.get_cuda_devices()
+    for target_device in devices:
+        for peer_device in devices:
+            if target_device != peer_device:
+                if not wp.is_peer_access_supported(target_device, peer_device):
+                    return (target_device, peer_device)
+    return None
+
+
+def test_peer_access_self(test, device):
+
+    device = wp.get_device(device)
+
+    assert device.is_cuda
+
+    # device can access self
+    can_access = wp.is_peer_access_supported(device, device)
+    test.assertTrue(can_access)
+
+    # setting peer access to self is a no-op
+    wp.set_peer_access_enabled(device, device, True)
+    wp.set_peer_access_enabled(device, device, False)
+
+    # should always be enabled
+    enabled = wp.is_peer_access_enabled(device, device)
+    test.assertTrue(enabled)
+
+
+@unittest.skipUnless(get_device_pair_with_peer_access_support(), "Requires devices with peer access support")
+def test_peer_access(test, _):
+
+    target_device, peer_device = get_device_pair_with_peer_access_support()
+
+    was_enabled = wp.is_peer_access_enabled(target_device, peer_device)
+
+    if was_enabled:
+        # try disabling
+        wp.set_peer_access_enabled(target_device, peer_device, False)
+        is_enabled = wp.is_peer_access_enabled(target_device, peer_device)
+        test.assertFalse(is_enabled)
+
+        # try re-enabling
+        wp.set_peer_access_enabled(target_device, peer_device, True)
+        is_enabled = wp.is_peer_access_enabled(target_device, peer_device)
+        test.assertTrue(is_enabled)
+    else:
+        # try enabling
+        wp.set_peer_access_enabled(target_device, peer_device, True)
+        is_enabled = wp.is_peer_access_enabled(target_device, peer_device)
+        test.assertTrue(is_enabled)
+
+        # try re-disabling
+        wp.set_peer_access_enabled(target_device, peer_device, False)
+        is_enabled = wp.is_peer_access_enabled(target_device, peer_device)
+        test.assertFalse(is_enabled)
+
+
+@unittest.skipUnless(get_device_pair_without_peer_access_support(), "Requires devices without peer access support")
+def test_peer_access_exceptions_unsupported(test, _):
+
+    # get a CUDA device pair without peer access support
+    target_device, peer_device = get_device_pair_without_peer_access_support()
+
+    # querying is ok, but must return False
+    test.assertFalse(wp.is_peer_access_enabled(target_device, peer_device))
+
+    # enabling should raise RuntimeError
+    with test.assertRaises(RuntimeError):
+        wp.set_peer_access_enabled(target_device, peer_device, True)
+
+    # disabling should not raise an error
+    wp.set_peer_access_enabled(target_device, peer_device, False)
+
+
+@unittest.skipUnless(wp.is_cpu_available() and wp.is_cuda_available(), "Requires both CUDA and CPU devices")
+def test_peer_access_exceptions_cpu(test, _):
+
+    # querying is ok, but must return False
+    test.assertFalse(wp.is_peer_access_enabled("cuda:0", "cpu"))
+    test.assertFalse(wp.is_peer_access_enabled("cpu", "cuda:0"))
+
+    # enabling should raise ValueError
+    with test.assertRaises(ValueError):
+        wp.set_peer_access_enabled("cpu", "cuda:0", True)
+    with test.assertRaises(ValueError):
+        wp.set_peer_access_enabled("cuda:0", "cpu", True)
+
+    # disabling should not raise an error
+    wp.set_peer_access_enabled("cpu", "cuda:0", False)
+    wp.set_peer_access_enabled("cuda:0", "cpu", False)
+
+
+class TestPeer(unittest.TestCase):
+    pass
+
+
+cuda_test_devices = get_cuda_test_devices()
+
+add_function_test(TestPeer, "test_peer_access_self", test_peer_access_self, devices=cuda_test_devices)
+
+# peer access tests
+add_function_test(TestPeer, "test_peer_access", test_peer_access)
+
+# peer access exceptions
+add_function_test(TestPeer, "test_peer_access_exceptions_unsupported", test_peer_access_exceptions_unsupported)
+add_function_test(TestPeer, "test_peer_access_exceptions_cpu", test_peer_access_exceptions_cpu)
+
+
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2)
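
test_peer.py covers the other half of the new multi-device memory API: classic CUDA peer access between devices' default allocations. A short sketch using only the calls that appear in the test; it assumes two CUDA devices, and the access direction follows the same (target, peer) argument order used above:

import warp as wp

wp.init()

cuda_devices = wp.get_cuda_devices()

if len(cuda_devices) >= 2:
    target, peer = cuda_devices[0], cuda_devices[1]

    if wp.is_peer_access_supported(target, peer):
        # allow `peer` to access allocations made on `target`
        wp.set_peer_access_enabled(target, peer, True)
        print(wp.is_peer_access_enabled(target, peer))  # True

        # turn it back off when done
        wp.set_peer_access_enabled(target, peer, False)
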
warp/tests/test_print.py CHANGED
@@ -5,6 +5,7 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
+import sys
 import unittest
 
 import warp as wp
@@ -30,7 +31,8 @@ def test_print(test, device):
     s = capture.end()
 
     # The CPU kernel printouts don't get captured by StdOutCapture()
-    if device.is_cuda:
+    # We skip the win32 comparison for now since the capture sometimes is an empty string
+    if device.is_cuda and sys.platform != "win32":
         test.assertRegex(
             s,
             rf"1{os.linesep}"
warp/tests/test_quat.py CHANGED
@@ -11,6 +11,7 @@ import numpy as np
 
 import warp as wp
 from warp.tests.unittest_utils import *
+import warp.sim
 
 wp.init()
 
@@ -1871,6 +1872,21 @@ def test_quat_identity(test, device, dtype, register_kernels=False):
     assert_np_equal(output.numpy(), expected)
 
 
+############################################################
+
+
+def test_quat_euler_conversion(test, device, dtype, register_kernels=False):
+    rng = np.random.default_rng(123)
+    N = 3
+
+    rpy_arr = rng.uniform(low=-np.pi, high=np.pi, size=(N, 3))
+
+    quats_from_euler = [list(wp.sim.quat_from_euler(wp.vec3(*rpy), 0, 1, 2)) for rpy in rpy_arr]
+    quats_from_rpy = [list(wp.quat_rpy(rpy[0], rpy[1], rpy[2])) for rpy in rpy_arr]
+
+    assert_np_equal(np.array(quats_from_euler), np.array(quats_from_rpy), tol=1e-4)
+
+
 def test_anon_type_instance(test, device, dtype, register_kernels=False):
     rng = np.random.default_rng(123)
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -2053,6 +2069,13 @@ for dtype in np_float_types:
     add_function_test_register_kernel(
         TestQuat, f"test_quat_to_matrix_{dtype.__name__}", test_quat_to_matrix, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestQuat,
+        f"test_quat_euler_conversion_{dtype.__name__}",
+        test_quat_euler_conversion,
+        devices=devices,
+        dtype=dtype,
+    )
     add_function_test(
         TestQuat, f"test_py_arithmetic_ops_{dtype.__name__}", test_py_arithmetic_ops, devices=None, dtype=dtype
     )
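
The new quaternion test checks that the XYZ Euler-angle conversion from warp.sim agrees with the built-in roll/pitch/yaw helper. A standalone sketch of that comparison, with arbitrary example angles:

import numpy as np
import warp as wp
import warp.sim

wp.init()

roll, pitch, yaw = 0.1, -0.4, 0.7  # radians, arbitrary example values

q_euler = wp.sim.quat_from_euler(wp.vec3(roll, pitch, yaw), 0, 1, 2)  # XYZ axis order
q_rpy = wp.quat_rpy(roll, pitch, yaw)

print(np.allclose(list(q_euler), list(q_rpy), atol=1e-4))
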