warp-lang 1.7.0__py3-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (429) hide show
  1. warp/__init__.py +139 -0
  2. warp/__init__.pyi +1 -0
  3. warp/autograd.py +1142 -0
  4. warp/bin/warp-clang.so +0 -0
  5. warp/bin/warp.so +0 -0
  6. warp/build.py +557 -0
  7. warp/build_dll.py +405 -0
  8. warp/builtins.py +6855 -0
  9. warp/codegen.py +3969 -0
  10. warp/config.py +158 -0
  11. warp/constants.py +57 -0
  12. warp/context.py +6812 -0
  13. warp/dlpack.py +462 -0
  14. warp/examples/__init__.py +24 -0
  15. warp/examples/assets/bear.usd +0 -0
  16. warp/examples/assets/bunny.usd +0 -0
  17. warp/examples/assets/cartpole.urdf +110 -0
  18. warp/examples/assets/crazyflie.usd +0 -0
  19. warp/examples/assets/cube.usd +0 -0
  20. warp/examples/assets/nonuniform.usd +0 -0
  21. warp/examples/assets/nv_ant.xml +92 -0
  22. warp/examples/assets/nv_humanoid.xml +183 -0
  23. warp/examples/assets/nvidia_logo.png +0 -0
  24. warp/examples/assets/pixel.jpg +0 -0
  25. warp/examples/assets/quadruped.urdf +268 -0
  26. warp/examples/assets/rocks.nvdb +0 -0
  27. warp/examples/assets/rocks.usd +0 -0
  28. warp/examples/assets/sphere.usd +0 -0
  29. warp/examples/assets/square_cloth.usd +0 -0
  30. warp/examples/benchmarks/benchmark_api.py +389 -0
  31. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  32. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  33. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  34. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  35. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  36. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  37. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  38. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  39. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  40. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  41. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  42. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  43. warp/examples/benchmarks/benchmark_launches.py +301 -0
  44. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  45. warp/examples/browse.py +37 -0
  46. warp/examples/core/example_cupy.py +86 -0
  47. warp/examples/core/example_dem.py +241 -0
  48. warp/examples/core/example_fluid.py +299 -0
  49. warp/examples/core/example_graph_capture.py +150 -0
  50. warp/examples/core/example_marching_cubes.py +194 -0
  51. warp/examples/core/example_mesh.py +180 -0
  52. warp/examples/core/example_mesh_intersect.py +211 -0
  53. warp/examples/core/example_nvdb.py +182 -0
  54. warp/examples/core/example_raycast.py +111 -0
  55. warp/examples/core/example_raymarch.py +205 -0
  56. warp/examples/core/example_render_opengl.py +193 -0
  57. warp/examples/core/example_sample_mesh.py +300 -0
  58. warp/examples/core/example_sph.py +411 -0
  59. warp/examples/core/example_torch.py +211 -0
  60. warp/examples/core/example_wave.py +269 -0
  61. warp/examples/fem/example_adaptive_grid.py +286 -0
  62. warp/examples/fem/example_apic_fluid.py +423 -0
  63. warp/examples/fem/example_burgers.py +261 -0
  64. warp/examples/fem/example_convection_diffusion.py +178 -0
  65. warp/examples/fem/example_convection_diffusion_dg.py +204 -0
  66. warp/examples/fem/example_deformed_geometry.py +172 -0
  67. warp/examples/fem/example_diffusion.py +196 -0
  68. warp/examples/fem/example_diffusion_3d.py +225 -0
  69. warp/examples/fem/example_diffusion_mgpu.py +220 -0
  70. warp/examples/fem/example_distortion_energy.py +228 -0
  71. warp/examples/fem/example_magnetostatics.py +240 -0
  72. warp/examples/fem/example_mixed_elasticity.py +291 -0
  73. warp/examples/fem/example_navier_stokes.py +261 -0
  74. warp/examples/fem/example_nonconforming_contact.py +298 -0
  75. warp/examples/fem/example_stokes.py +213 -0
  76. warp/examples/fem/example_stokes_transfer.py +262 -0
  77. warp/examples/fem/example_streamlines.py +352 -0
  78. warp/examples/fem/utils.py +1000 -0
  79. warp/examples/interop/example_jax_callable.py +116 -0
  80. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  81. warp/examples/interop/example_jax_kernel.py +205 -0
  82. warp/examples/optim/example_bounce.py +266 -0
  83. warp/examples/optim/example_cloth_throw.py +228 -0
  84. warp/examples/optim/example_diffray.py +561 -0
  85. warp/examples/optim/example_drone.py +870 -0
  86. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  87. warp/examples/optim/example_inverse_kinematics.py +182 -0
  88. warp/examples/optim/example_inverse_kinematics_torch.py +191 -0
  89. warp/examples/optim/example_softbody_properties.py +400 -0
  90. warp/examples/optim/example_spring_cage.py +245 -0
  91. warp/examples/optim/example_trajectory.py +227 -0
  92. warp/examples/sim/example_cartpole.py +143 -0
  93. warp/examples/sim/example_cloth.py +225 -0
  94. warp/examples/sim/example_cloth_self_contact.py +322 -0
  95. warp/examples/sim/example_granular.py +130 -0
  96. warp/examples/sim/example_granular_collision_sdf.py +202 -0
  97. warp/examples/sim/example_jacobian_ik.py +244 -0
  98. warp/examples/sim/example_particle_chain.py +124 -0
  99. warp/examples/sim/example_quadruped.py +203 -0
  100. warp/examples/sim/example_rigid_chain.py +203 -0
  101. warp/examples/sim/example_rigid_contact.py +195 -0
  102. warp/examples/sim/example_rigid_force.py +133 -0
  103. warp/examples/sim/example_rigid_gyroscopic.py +115 -0
  104. warp/examples/sim/example_rigid_soft_contact.py +140 -0
  105. warp/examples/sim/example_soft_body.py +196 -0
  106. warp/examples/tile/example_tile_cholesky.py +87 -0
  107. warp/examples/tile/example_tile_convolution.py +66 -0
  108. warp/examples/tile/example_tile_fft.py +55 -0
  109. warp/examples/tile/example_tile_filtering.py +113 -0
  110. warp/examples/tile/example_tile_matmul.py +85 -0
  111. warp/examples/tile/example_tile_mlp.py +383 -0
  112. warp/examples/tile/example_tile_nbody.py +199 -0
  113. warp/examples/tile/example_tile_walker.py +327 -0
  114. warp/fabric.py +355 -0
  115. warp/fem/__init__.py +106 -0
  116. warp/fem/adaptivity.py +508 -0
  117. warp/fem/cache.py +572 -0
  118. warp/fem/dirichlet.py +202 -0
  119. warp/fem/domain.py +411 -0
  120. warp/fem/field/__init__.py +125 -0
  121. warp/fem/field/field.py +619 -0
  122. warp/fem/field/nodal_field.py +326 -0
  123. warp/fem/field/restriction.py +37 -0
  124. warp/fem/field/virtual.py +848 -0
  125. warp/fem/geometry/__init__.py +32 -0
  126. warp/fem/geometry/adaptive_nanogrid.py +857 -0
  127. warp/fem/geometry/closest_point.py +84 -0
  128. warp/fem/geometry/deformed_geometry.py +221 -0
  129. warp/fem/geometry/element.py +776 -0
  130. warp/fem/geometry/geometry.py +362 -0
  131. warp/fem/geometry/grid_2d.py +392 -0
  132. warp/fem/geometry/grid_3d.py +452 -0
  133. warp/fem/geometry/hexmesh.py +911 -0
  134. warp/fem/geometry/nanogrid.py +571 -0
  135. warp/fem/geometry/partition.py +389 -0
  136. warp/fem/geometry/quadmesh.py +663 -0
  137. warp/fem/geometry/tetmesh.py +855 -0
  138. warp/fem/geometry/trimesh.py +806 -0
  139. warp/fem/integrate.py +2335 -0
  140. warp/fem/linalg.py +419 -0
  141. warp/fem/operator.py +293 -0
  142. warp/fem/polynomial.py +229 -0
  143. warp/fem/quadrature/__init__.py +17 -0
  144. warp/fem/quadrature/pic_quadrature.py +299 -0
  145. warp/fem/quadrature/quadrature.py +591 -0
  146. warp/fem/space/__init__.py +228 -0
  147. warp/fem/space/basis_function_space.py +468 -0
  148. warp/fem/space/basis_space.py +667 -0
  149. warp/fem/space/dof_mapper.py +251 -0
  150. warp/fem/space/function_space.py +309 -0
  151. warp/fem/space/grid_2d_function_space.py +177 -0
  152. warp/fem/space/grid_3d_function_space.py +227 -0
  153. warp/fem/space/hexmesh_function_space.py +257 -0
  154. warp/fem/space/nanogrid_function_space.py +201 -0
  155. warp/fem/space/partition.py +367 -0
  156. warp/fem/space/quadmesh_function_space.py +223 -0
  157. warp/fem/space/restriction.py +179 -0
  158. warp/fem/space/shape/__init__.py +143 -0
  159. warp/fem/space/shape/cube_shape_function.py +1105 -0
  160. warp/fem/space/shape/shape_function.py +133 -0
  161. warp/fem/space/shape/square_shape_function.py +926 -0
  162. warp/fem/space/shape/tet_shape_function.py +834 -0
  163. warp/fem/space/shape/triangle_shape_function.py +672 -0
  164. warp/fem/space/tetmesh_function_space.py +271 -0
  165. warp/fem/space/topology.py +424 -0
  166. warp/fem/space/trimesh_function_space.py +194 -0
  167. warp/fem/types.py +99 -0
  168. warp/fem/utils.py +420 -0
  169. warp/jax.py +187 -0
  170. warp/jax_experimental/__init__.py +16 -0
  171. warp/jax_experimental/custom_call.py +351 -0
  172. warp/jax_experimental/ffi.py +698 -0
  173. warp/jax_experimental/xla_ffi.py +602 -0
  174. warp/math.py +244 -0
  175. warp/native/array.h +1145 -0
  176. warp/native/builtin.h +1800 -0
  177. warp/native/bvh.cpp +492 -0
  178. warp/native/bvh.cu +791 -0
  179. warp/native/bvh.h +554 -0
  180. warp/native/clang/clang.cpp +536 -0
  181. warp/native/coloring.cpp +613 -0
  182. warp/native/crt.cpp +51 -0
  183. warp/native/crt.h +362 -0
  184. warp/native/cuda_crt.h +1058 -0
  185. warp/native/cuda_util.cpp +646 -0
  186. warp/native/cuda_util.h +307 -0
  187. warp/native/error.cpp +77 -0
  188. warp/native/error.h +36 -0
  189. warp/native/exports.h +1878 -0
  190. warp/native/fabric.h +245 -0
  191. warp/native/hashgrid.cpp +311 -0
  192. warp/native/hashgrid.cu +87 -0
  193. warp/native/hashgrid.h +240 -0
  194. warp/native/initializer_array.h +41 -0
  195. warp/native/intersect.h +1230 -0
  196. warp/native/intersect_adj.h +375 -0
  197. warp/native/intersect_tri.h +339 -0
  198. warp/native/marching.cpp +19 -0
  199. warp/native/marching.cu +514 -0
  200. warp/native/marching.h +19 -0
  201. warp/native/mat.h +2220 -0
  202. warp/native/mathdx.cpp +87 -0
  203. warp/native/matnn.h +343 -0
  204. warp/native/mesh.cpp +266 -0
  205. warp/native/mesh.cu +404 -0
  206. warp/native/mesh.h +1980 -0
  207. warp/native/nanovdb/GridHandle.h +366 -0
  208. warp/native/nanovdb/HostBuffer.h +590 -0
  209. warp/native/nanovdb/NanoVDB.h +6624 -0
  210. warp/native/nanovdb/PNanoVDB.h +3390 -0
  211. warp/native/noise.h +859 -0
  212. warp/native/quat.h +1371 -0
  213. warp/native/rand.h +342 -0
  214. warp/native/range.h +139 -0
  215. warp/native/reduce.cpp +174 -0
  216. warp/native/reduce.cu +364 -0
  217. warp/native/runlength_encode.cpp +79 -0
  218. warp/native/runlength_encode.cu +61 -0
  219. warp/native/scan.cpp +47 -0
  220. warp/native/scan.cu +53 -0
  221. warp/native/scan.h +23 -0
  222. warp/native/solid_angle.h +466 -0
  223. warp/native/sort.cpp +251 -0
  224. warp/native/sort.cu +277 -0
  225. warp/native/sort.h +33 -0
  226. warp/native/sparse.cpp +378 -0
  227. warp/native/sparse.cu +524 -0
  228. warp/native/spatial.h +657 -0
  229. warp/native/svd.h +702 -0
  230. warp/native/temp_buffer.h +46 -0
  231. warp/native/tile.h +2584 -0
  232. warp/native/tile_reduce.h +264 -0
  233. warp/native/vec.h +1426 -0
  234. warp/native/volume.cpp +501 -0
  235. warp/native/volume.cu +67 -0
  236. warp/native/volume.h +969 -0
  237. warp/native/volume_builder.cu +477 -0
  238. warp/native/volume_builder.h +52 -0
  239. warp/native/volume_impl.h +70 -0
  240. warp/native/warp.cpp +1082 -0
  241. warp/native/warp.cu +3636 -0
  242. warp/native/warp.h +381 -0
  243. warp/optim/__init__.py +17 -0
  244. warp/optim/adam.py +163 -0
  245. warp/optim/linear.py +1137 -0
  246. warp/optim/sgd.py +112 -0
  247. warp/paddle.py +407 -0
  248. warp/render/__init__.py +18 -0
  249. warp/render/render_opengl.py +3518 -0
  250. warp/render/render_usd.py +784 -0
  251. warp/render/utils.py +160 -0
  252. warp/sim/__init__.py +65 -0
  253. warp/sim/articulation.py +793 -0
  254. warp/sim/collide.py +2395 -0
  255. warp/sim/graph_coloring.py +300 -0
  256. warp/sim/import_mjcf.py +790 -0
  257. warp/sim/import_snu.py +227 -0
  258. warp/sim/import_urdf.py +579 -0
  259. warp/sim/import_usd.py +894 -0
  260. warp/sim/inertia.py +324 -0
  261. warp/sim/integrator.py +242 -0
  262. warp/sim/integrator_euler.py +1997 -0
  263. warp/sim/integrator_featherstone.py +2101 -0
  264. warp/sim/integrator_vbd.py +2048 -0
  265. warp/sim/integrator_xpbd.py +3292 -0
  266. warp/sim/model.py +4791 -0
  267. warp/sim/particles.py +121 -0
  268. warp/sim/render.py +427 -0
  269. warp/sim/utils.py +428 -0
  270. warp/sparse.py +2057 -0
  271. warp/stubs.py +3333 -0
  272. warp/tape.py +1203 -0
  273. warp/tests/__init__.py +1 -0
  274. warp/tests/__main__.py +4 -0
  275. warp/tests/assets/curlnoise_golden.npy +0 -0
  276. warp/tests/assets/mlp_golden.npy +0 -0
  277. warp/tests/assets/pixel.npy +0 -0
  278. warp/tests/assets/pnoise_golden.npy +0 -0
  279. warp/tests/assets/spiky.usd +0 -0
  280. warp/tests/assets/test_grid.nvdb +0 -0
  281. warp/tests/assets/test_index_grid.nvdb +0 -0
  282. warp/tests/assets/test_int32_grid.nvdb +0 -0
  283. warp/tests/assets/test_vec_grid.nvdb +0 -0
  284. warp/tests/assets/torus.nvdb +0 -0
  285. warp/tests/assets/torus.usda +105 -0
  286. warp/tests/aux_test_class_kernel.py +34 -0
  287. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  288. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  289. warp/tests/aux_test_dependent.py +29 -0
  290. warp/tests/aux_test_grad_customs.py +29 -0
  291. warp/tests/aux_test_instancing_gc.py +26 -0
  292. warp/tests/aux_test_module_unload.py +23 -0
  293. warp/tests/aux_test_name_clash1.py +40 -0
  294. warp/tests/aux_test_name_clash2.py +40 -0
  295. warp/tests/aux_test_reference.py +9 -0
  296. warp/tests/aux_test_reference_reference.py +8 -0
  297. warp/tests/aux_test_square.py +16 -0
  298. warp/tests/aux_test_unresolved_func.py +22 -0
  299. warp/tests/aux_test_unresolved_symbol.py +22 -0
  300. warp/tests/cuda/__init__.py +0 -0
  301. warp/tests/cuda/test_async.py +676 -0
  302. warp/tests/cuda/test_ipc.py +124 -0
  303. warp/tests/cuda/test_mempool.py +233 -0
  304. warp/tests/cuda/test_multigpu.py +169 -0
  305. warp/tests/cuda/test_peer.py +139 -0
  306. warp/tests/cuda/test_pinned.py +84 -0
  307. warp/tests/cuda/test_streams.py +634 -0
  308. warp/tests/geometry/__init__.py +0 -0
  309. warp/tests/geometry/test_bvh.py +200 -0
  310. warp/tests/geometry/test_hash_grid.py +221 -0
  311. warp/tests/geometry/test_marching_cubes.py +74 -0
  312. warp/tests/geometry/test_mesh.py +316 -0
  313. warp/tests/geometry/test_mesh_query_aabb.py +399 -0
  314. warp/tests/geometry/test_mesh_query_point.py +932 -0
  315. warp/tests/geometry/test_mesh_query_ray.py +311 -0
  316. warp/tests/geometry/test_volume.py +1103 -0
  317. warp/tests/geometry/test_volume_write.py +346 -0
  318. warp/tests/interop/__init__.py +0 -0
  319. warp/tests/interop/test_dlpack.py +729 -0
  320. warp/tests/interop/test_jax.py +371 -0
  321. warp/tests/interop/test_paddle.py +800 -0
  322. warp/tests/interop/test_torch.py +1001 -0
  323. warp/tests/run_coverage_serial.py +39 -0
  324. warp/tests/sim/__init__.py +0 -0
  325. warp/tests/sim/disabled_kinematics.py +244 -0
  326. warp/tests/sim/flaky_test_sim_grad.py +290 -0
  327. warp/tests/sim/test_collision.py +604 -0
  328. warp/tests/sim/test_coloring.py +258 -0
  329. warp/tests/sim/test_model.py +224 -0
  330. warp/tests/sim/test_sim_grad_bounce_linear.py +212 -0
  331. warp/tests/sim/test_sim_kinematics.py +98 -0
  332. warp/tests/sim/test_vbd.py +597 -0
  333. warp/tests/test_adam.py +163 -0
  334. warp/tests/test_arithmetic.py +1096 -0
  335. warp/tests/test_array.py +2972 -0
  336. warp/tests/test_array_reduce.py +156 -0
  337. warp/tests/test_assert.py +250 -0
  338. warp/tests/test_atomic.py +153 -0
  339. warp/tests/test_bool.py +220 -0
  340. warp/tests/test_builtins_resolution.py +1298 -0
  341. warp/tests/test_closest_point_edge_edge.py +327 -0
  342. warp/tests/test_codegen.py +810 -0
  343. warp/tests/test_codegen_instancing.py +1495 -0
  344. warp/tests/test_compile_consts.py +215 -0
  345. warp/tests/test_conditional.py +252 -0
  346. warp/tests/test_context.py +42 -0
  347. warp/tests/test_copy.py +238 -0
  348. warp/tests/test_ctypes.py +638 -0
  349. warp/tests/test_dense.py +73 -0
  350. warp/tests/test_devices.py +97 -0
  351. warp/tests/test_examples.py +482 -0
  352. warp/tests/test_fabricarray.py +996 -0
  353. warp/tests/test_fast_math.py +74 -0
  354. warp/tests/test_fem.py +2003 -0
  355. warp/tests/test_fp16.py +136 -0
  356. warp/tests/test_func.py +454 -0
  357. warp/tests/test_future_annotations.py +98 -0
  358. warp/tests/test_generics.py +656 -0
  359. warp/tests/test_grad.py +893 -0
  360. warp/tests/test_grad_customs.py +339 -0
  361. warp/tests/test_grad_debug.py +341 -0
  362. warp/tests/test_implicit_init.py +411 -0
  363. warp/tests/test_import.py +45 -0
  364. warp/tests/test_indexedarray.py +1140 -0
  365. warp/tests/test_intersect.py +73 -0
  366. warp/tests/test_iter.py +76 -0
  367. warp/tests/test_large.py +177 -0
  368. warp/tests/test_launch.py +411 -0
  369. warp/tests/test_lerp.py +151 -0
  370. warp/tests/test_linear_solvers.py +193 -0
  371. warp/tests/test_lvalue.py +427 -0
  372. warp/tests/test_mat.py +2089 -0
  373. warp/tests/test_mat_lite.py +122 -0
  374. warp/tests/test_mat_scalar_ops.py +2913 -0
  375. warp/tests/test_math.py +178 -0
  376. warp/tests/test_mlp.py +282 -0
  377. warp/tests/test_module_hashing.py +258 -0
  378. warp/tests/test_modules_lite.py +44 -0
  379. warp/tests/test_noise.py +252 -0
  380. warp/tests/test_operators.py +299 -0
  381. warp/tests/test_options.py +129 -0
  382. warp/tests/test_overwrite.py +551 -0
  383. warp/tests/test_print.py +339 -0
  384. warp/tests/test_quat.py +2315 -0
  385. warp/tests/test_rand.py +339 -0
  386. warp/tests/test_reload.py +302 -0
  387. warp/tests/test_rounding.py +185 -0
  388. warp/tests/test_runlength_encode.py +196 -0
  389. warp/tests/test_scalar_ops.py +105 -0
  390. warp/tests/test_smoothstep.py +108 -0
  391. warp/tests/test_snippet.py +318 -0
  392. warp/tests/test_sparse.py +582 -0
  393. warp/tests/test_spatial.py +2229 -0
  394. warp/tests/test_special_values.py +361 -0
  395. warp/tests/test_static.py +592 -0
  396. warp/tests/test_struct.py +734 -0
  397. warp/tests/test_tape.py +204 -0
  398. warp/tests/test_transient_module.py +93 -0
  399. warp/tests/test_triangle_closest_point.py +145 -0
  400. warp/tests/test_types.py +562 -0
  401. warp/tests/test_utils.py +588 -0
  402. warp/tests/test_vec.py +1487 -0
  403. warp/tests/test_vec_lite.py +80 -0
  404. warp/tests/test_vec_scalar_ops.py +2327 -0
  405. warp/tests/test_verify_fp.py +100 -0
  406. warp/tests/tile/__init__.py +0 -0
  407. warp/tests/tile/test_tile.py +780 -0
  408. warp/tests/tile/test_tile_load.py +407 -0
  409. warp/tests/tile/test_tile_mathdx.py +208 -0
  410. warp/tests/tile/test_tile_mlp.py +402 -0
  411. warp/tests/tile/test_tile_reduce.py +447 -0
  412. warp/tests/tile/test_tile_shared_memory.py +247 -0
  413. warp/tests/tile/test_tile_view.py +173 -0
  414. warp/tests/unittest_serial.py +47 -0
  415. warp/tests/unittest_suites.py +427 -0
  416. warp/tests/unittest_utils.py +468 -0
  417. warp/tests/walkthrough_debug.py +93 -0
  418. warp/thirdparty/__init__.py +0 -0
  419. warp/thirdparty/appdirs.py +598 -0
  420. warp/thirdparty/dlpack.py +145 -0
  421. warp/thirdparty/unittest_parallel.py +570 -0
  422. warp/torch.py +391 -0
  423. warp/types.py +5230 -0
  424. warp/utils.py +1137 -0
  425. warp_lang-1.7.0.dist-info/METADATA +516 -0
  426. warp_lang-1.7.0.dist-info/RECORD +429 -0
  427. warp_lang-1.7.0.dist-info/WHEEL +5 -0
  428. warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
  429. warp_lang-1.7.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,634 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import unittest
17
+
18
+ import numpy as np
19
+
20
+ import warp as wp
21
+ from warp.tests.unittest_utils import *
22
+ from warp.utils import check_p2p
23
+
24
+
25
@wp.kernel
def inc(a: wp.array(dtype=float)):
    # one thread per element: add one to this thread's slot of `a`, in place
    i = wp.tid()
    a[i] = a[i] + 1.0
29
+
30
+
31
@wp.kernel
def inc_new(src: wp.array(dtype=float), dst: wp.array(dtype=float)):
    # one thread per element: write src + 1 into dst, leaving src untouched
    i = wp.tid()
    dst[i] = src[i] + 1.0
35
+
36
+
37
@wp.kernel
def sum(a: wp.array(dtype=float), b: wp.array(dtype=float), c: wp.array(dtype=float)):
    # one thread per element: c receives the element-wise sum of a and b
    i = wp.tid()
    c[i] = a[i] + b[i]
41
+
42
+
43
# array length shared by every test in this file (10 Mi elements)
N = 10 * 1024**2
45
+
46
+
47
def test_stream_set(test, device):
    """Check that ``wp.set_stream`` makes a new stream the device's current stream."""
    device = wp.get_device(device)

    # remember the current stream so it can be put back afterwards
    original = device.stream
    replacement = wp.Stream(device)

    try:
        wp.set_stream(replacement, device)

        test.assertTrue(device.has_stream)
        test.assertEqual(device.stream, replacement)
    finally:
        # always reinstate the stream that was current on entry
        wp.set_stream(original, device)
62
+
63
+
64
def test_stream_arg_explicit_sync(test, device):
    """Run increments and copies on an explicit ``stream=`` argument after a manual sync."""
    a = wp.zeros(N, dtype=float, device=device)
    b = wp.full(N, 42, dtype=float, device=device)
    c = wp.empty(N, dtype=float, device=device)

    current = wp.get_stream(device)
    worker = wp.Stream(device)

    # allocations need to be explicitly synced before launching work using stream arguments
    worker.wait_stream(current)

    # on the new stream: increment, snapshot into b, increment, snapshot into c, increment
    for snapshot in (b, c):
        wp.launch(inc, dim=a.size, inputs=[a], stream=worker)
        wp.copy(snapshot, a, stream=worker)
    wp.launch(inc, dim=a.size, inputs=[a], stream=worker)

    for arr, value in ((a, 3.0), (b, 1.0), (c, 2.0)):
        assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
85
+
86
+
87
def test_stream_scope_implicit_sync(test, device):
    """Run increments and copies inside ``wp.ScopedStream`` and check the results.

    Also verifies that the scoped stream is current inside the scope and that
    the previous stream is current again once the scope exits.
    """
    with wp.ScopedDevice(device):
        a = wp.zeros(N, dtype=float)
        b = wp.full(N, 42, dtype=float)
        c = wp.empty(N, dtype=float)

        old_stream = wp.get_stream()
        new_stream = wp.Stream()

        # launch work on new stream
        # allocations are implicitly synced when entering wp.ScopedStream
        with wp.ScopedStream(new_stream):
            # use the TestCase assertion (not a bare assert) so the check
            # survives `python -O` and reports both operands on failure
            test.assertEqual(wp.get_stream(), new_stream)

            wp.launch(inc, dim=a.size, inputs=[a])
            wp.copy(b, a)
            wp.launch(inc, dim=a.size, inputs=[a])
            wp.copy(c, a)
            wp.launch(inc, dim=a.size, inputs=[a])

        # leaving the scope must restore the stream that was current before it
        test.assertEqual(wp.get_stream(), old_stream)

        assert_np_equal(a.numpy(), np.full(N, fill_value=3.0))
        assert_np_equal(b.numpy(), np.full(N, fill_value=1.0))
        assert_np_equal(c.numpy(), np.full(N, fill_value=2.0))
112
+
113
+
114
def test_stream_arg_synchronize(test, device):
    """Order dependent work across three streams using ``wp.synchronize_stream``."""
    a = wp.zeros(N, dtype=float, device=device)
    b = wp.empty(N, dtype=float, device=device)
    c = wp.empty(N, dtype=float, device=device)
    d = wp.empty(N, dtype=float, device=device)

    main_stream = wp.get_stream(device)
    side_streams = [wp.Stream(device), wp.Stream(device)]

    wp.launch(inc, dim=N, inputs=[a], device=device)

    # b and c are both derived from a, so wait for a to be ready first
    wp.synchronize_stream(main_stream)
    for out, side in zip((b, c), side_streams):
        wp.launch(inc_new, dim=N, inputs=[a, out], stream=side)

    # d is derived from b and c, so wait for both side streams
    for side in side_streams:
        wp.synchronize_stream(side)
    wp.launch(sum, dim=N, inputs=[b, c, d], device=device)

    for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
        assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
140
+
141
+
142
def test_stream_arg_wait_event(test, device):
    """Order dependent work across three streams using recorded events."""
    a = wp.zeros(N, dtype=float, device=device)
    b = wp.empty(N, dtype=float, device=device)
    c = wp.empty(N, dtype=float, device=device)
    d = wp.empty(N, dtype=float, device=device)

    main_stream = wp.get_stream(device)
    side_b = wp.Stream(device)
    side_c = wp.Stream(device)

    a_ready = wp.Event(device)
    b_ready = wp.Event(device)
    c_ready = wp.Event(device)

    wp.launch(inc, dim=N, inputs=[a], stream=main_stream)
    main_stream.record_event(a_ready)

    # b and c are both derived from a
    for side in (side_b, side_c):
        side.wait_event(a_ready)
    for side, out, done in ((side_b, b, b_ready), (side_c, c, c_ready)):
        wp.launch(inc_new, dim=N, inputs=[a, out], stream=side)
        side.record_event(done)

    # d is derived from b and c
    for done in (b_ready, c_ready):
        main_stream.wait_event(done)
    wp.launch(sum, dim=N, inputs=[b, c, d], stream=main_stream)

    for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
        assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
176
+
177
+
178
def test_stream_arg_wait_stream(test, device):
    """Order dependent work across three streams using ``Stream.wait_stream``."""
    a = wp.zeros(N, dtype=float, device=device)
    b = wp.empty(N, dtype=float, device=device)
    c = wp.empty(N, dtype=float, device=device)
    d = wp.empty(N, dtype=float, device=device)

    main_stream = wp.get_stream(device)
    side_streams = [wp.Stream(device), wp.Stream(device)]

    wp.launch(inc, dim=N, inputs=[a], stream=main_stream)

    # b and c are both derived from a
    for side in side_streams:
        side.wait_stream(main_stream)
    for out, side in zip((b, c), side_streams):
        wp.launch(inc_new, dim=N, inputs=[a, out], stream=side)

    # d is derived from b and c
    for side in side_streams:
        main_stream.wait_stream(side)
    wp.launch(sum, dim=N, inputs=[b, c, d], stream=main_stream)

    for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
        assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
205
+
206
+
207
def test_stream_scope_synchronize(test, device):
    """Scoped-stream variant of the ``wp.synchronize_stream`` dependency test."""
    with wp.ScopedDevice(device):
        a = wp.zeros(N, dtype=float)
        b = wp.empty(N, dtype=float)
        c = wp.empty(N, dtype=float)
        d = wp.empty(N, dtype=float)

        side_streams = [wp.Stream(), wp.Stream()]

        wp.launch(inc, dim=N, inputs=[a])

        # b and c are both derived from a
        wp.synchronize_stream()
        for out, side in zip((b, c), side_streams):
            with wp.ScopedStream(side):
                wp.launch(inc_new, dim=N, inputs=[a, out])

        # d is derived from b and c
        for side in side_streams:
            wp.synchronize_stream(side)
        wp.launch(sum, dim=N, inputs=[b, c, d])

        for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
            assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
235
+
236
+
237
def test_stream_scope_wait_event(test, device):
    """Scoped-stream variant of the event-based dependency test."""
    with wp.ScopedDevice(device):
        a = wp.zeros(N, dtype=float)
        b = wp.empty(N, dtype=float)
        c = wp.empty(N, dtype=float)
        d = wp.empty(N, dtype=float)

        side_b = wp.Stream()
        side_c = wp.Stream()

        a_ready = wp.Event()
        b_ready = wp.Event()
        c_ready = wp.Event()

        wp.launch(inc, dim=N, inputs=[a])
        wp.record_event(a_ready)

        # b and c are both derived from a
        for side, out, done in ((side_b, b, b_ready), (side_c, c, c_ready)):
            with wp.ScopedStream(side):
                wp.wait_event(a_ready)
                wp.launch(inc_new, dim=N, inputs=[a, out])
                wp.record_event(done)

        # d is derived from b and c
        for done in (b_ready, c_ready):
            wp.wait_event(done)
        wp.launch(sum, dim=N, inputs=[b, c, d])

        for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
            assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
273
+
274
+
275
def test_stream_scope_wait_stream(test, device):
    """Scoped-stream variant of the ``wait_stream`` dependency test."""
    with wp.ScopedDevice(device):
        a = wp.zeros(N, dtype=float)
        b = wp.empty(N, dtype=float)
        c = wp.empty(N, dtype=float)
        d = wp.empty(N, dtype=float)

        main_stream = wp.get_stream()
        side_streams = [wp.Stream(), wp.Stream()]

        wp.launch(inc, dim=N, inputs=[a])

        # b and c are both derived from a
        for out, side in zip((b, c), side_streams):
            with wp.ScopedStream(side):
                wp.wait_stream(main_stream)
                wp.launch(inc_new, dim=N, inputs=[a, out])

        # d is derived from b and c
        for side in side_streams:
            wp.wait_stream(side)
        wp.launch(sum, dim=N, inputs=[b, c, d])

        for arr, value in ((a, 1.0), (b, 2.0), (c, 2.0), (d, 4.0)):
            assert_np_equal(arr.numpy(), np.full(N, fill_value=value))
305
+
306
+
307
def test_event_synchronize(test, device):
    """Read two device arrays back into pinned host arrays, syncing on per-copy events."""
    stream = wp.get_stream(device)

    host_arrays = [wp.empty(N, dtype=float, device="cpu", pinned=True) for _ in range(2)]
    fill_values = (17, 42)

    # initialize each GPU array, start its readback, and mark the copy with an event;
    # the device arrays are kept referenced until the function returns
    device_arrays = []
    events = []
    for host, fill in zip(host_arrays, fill_values):
        dev = wp.full(N, fill, dtype=float, device=device)
        device_arrays.append(dev)
        wp.copy(host, dev)
        events.append(stream.record_event())

    # wait on each event in turn before inspecting the matching host array
    for host, fill, event in zip(host_arrays, fill_values, events):
        wp.synchronize_event(event)
        assert_np_equal(host.numpy(), np.full(N, fill_value=float(fill)))
327
+
328
+
329
def test_event_elapsed_time(test, device):
    """Bracket a kernel launch with two timing events and expect a positive elapsed time."""
    stream = wp.get_stream(device)
    begin = wp.Event(device, enable_timing=True)
    end = wp.Event(device, enable_timing=True)

    a = wp.zeros(N, dtype=float, device=device)

    stream.record_event(begin)
    wp.launch(inc, dim=N, inputs=[a], device=device)
    stream.record_event(end)

    test.assertGreater(wp.get_event_elapsed_time(begin, end), 0)
343
+
344
+
345
def test_event_elapsed_time_graph(test, device):
    """Record timing events inside a captured graph and expect a positive elapsed time."""
    stream = wp.get_stream(device)
    begin = wp.Event(device, enable_timing=True)
    end = wp.Event(device, enable_timing=True)

    a = wp.zeros(N, dtype=float, device=device)

    # load the module up front; the capture below is entered with force_module_load=False
    wp.load_module(device=device)

    with wp.ScopedCapture(device, force_module_load=False) as capture:
        stream.record_event(begin)
        wp.launch(inc, dim=N, inputs=[a], device=device)
        stream.record_event(end)

    # replay the captured graph and wait for the device before reading the timing
    wp.capture_launch(capture.graph)
    wp.synchronize_device(device)

    test.assertGreater(wp.get_event_elapsed_time(begin, end), 0)
366
+
367
+
368
def test_stream_priority_basics(test, device):
    """Check default stream priority, clamping of out-of-range priorities, and type validation.

    The test is skipped when the high- and low-priority streams report the same
    priority, which is taken to mean the device does not support priorities.

    Args:
        test: The running test case, used for assertions and skipping.
        device: Device on which the streams are created.
    """
    standard_stream = wp.Stream(device)
    test.assertEqual(standard_stream.priority, 0, "Default priority of streams must be 0.")

    # Create a high-priority stream with a priority value that is smaller than -1 (clamping expected)
    stream_hi = wp.Stream(device, priority=-100)

    # Create a low-priority stream with a priority value that is greater than 0 (clamping expected)
    stream_lo = wp.Stream(device, priority=100)

    if stream_lo.priority == stream_hi.priority:
        test.skipTest("Device must support stream priorities.")

    test.assertEqual(stream_hi.priority, -1)
    test.assertEqual(stream_lo.priority, 0)

    # A non-integer priority must be rejected; the result is not needed, so it is not bound
    with test.assertRaises(TypeError):
        wp.Stream(device, priority=0.5)
387
+
388
+
389
def test_stream_priority_timings(test, device):
    """Check that identical copies finish sooner on a high-priority stream than on a low-priority one.

    The same sequence of chunked self-copies is enqueued alternately on both
    streams, bracketed by timing events, and the two elapsed times are compared.
    The test is skipped when both streams report the same priority.

    Args:
        test: The running test case, used for assertions and skipping.
        device: Device on which the arrays, streams and events are created.
    """
    num_elems = 256 * 1024 * 1024
    chunk_elems = 128 * 1024 * 1024

    array_lo = wp.zeros(num_elems, dtype=wp.float32, device=device)
    array_hi = wp.zeros(num_elems, dtype=wp.float32, device=device)

    stream_lo = wp.Stream(device, 0)
    stream_hi = wp.Stream(device, -1)

    if stream_hi.priority == stream_lo.priority:
        test.skipTest("Device must support stream priorities.")

    # Timing events, created in the order: start lo, start hi, end lo, end hi
    start_lo_event, start_hi_event, end_lo_event, end_hi_event = (
        wp.Event(device, enable_timing=True) for _ in range(4)
    )

    wp.synchronize_device(device)

    stream_lo.record_event(start_lo_event)
    stream_hi.record_event(start_hi_event)

    # Enqueue the same chunked copies on both streams, low-priority stream first
    offset = 0
    while offset < num_elems:
        wp.copy(array_lo, array_lo, dest_offset=offset, src_offset=offset, count=chunk_elems, stream=stream_lo)
        wp.copy(array_hi, array_hi, dest_offset=offset, src_offset=offset, count=chunk_elems, stream=stream_hi)
        offset += chunk_elems

    stream_lo.record_event(end_lo_event)
    stream_hi.record_event(end_hi_event)

    # Elapsed time between the start and end events of each stream
    elapsed_lo = wp.get_event_elapsed_time(start_lo_event, end_lo_event)
    elapsed_hi = wp.get_event_elapsed_time(start_hi_event, end_hi_event)

    test.assertLess(elapsed_hi, elapsed_lo, "Copies on higher-priority stream should be faster.")
425
+
426
+
427
# Kernel in which every launched thread atomically adds 1 to element 0 of `sum`.
# The thread index is not needed, so it is not queried.
@wp.kernel
def sum_threads(sum: wp.array(dtype=wp.uint64)):
    wp.atomic_add(sum, 0, wp.uint64(1))
431
+
432
+
433
def test_stream_event_is_complete(test, device):
    """Check the ``is_complete`` property of streams and events before, during and after work.

    A fresh stream and an unrecorded event must both report completion. Then,
    for several rounds, a kernel is launched on the stream, the same event is
    re-recorded, both are expected to be incomplete until the stream is
    synchronized, and the accumulated counter is verified.

    Args:
        test: The running test case, used for assertions.
        device: Device made current for the duration of the test.
    """
    with wp.ScopedDevice(device):
        stream = wp.Stream()
        event = wp.Event()

        # No operations on stream, should be complete
        test.assertTrue(stream.is_complete)

        # Event not recorded yet, should be complete
        test.assertTrue(event.is_complete)

        a = wp.zeros(1, dtype=wp.uint64)

        threads = 1024 * 1024 * 8
        num_launches = 5

        with wp.ScopedStream(stream):
            # Launch some work on the stream and reuse the event
            for launch_index in range(num_launches):
                # Kernel takes about 1 ms to run on an RTX 3090
                wp.launch(sum_threads, dim=threads, outputs=[a])

                stream.record_event(event)

                # Kernel should still be running
                test.assertFalse(stream.is_complete)

                # Event should not be finished
                test.assertFalse(event.is_complete)

                # Force the stream operations to complete
                wp.synchronize_stream(stream)

                # Now all operations are complete
                test.assertTrue(stream.is_complete)
                test.assertTrue(event.is_complete)

                # Verify result: each completed launch adds `threads` to the counter
                test.assertEqual(a.numpy()[0], (launch_index + 1) * threads)
471
+
472
+
473
# Devices returned by get_selected_cuda_test_devices(); stored at module level
# for the device-parameterized tests.
devices = get_selected_cuda_test_devices()
474
+
475
+
476
class TestStreams(unittest.TestCase):
    """Test case for Warp streams and events.

    The methods defined directly in this class cover error handling on the CPU
    device, multi-GPU graph capture, and deletion of partially constructed
    Stream/Event objects.
    """

    def test_stream_exceptions(self):
        """Stream and event operations involving the CPU device must raise RuntimeError."""
        cpu_device = wp.get_device("cpu")

        # Can't set the stream on a CPU device
        with self.assertRaises(RuntimeError):
            stream0 = wp.Stream()
            cpu_device.stream = stream0

        # Can't create a stream on the CPU
        with self.assertRaises(RuntimeError):
            wp.Stream(device="cpu")

        # Can't create an event with CPU device
        with self.assertRaises(RuntimeError):
            wp.Event(device=cpu_device)

        # Can't get the stream on a CPU device
        with self.assertRaises(RuntimeError):
            cpu_stream = cpu_device.stream  # noqa: F841

    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
    @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
    def test_stream_arg_graph_mgpu(self):
        """Capture a two-GPU graph using explicit ``stream=`` arguments and replay it.

        Each replay increments ``a0`` and ``a1``, copies ``a1`` into ``b0`` and
        launches ``sum`` on ``a0``, ``b0`` and ``c0``; after ``num_iters``
        replays ``c0`` is expected to hold ``2 * num_iters`` everywhere.
        """
        wp.load_module(device="cuda:0")
        wp.load_module(device="cuda:1")

        # Peer-to-peer copies are not possible during graph capture if the arrays were
        # allocated using pooled allocators and mempool access is not enabled.
        # Here, we force default CUDA allocators and pre-allocate the memory.
        with wp.ScopedMempool("cuda:0", False), wp.ScopedMempool("cuda:1", False):
            # resources on GPU 0
            stream0 = wp.get_stream("cuda:0")
            a0 = wp.zeros(N, dtype=float, device="cuda:0")
            b0 = wp.empty(N, dtype=float, device="cuda:0")
            c0 = wp.empty(N, dtype=float, device="cuda:0")

            # resources on GPU 1
            stream1 = wp.get_stream("cuda:1")
            a1 = wp.zeros(N, dtype=float, device="cuda:1")

            # start recording on stream0
            wp.capture_begin(stream=stream0, force_module_load=False)
            try:
                # branch into stream1
                stream1.wait_stream(stream0)

                # launch concurrent kernels on each stream
                wp.launch(inc, dim=N, inputs=[a0], stream=stream0)
                wp.launch(inc, dim=N, inputs=[a1], stream=stream1)

                # wait for stream1 to finish
                stream0.wait_stream(stream1)

                # copy values from stream1
                wp.copy(b0, a1, stream=stream0)

                # compute sum
                wp.launch(sum, dim=N, inputs=[a0, b0, c0], stream=stream0)
            finally:
                # finish recording on stream0
                g = wp.capture_end(stream=stream0)

            # replay
            num_iters = 10
            for _ in range(num_iters):
                wp.capture_launch(g, stream=stream0)

            # check results
            assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))

    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
    @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
    def test_stream_scope_graph_mgpu(self):
        """Capture a two-GPU graph using ``ScopedDevice`` scopes instead of stream arguments.

        Performs the same sequence of operations as ``test_stream_arg_graph_mgpu``,
        but selects the device through nested ``wp.ScopedDevice`` blocks.
        """
        wp.load_module(device="cuda:0")
        wp.load_module(device="cuda:1")

        # Peer-to-peer copies are not possible during graph capture if the arrays were
        # allocated using pooled allocators and mempool access is not enabled.
        # Here, we force default CUDA allocators and pre-allocate the memory.
        with wp.ScopedMempool("cuda:0", False), wp.ScopedMempool("cuda:1", False):
            # resources on GPU 0
            with wp.ScopedDevice("cuda:0"):
                stream0 = wp.get_stream()
                a0 = wp.zeros(N, dtype=float)
                b0 = wp.empty(N, dtype=float)
                c0 = wp.empty(N, dtype=float)

            # resources on GPU 1
            with wp.ScopedDevice("cuda:1"):
                stream1 = wp.get_stream()
                a1 = wp.zeros(N, dtype=float)

            # capture graph
            with wp.ScopedDevice("cuda:0"):
                # start recording
                wp.capture_begin(force_module_load=False)
                try:
                    with wp.ScopedDevice("cuda:1"):
                        # branch into stream1
                        wp.wait_stream(stream0)

                        wp.launch(inc, dim=N, inputs=[a1])

                    wp.launch(inc, dim=N, inputs=[a0])

                    # wait for stream1 to finish
                    wp.wait_stream(stream1)

                    # copy values from stream1
                    wp.copy(b0, a1)

                    # compute sum
                    wp.launch(sum, dim=N, inputs=[a0, b0, c0])
                finally:
                    # finish recording
                    g = wp.capture_end()

            # replay
            with wp.ScopedDevice("cuda:0"):
                num_iters = 10
                for _ in range(num_iters):
                    wp.capture_launch(g)

            # check results
            assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))

    def test_stream_new_del(self):
        """Call ``__del__`` on a Stream created via ``__new__`` without ``__init__``."""
        # test the scenario in which a Stream is created but not initialized before gc
        instance = wp.Stream.__new__(wp.Stream)
        instance.__del__()

    def test_event_new_del(self):
        """Call ``__del__`` on an Event created via ``__new__`` without ``__init__``."""
        # test the scenario in which an Event is created but not initialized before gc
        instance = wp.Event.__new__(wp.Event)
        instance.__del__()
612
+
613
+
614
# Device-parameterized tests, registered on TestStreams in this order.
_DEVICE_TESTS = (
    ("test_stream_set", test_stream_set),
    ("test_stream_arg_explicit_sync", test_stream_arg_explicit_sync),
    ("test_stream_scope_implicit_sync", test_stream_scope_implicit_sync),
    ("test_stream_arg_synchronize", test_stream_arg_synchronize),
    ("test_stream_arg_wait_event", test_stream_arg_wait_event),
    ("test_stream_arg_wait_stream", test_stream_arg_wait_stream),
    ("test_stream_scope_synchronize", test_stream_scope_synchronize),
    ("test_stream_scope_wait_event", test_stream_scope_wait_event),
    ("test_stream_scope_wait_stream", test_stream_scope_wait_stream),
    ("test_stream_priority_basics", test_stream_priority_basics),
    ("test_stream_priority_timings", test_stream_priority_timings),
    ("test_stream_event_is_complete", test_stream_event_is_complete),
    ("test_event_synchronize", test_event_synchronize),
    ("test_event_elapsed_time", test_event_elapsed_time),
    ("test_event_elapsed_time_graph", test_event_elapsed_time_graph),
)

for _test_name, _test_func in _DEVICE_TESTS:
    add_function_test(TestStreams, _test_name, _test_func, devices=devices)

if __name__ == "__main__":
    # clear the kernel cache before running the suite
    wp.clear_kernel_cache()
    unittest.main(verbosity=2)
File without changes