warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,691 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import unittest
17
+
18
+ import numpy as np
19
+
20
+ import warp as wp
21
+ from warp._src.utils import check_p2p
22
+ from warp.tests.unittest_utils import *
23
+
24
+
25
@wp.kernel
def inc(a: wp.array(dtype=float)):
    # add 1.0, in place, to the element of `a` selected by wp.tid()
    i = wp.tid()
    a[i] = a[i] + 1.0
29
+
30
+
31
@wp.kernel
def inc_new(src: wp.array(dtype=float), dst: wp.array(dtype=float)):
    # store src + 1.0 into dst at the index given by wp.tid(); src is only read
    i = wp.tid()
    dst[i] = src[i] + 1.0
35
+
36
+
37
@wp.kernel
def sum(a: wp.array(dtype=float), b: wp.array(dtype=float), c: wp.array(dtype=float)):
    # elementwise a + b written into c
    # NOTE: inside this module the name hides Python's built-in sum()
    i = wp.tid()
    c[i] = a[i] + b[i]
41
+
42
+
43
# element count of the arrays allocated by the tests (10 * 2**20 entries)
N = 10 * 1024**2
45
+
46
+
47
def test_stream_set(test, device):
    # Install a freshly created stream on the device, check that the device
    # reports it, and always put the previous stream back afterwards.
    dev = wp.get_device(device)

    previous = dev.stream
    replacement = wp.Stream(dev)

    try:
        wp.set_stream(replacement, dev)

        test.assertTrue(dev.has_stream)
        test.assertEqual(dev.stream, replacement)
    finally:
        # runs even when an assertion above fails, so later tests see the original stream
        wp.set_stream(previous, dev)
62
+
63
+
64
def test_stream_arg_explicit_sync(test, device):
    # Queue an increment/copy chain on a second stream supplied through the
    # `stream=` argument and check the value each array ends up with.
    counter = wp.zeros(N, dtype=float, device=device)
    first_copy = wp.full(N, 42, dtype=float, device=device)
    second_copy = wp.empty(N, dtype=float, device=device)

    current_stream = wp.get_stream(device)
    side_stream = wp.Stream(device)

    # work passed via stream arguments is not ordered after the allocations
    # automatically, so the side stream has to wait on the current one explicitly
    side_stream.wait_stream(current_stream)

    # inc, copy, inc, copy, inc -- everything on the side stream
    wp.launch(inc, dim=counter.size, inputs=[counter], stream=side_stream)
    for target in (first_copy, second_copy):
        wp.copy(target, counter, stream=side_stream)
        wp.launch(inc, dim=counter.size, inputs=[counter], stream=side_stream)

    # each copy captured the counter before the increment that followed it
    for array, expected in ((counter, 3.0), (first_copy, 1.0), (second_copy, 2.0)):
        assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
85
+
86
+
87
def test_stream_scope_implicit_sync(test, device):
    # Same increment/copy chain as the explicit-sync test, but the second stream
    # is selected with wp.ScopedStream instead of `stream=` arguments.
    with wp.ScopedDevice(device):
        counter = wp.zeros(N, dtype=float)
        first_copy = wp.full(N, 42, dtype=float)
        second_copy = wp.empty(N, dtype=float)

        outer_stream = wp.get_stream()
        scoped_stream = wp.Stream()

        # entering wp.ScopedStream implicitly syncs with the allocations above,
        # so no explicit wait is issued here
        with wp.ScopedStream(scoped_stream):
            assert wp.get_stream() == scoped_stream

            # inc, copy, inc, copy, inc -- all on whatever stream is current
            wp.launch(inc, dim=counter.size, inputs=[counter])
            for target in (first_copy, second_copy):
                wp.copy(target, counter)
                wp.launch(inc, dim=counter.size, inputs=[counter])

        # leaving the scope must hand the device back to the stream it had before
        assert wp.get_stream() == outer_stream

        for array, expected in ((counter, 3.0), (first_copy, 1.0), (second_copy, 2.0)):
            assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
112
+
113
+
114
def test_stream_arg_synchronize(test, device):
    # Fork/join dependency graph across three streams, ordered with
    # wp.synchronize_stream(): base -> (left, right) -> total.
    base = wp.zeros(N, dtype=float, device=device)
    left, right, total = (wp.empty(N, dtype=float, device=device) for _ in range(3))

    main_stream = wp.get_stream(device)
    left_stream = wp.Stream(device)
    right_stream = wp.Stream(device)

    wp.launch(inc, dim=N, inputs=[base], device=device)

    # both branches read `base`, so wait for the main stream before forking
    wp.synchronize_stream(main_stream)
    for branch_stream, branch_out in ((left_stream, left), (right_stream, right)):
        wp.launch(inc_new, dim=N, inputs=[base, branch_out], stream=branch_stream)

    # the join reads both branch outputs, so wait for each branch stream first
    for branch_stream in (left_stream, right_stream):
        wp.synchronize_stream(branch_stream)
    wp.launch(sum, dim=N, inputs=[left, right, total], device=device)

    for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
        assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
140
+
141
+
142
def test_stream_arg_wait_event(test, device):
    # Fork/join dependency graph across three streams, ordered with events
    # recorded on one stream and waited on by another.
    base = wp.zeros(N, dtype=float, device=device)
    left, right, total = (wp.empty(N, dtype=float, device=device) for _ in range(3))

    main_stream = wp.get_stream(device)
    left_stream = wp.Stream(device)
    right_stream = wp.Stream(device)

    base_ready = wp.Event(device)
    left_ready = wp.Event(device)
    right_ready = wp.Event(device)

    wp.launch(inc, dim=N, inputs=[base], stream=main_stream)
    main_stream.record_event(base_ready)

    # both branches read `base`: each branch stream waits for it before launching
    for branch_stream in (left_stream, right_stream):
        branch_stream.wait_event(base_ready)
    wp.launch(inc_new, dim=N, inputs=[base, left], stream=left_stream)
    left_stream.record_event(left_ready)
    wp.launch(inc_new, dim=N, inputs=[base, right], stream=right_stream)
    right_stream.record_event(right_ready)

    # the join reads both branch outputs: the main stream waits on both events
    for branch_event in (left_ready, right_ready):
        main_stream.wait_event(branch_event)
    wp.launch(sum, dim=N, inputs=[left, right, total], stream=main_stream)

    for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
        assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
176
+
177
+
178
def test_stream_arg_wait_stream(test, device):
    # Fork/join dependency graph across three streams, ordered with
    # Stream.wait_stream() instead of explicit events.
    base = wp.zeros(N, dtype=float, device=device)
    left, right, total = (wp.empty(N, dtype=float, device=device) for _ in range(3))

    main_stream = wp.get_stream(device)
    left_stream = wp.Stream(device)
    right_stream = wp.Stream(device)

    wp.launch(inc, dim=N, inputs=[base], stream=main_stream)

    # both branches read `base`: make each branch stream wait on the main stream
    for branch_stream in (left_stream, right_stream):
        branch_stream.wait_stream(main_stream)
    wp.launch(inc_new, dim=N, inputs=[base, left], stream=left_stream)
    wp.launch(inc_new, dim=N, inputs=[base, right], stream=right_stream)

    # the join reads both branch outputs: the main stream waits on each branch
    for branch_stream in (left_stream, right_stream):
        main_stream.wait_stream(branch_stream)
    wp.launch(sum, dim=N, inputs=[left, right, total], stream=main_stream)

    for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
        assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
205
+
206
+
207
def test_stream_scope_synchronize(test, device):
    # Fork/join dependency graph where the branch streams are selected with
    # wp.ScopedStream and ordering is enforced with wp.synchronize_stream().
    with wp.ScopedDevice(device):
        base = wp.zeros(N, dtype=float)
        left, right, total = (wp.empty(N, dtype=float) for _ in range(3))

        left_stream = wp.Stream()
        right_stream = wp.Stream()

        wp.launch(inc, dim=N, inputs=[base])

        # both branches read `base`: synchronize (no stream argument given) before forking
        wp.synchronize_stream()
        with wp.ScopedStream(left_stream):
            wp.launch(inc_new, dim=N, inputs=[base, left])
        with wp.ScopedStream(right_stream):
            wp.launch(inc_new, dim=N, inputs=[base, right])

        # the join reads both branch outputs: wait for each branch stream to finish
        for branch_stream in (left_stream, right_stream):
            wp.synchronize_stream(branch_stream)
        wp.launch(sum, dim=N, inputs=[left, right, total])

        for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
            assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
235
+
236
+
237
def test_stream_scope_wait_event(test, device):
    # Fork/join dependency graph where the branch streams are selected with
    # wp.ScopedStream and ordering is enforced with wp.record_event()/wp.wait_event().
    with wp.ScopedDevice(device):
        base = wp.zeros(N, dtype=float)
        left, right, total = (wp.empty(N, dtype=float) for _ in range(3))

        left_stream = wp.Stream()
        right_stream = wp.Stream()

        base_ready = wp.Event()
        left_ready = wp.Event()
        right_ready = wp.Event()

        wp.launch(inc, dim=N, inputs=[base])
        wp.record_event(base_ready)

        # both branches read `base`: each scope waits for it, does its work,
        # then records its own completion event before the scope ends
        with wp.ScopedStream(left_stream):
            wp.wait_event(base_ready)
            wp.launch(inc_new, dim=N, inputs=[base, left])
            wp.record_event(left_ready)
        with wp.ScopedStream(right_stream):
            wp.wait_event(base_ready)
            wp.launch(inc_new, dim=N, inputs=[base, right])
            wp.record_event(right_ready)

        # the join reads both branch outputs: wait on both branch events first
        for branch_event in (left_ready, right_ready):
            wp.wait_event(branch_event)
        wp.launch(sum, dim=N, inputs=[left, right, total])

        for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
            assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
273
+
274
+
275
def test_stream_scope_wait_stream(test, device):
    # Fork/join dependency graph where the branch streams are selected with
    # wp.ScopedStream and ordering is enforced with wp.wait_stream().
    with wp.ScopedDevice(device):
        base = wp.zeros(N, dtype=float)
        left, right, total = (wp.empty(N, dtype=float) for _ in range(3))

        main_stream = wp.get_stream()
        left_stream = wp.Stream()
        right_stream = wp.Stream()

        wp.launch(inc, dim=N, inputs=[base])

        # both branches read `base`: inside each scope, wait on the main stream first
        with wp.ScopedStream(left_stream):
            wp.wait_stream(main_stream)
            wp.launch(inc_new, dim=N, inputs=[base, left])
        with wp.ScopedStream(right_stream):
            wp.wait_stream(main_stream)
            wp.launch(inc_new, dim=N, inputs=[base, right])

        # the join reads both branch outputs: wait on each branch stream first
        for branch_stream in (left_stream, right_stream):
            wp.wait_stream(branch_stream)
        wp.launch(sum, dim=N, inputs=[left, right, total])

        for array, expected in ((base, 1.0), (left, 2.0), (right, 2.0), (total, 4.0)):
            assert_np_equal(array.numpy(), np.full(N, fill_value=expected))
305
+
306
+
307
def test_event_synchronize(test, device):
    # Queue two copies from device arrays into pinned host arrays, recording an
    # event on the stream after each copy. Each host array is checked only
    # after synchronizing on its own event.
    stream = wp.get_stream(device)

    host_a = wp.empty(N, dtype=float, device="cpu", pinned=True)
    host_b = wp.empty(N, dtype=float, device="cpu", pinned=True)

    # initialize GPU array and do an asynchronous readback
    dev_a = wp.full(N, 17, dtype=float, device=device)
    wp.copy(host_a, dev_a)
    event_a = stream.record_event()

    # second fill + readback, queued before the first one is checked
    dev_b = wp.full(N, 42, dtype=float, device=device)
    wp.copy(host_b, dev_b)
    event_b = stream.record_event()

    wp.synchronize_event(event_a)
    assert_np_equal(host_a.numpy(), np.full(N, fill_value=17.0))

    wp.synchronize_event(event_b)
    assert_np_equal(host_b.numpy(), np.full(N, fill_value=42.0))
327
+
328
+
329
def test_event_elapsed_time(test, device):
    # Bracket a single kernel launch with two timing-enabled events and check
    # that the elapsed time reported between them is positive.
    stream = wp.get_stream(device)
    start_event = wp.Event(device, enable_timing=True)
    stop_event = wp.Event(device, enable_timing=True)

    data = wp.zeros(N, dtype=float, device=device)

    stream.record_event(start_event)
    wp.launch(inc, dim=N, inputs=[data], device=device)
    stream.record_event(stop_event)

    test.assertGreater(wp.get_event_elapsed_time(start_event, stop_event), 0)
343
+
344
+
345
def test_event_elapsed_time_graph(test, device):
    # Same as the plain elapsed-time test, except that the two event records
    # and the kernel launch are captured into a graph which is then launched.
    stream = wp.get_stream(device)
    start_event = wp.Event(device, enable_timing=True)
    stop_event = wp.Event(device, enable_timing=True)

    data = wp.zeros(N, dtype=float, device=device)

    # load the module up front; the capture below passes force_module_load=False
    wp.load_module(device=device)

    with wp.ScopedCapture(device, force_module_load=False) as capture:
        stream.record_event(start_event)
        wp.launch(inc, dim=N, inputs=[data], device=device)
        stream.record_event(stop_event)

    wp.capture_launch(capture.graph)

    # wait for the launched graph before querying the events
    wp.synchronize_device(device)

    test.assertGreater(wp.get_event_elapsed_time(start_event, stop_event), 0)
366
+
367
+
368
def test_event_external(test, device):
    # Capture two graphs whose only link is an event that is recorded in the
    # first and waited on in the second (both with external=True), replay them
    # on separate streams and check the accumulated result.
    with wp.ScopedDevice(device):
        # event used to synchronize two graphs (external event)
        event = wp.Event()

        n = 1_000_000
        a, b, c = (wp.zeros(n, dtype=float) for _ in range(3))

        # first graph: increment a and b, then record the event
        with wp.ScopedCapture() as capture1:
            for arr in (a, b):
                wp.launch(inc, dim=n, inputs=[arr])
            wp.record_event(event, external=True)

        # second graph: wait for the event, then run `sum` on a, b and c
        with wp.ScopedCapture() as capture2:
            wp.wait_event(event, external=True)
            wp.launch(sum, dim=n, inputs=[a, b, c])

        stream1, stream2 = wp.Stream(), wp.Stream()
        num_iters = 10

        for _ in range(num_iters):
            # Launch graphs on different streams, but they should be
            # synchronized using the external event.
            wp.capture_launch(capture1.graph, stream=stream1)
            wp.capture_launch(capture2.graph, stream=stream2)

        assert_np_equal(c.numpy(), np.full(n, 2 * num_iters, dtype=np.float32))
401
+
402
+
403
def test_stream_priority_basics(test, device):
    # Check the default stream priority, that out-of-range priority values end
    # up as -1 (high) and 0 (low), and that a non-integer priority raises
    # TypeError.
    standard_stream = wp.Stream(device)
    test.assertEqual(standard_stream.priority, 0, "Default priority of streams must be 0.")

    # Create a high-priority stream with a priority value that is smaller than -1 (clamping expected)
    stream_hi = wp.Stream(device, priority=-100)

    # Create a low-priority stream with a priority value that is greater than 0 (clamping expected)
    stream_lo = wp.Stream(device, priority=100)

    # equal priorities are treated as the device lacking priority support
    if stream_lo.priority == stream_hi.priority:
        test.skipTest("Device must support stream priorities.")

    test.assertEqual(stream_hi.priority, -1)
    test.assertEqual(stream_lo.priority, 0)

    # only the raised exception matters, so the result is not bound to a name
    with test.assertRaises(TypeError):
        wp.Stream(device, priority=0.5)
422
+
423
+
424
def test_stream_priority_timings(test, device):
    # Time identical batches of in-place array copies on a priority-0 stream
    # and on a priority -1 stream, and expect the latter to take less time.
    total_size = 256 * 1024 * 1024
    each_size = 128 * 1024 * 1024

    array_lo = wp.zeros(total_size, dtype=wp.float32, device=device)
    array_hi = wp.zeros(total_size, dtype=wp.float32, device=device)

    stream_lo = wp.Stream(device, priority=0)
    stream_hi = wp.Stream(device, priority=-1)

    if stream_lo.priority == stream_hi.priority:
        test.skipTest("Device must support stream priorities.")

    # Create some events
    start_lo_event, start_hi_event, end_lo_event, end_hi_event = (
        wp.Event(device, enable_timing=True) for _ in range(4)
    )

    wp.synchronize_device(device)

    stream_lo.record_event(start_lo_event)
    stream_hi.record_event(start_hi_event)

    # alternate between the two streams, one chunk at a time
    for copy_offset in range(0, total_size, each_size):
        for arr, stream in ((array_lo, stream_lo), (array_hi, stream_hi)):
            wp.copy(
                arr,
                arr,
                dest_offset=copy_offset,
                src_offset=copy_offset,
                count=each_size,
                stream=stream,
            )

    stream_lo.record_event(end_lo_event)
    stream_hi.record_event(end_hi_event)

    # get elapsed time between the two events
    elapsed_lo = wp.get_event_elapsed_time(start_lo_event, end_lo_event)
    elapsed_hi = wp.get_event_elapsed_time(start_hi_event, end_hi_event)

    test.assertLess(elapsed_hi, elapsed_lo, "Copies on higher-priority stream should be faster.")
460
+
461
+
462
# Kernel in which every launched thread atomically adds 1 to element 0 of the
# given uint64 array. The thread index is not needed, so it is not queried.
@wp.kernel
def sum_threads(sum: wp.array(dtype=wp.uint64)):
    wp.atomic_add(sum, 0, wp.uint64(1))
466
+
467
+
468
def test_stream_event_is_complete(test, device):
    # Exercise the `is_complete` property of a stream and of an event: both
    # must report True while idle, False right after work has been queued, and
    # True again once the stream has been synchronized.
    with wp.ScopedDevice(device):
        stream = wp.Stream()
        event = wp.Event()
        # No operations on stream, should be complete
        test.assertTrue(stream.is_complete)

        # Event not recorded yet, should be complete
        test.assertTrue(event.is_complete)

        a = wp.zeros(1, dtype=wp.uint64)

        threads = 1024 * 1024 * 64
        # named count, so the final check does not depend on a loop variable
        num_launches = 5

        with wp.ScopedStream(stream):
            # Launch some work on the stream and reuse the event

            for _ in range(num_launches):
                # Kernel takes about 1 ms to run on an RTX 3090
                wp.launch(sum_threads, dim=threads, outputs=[a])

                stream.record_event(event)

                # Kernel should still be running
                test.assertFalse(stream.is_complete)

                # Event should not be finished
                test.assertFalse(event.is_complete)

                # Force the stream operations to complete
                wp.synchronize_stream(stream)

                # Now all operations are complete
                test.assertTrue(stream.is_complete)
                test.assertTrue(event.is_complete)

            # Verify result: every thread of every launch added 1 to a[0]
            test.assertEqual(a.numpy()[0], num_launches * threads)
506
+
507
+
508
def test_graph_destroy_during_capture(test, device):
    # Drop the last reference to an already-launched capture while a second
    # capture is in progress, then check that both graphs' increments landed.
    with wp.ScopedDevice(device):
        n = 10
        a = wp.zeros(n, dtype=float)

        with wp.ScopedCapture() as first_capture:
            wp.launch(inc, dim=n, inputs=[a])

        wp.capture_launch(first_capture.graph)

        with wp.ScopedCapture() as second_capture:
            del first_capture  # <--- should be deferred
            wp.launch(inc, dim=n, inputs=[a])

        wp.capture_launch(second_capture.graph)

        expected = np.full(n, 2, dtype=np.float32)
        assert_np_equal(a.numpy(), expected)
525
+
526
+
527
+ devices = get_selected_cuda_test_devices()
528
+
529
+
530
# Test case holding the stream/event tests that are written directly as
# methods: CPU error paths, two multi-GPU graph-capture tests, and the
# finalizer checks for uninitialized Stream/Event objects.
class TestStreams(unittest.TestCase):
    def test_stream_exceptions(self):
        # Every stream/event operation below targets the CPU device and is
        # expected to raise RuntimeError.
        cpu_device = wp.get_device("cpu")

        # Can't set the stream on a CPU device
        # (the stream is created inside the block on purpose, so a RuntimeError
        # from wp.Stream() itself also satisfies the assertion)
        with self.assertRaises(RuntimeError):
            cpu_device.stream = wp.Stream()

        # Can't create a stream on the CPU
        with self.assertRaises(RuntimeError):
            wp.Stream(device="cpu")

        # Can't create an event with CPU device
        with self.assertRaises(RuntimeError):
            wp.Event(device=cpu_device)

        # Can't get the stream on a CPU device
        with self.assertRaises(RuntimeError):
            _ = cpu_device.stream

    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
    @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
    def test_stream_arg_graph_mgpu(self):
        # Capture a graph that spans the streams of cuda:0 and cuda:1, passing
        # the stream explicitly to every operation, then replay it and check
        # the accumulated sum.
        wp.load_module(device="cuda:0")
        wp.load_module(device="cuda:1")

        # Peer-to-peer copies are not possible during graph capture if the arrays were
        # allocated using pooled allocators and mempool access is not enabled.
        # Here, we force default CUDA allocators and pre-allocate the memory.
        with wp.ScopedMempool("cuda:0", False), wp.ScopedMempool("cuda:1", False):
            # resources on GPU 0
            stream0 = wp.get_stream("cuda:0")
            a0 = wp.zeros(N, dtype=float, device="cuda:0")
            b0 = wp.empty(N, dtype=float, device="cuda:0")
            c0 = wp.empty(N, dtype=float, device="cuda:0")

            # resources on GPU 1
            stream1 = wp.get_stream("cuda:1")
            a1 = wp.zeros(N, dtype=float, device="cuda:1")

            # start recording on stream0
            wp.capture_begin(stream=stream0, force_module_load=False)
            try:
                # branch into stream1
                stream1.wait_stream(stream0)

                # launch concurrent kernels on each stream
                wp.launch(inc, dim=N, inputs=[a0], stream=stream0)
                wp.launch(inc, dim=N, inputs=[a1], stream=stream1)

                # wait for stream1 to finish
                stream0.wait_stream(stream1)

                # copy values from stream1
                wp.copy(b0, a1, stream=stream0)

                # compute sum
                wp.launch(sum, dim=N, inputs=[a0, b0, c0], stream=stream0)
            finally:
                # finish recording on stream0, even if a capture step raised
                g = wp.capture_end(stream=stream0)

            # replay
            num_iters = 10
            for _ in range(num_iters):
                wp.capture_launch(g, stream=stream0)

            # check results
            assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))

    @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
    @unittest.skipUnless(check_p2p(), "Peer-to-Peer transfers not supported")
    def test_stream_scope_graph_mgpu(self):
        # Same two-GPU graph capture as test_stream_arg_graph_mgpu, but devices
        # are selected with ScopedDevice blocks instead of explicit
        # stream/device arguments.
        wp.load_module(device="cuda:0")
        wp.load_module(device="cuda:1")

        # Peer-to-peer copies are not possible during graph capture if the arrays were
        # allocated using pooled allocators and mempool access is not enabled.
        # Here, we force default CUDA allocators and pre-allocate the memory.
        with wp.ScopedMempool("cuda:0", False), wp.ScopedMempool("cuda:1", False):
            # resources on GPU 0
            with wp.ScopedDevice("cuda:0"):
                stream0 = wp.get_stream()
                a0 = wp.zeros(N, dtype=float)
                b0 = wp.empty(N, dtype=float)
                c0 = wp.empty(N, dtype=float)

            # resources on GPU 1
            with wp.ScopedDevice("cuda:1"):
                stream1 = wp.get_stream()
                a1 = wp.zeros(N, dtype=float)

            # capture graph
            with wp.ScopedDevice("cuda:0"):
                # start recording
                wp.capture_begin(force_module_load=False)
                try:
                    with wp.ScopedDevice("cuda:1"):
                        # branch into stream1
                        wp.wait_stream(stream0)

                        wp.launch(inc, dim=N, inputs=[a1])

                    wp.launch(inc, dim=N, inputs=[a0])

                    # wait for stream1 to finish
                    wp.wait_stream(stream1)

                    # copy values from stream1
                    wp.copy(b0, a1)

                    # compute sum
                    wp.launch(sum, dim=N, inputs=[a0, b0, c0])
                finally:
                    # finish recording, even if a capture step raised
                    g = wp.capture_end()

            # replay
            with wp.ScopedDevice("cuda:0"):
                num_iters = 10
                for _ in range(num_iters):
                    wp.capture_launch(g)

            # check results
            assert_np_equal(c0.numpy(), np.full(N, fill_value=2 * num_iters))

    def test_stream_new_del(self):
        # test the scenario in which a Stream is created but not initialized before gc
        instance = wp.Stream.__new__(wp.Stream)
        instance.__del__()

    def test_event_new_del(self):
        # test the scenario in which an Event is created but not initialized before gc
        instance = wp.Event.__new__(wp.Event)
        instance.__del__()
666
+
667
+
668
# Register every device-parameterized test function on TestStreams under the
# function's own name, in a fixed order.
# NOTE(review): this relies on each function's __name__ being exactly the test
# name it should be registered under; confirm for the functions defined
# earlier in the file.
for _test_func in (
    test_stream_set,
    test_stream_arg_explicit_sync,
    test_stream_scope_implicit_sync,
    # stream dependency tests
    test_stream_arg_synchronize,
    test_stream_arg_wait_event,
    test_stream_arg_wait_stream,
    test_stream_scope_synchronize,
    test_stream_scope_wait_event,
    test_stream_scope_wait_stream,
    test_stream_priority_basics,
    test_stream_priority_timings,
    test_stream_event_is_complete,
    # event tests
    test_event_synchronize,
    test_event_elapsed_time,
    test_event_elapsed_time_graph,
    test_event_external,
    # graph lifetime tests
    test_graph_destroy_during_capture,
):
    add_function_test(TestStreams, _test_func.__name__, _test_func, devices=devices)


if __name__ == "__main__":
    # clear Warp's kernel cache before running the suite as a script
    wp.clear_kernel_cache()
    unittest.main(verbosity=2)
File without changes