warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/tests/test_mlp.py CHANGED
@@ -5,9 +5,12 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
+ import unittest
9
+
8
10
  import numpy as np
11
+
9
12
  import warp as wp
10
- from warp.tests.test_base import *
13
+ from warp.tests.unittest_utils import *
11
14
 
12
15
  wp.init()
13
16
 
@@ -35,17 +38,17 @@ def loss_kernel(x: wp.array2d(dtype=float), loss: wp.array(dtype=float)):
35
38
 
36
39
 
37
40
  def test_mlp(test, device):
38
- np.random.seed(0)
41
+ rng = np.random.default_rng(123)
39
42
 
40
43
  m = 10
41
44
  n = 200
42
45
 
43
46
  batches = 20000
44
47
 
45
- weights = wp.array(np.random.rand(m, n) * 0.5 - 0.5, dtype=float, device=device)
46
- bias = wp.array(np.random.rand(m) * 0.5 - 0.5, dtype=float, device=device)
48
+ weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device)
49
+ bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device)
47
50
 
48
- x = wp.array(np.random.rand(n, batches), dtype=float, device=device)
51
+ x = wp.array(rng.random(size=(n, batches)), dtype=float, device=device)
49
52
  y = wp.zeros(shape=(m, batches), device=device)
50
53
 
51
54
  with wp.ScopedTimer("warp", active=False):
@@ -86,13 +89,15 @@ def create_mlp(m, n):
86
89
  def create_golden():
87
90
  import torch
88
91
 
92
+ rng = np.random.default_rng(123)
93
+
89
94
  input_size = 32
90
95
  hidden_size = 16
91
96
  batch_size = 64
92
97
 
93
98
  network = create_mlp(input_size, hidden_size)
94
99
 
95
- x = torch.Tensor(np.random.rand(batch_size, input_size))
100
+ x = torch.Tensor(rng.random(size=(batch_size, input_size)))
96
101
  x.requires_grad = True
97
102
 
98
103
  y = network.forward(x)
@@ -169,6 +174,8 @@ def test_mlp_grad(test, device):
169
174
  def profile_mlp_torch(device):
170
175
  import torch
171
176
 
177
+ rng = np.random.default_rng(123)
178
+
172
179
  m = 128
173
180
  n = 64
174
181
 
@@ -179,7 +186,7 @@ def profile_mlp_torch(device):
179
186
 
180
187
  network = create_mlp(m, n)
181
188
 
182
- x = torch.Tensor(np.random.rand(b, m))
189
+ x = torch.Tensor(rng.random(size=(b, m)))
183
190
 
184
191
  with wp.ScopedTimer("torch_forward" + str(b)):
185
192
  y = network.forward(x)
@@ -190,7 +197,7 @@ def profile_mlp_torch(device):
190
197
 
191
198
  network = create_mlp(m, n)
192
199
 
193
- x = torch.Tensor(np.random.rand(b, m))
200
+ x = torch.Tensor(rng.random(size=(b, m)))
194
201
  y = network.forward(x)
195
202
 
196
203
  loss = torch.norm(y)
@@ -204,6 +211,8 @@ def profile_mlp_torch(device):
204
211
 
205
212
 
206
213
  def profile_mlp_warp(device):
214
+ rng = np.random.default_rng(123)
215
+
207
216
  m = 128
208
217
  n = 64
209
218
 
@@ -212,10 +221,10 @@ def profile_mlp_warp(device):
212
221
  for i in range(steps):
213
222
  b = 2**i
214
223
 
215
- weights = wp.array(np.random.rand(m, n) * 0.5 - 0.5, dtype=float, device=device)
216
- bias = wp.array(np.random.rand(m) * 0.5 - 0.5, dtype=float, device=device)
224
+ weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device)
225
+ bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device)
217
226
 
218
- x = wp.array(np.random.rand(n, b), dtype=float, device=device)
227
+ x = wp.array(rng.random(size=(n, b)), dtype=float, device=device)
219
228
  y = wp.zeros(shape=(m, b), device=device)
220
229
 
221
230
  with wp.ScopedTimer("warp-forward" + str(b)):
@@ -225,10 +234,10 @@ def profile_mlp_warp(device):
225
234
  for i in range(steps):
226
235
  b = 2**i
227
236
 
228
- weights = wp.array(np.random.rand(m, n) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)
229
- bias = wp.array(np.random.rand(m) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)
237
+ weights = wp.array(rng.random(size=(m, n)) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)
238
+ bias = wp.array(rng.random(size=m) * 0.5 - 0.5, dtype=float, device=device, requires_grad=True)
230
239
 
231
- x = wp.array(np.random.rand(n, b), dtype=float, device=device, requires_grad=True)
240
+ x = wp.array(rng.random(size=(n, b)), dtype=float, device=device, requires_grad=True)
232
241
  y = wp.zeros(shape=(m, b), device=device, requires_grad=True)
233
242
 
234
243
  loss = wp.zeros(1, dtype=float, device=device)
@@ -251,18 +260,17 @@ def profile_mlp_warp(device):
251
260
  # profile_mlp_torch("cuda")
252
261
 
253
262
 
254
- def register(parent):
255
- devices = get_test_devices()
263
+ devices = get_test_devices()
264
+
256
265
 
257
- class TestMLP(parent):
258
- pass
266
+ class TestMLP(unittest.TestCase):
267
+ pass
259
268
 
260
- add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices)
261
- add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices)
262
269
 
263
- return TestMLP
270
+ add_function_test(TestMLP, "test_mlp", test_mlp, devices=devices)
271
+ add_function_test(TestMLP, "test_mlp_grad", test_mlp_grad, devices=devices)
264
272
 
265
273
 
266
274
  if __name__ == "__main__":
267
- c = register(unittest.TestCase)
275
+ wp.build.clear_kernel_cache()
268
276
  unittest.main(verbosity=2, failfast=False)
warp/tests/test_model.py CHANGED
@@ -5,103 +5,106 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
- import warp as wp
9
- from warp.tests.test_base import *
10
- from warp.sim import ModelBuilder
8
+ import unittest
11
9
 
12
10
  import numpy as np
13
11
 
12
+ import warp as wp
13
+ from warp.sim import ModelBuilder
14
+ from warp.tests.unittest_utils import *
15
+
14
16
  wp.init()
15
17
 
16
18
 
17
- def register(parent):
18
- class TestModel(parent):
19
- def test_add_triangles(self):
20
- pts = np.array(
21
- [
22
- [-0.00585869, 0.34189449, -1.17415233],
23
- [-1.894547, 0.1788074, 0.9251329],
24
- [-1.26141048, 0.16140787, 0.08823282],
25
- [-0.08609255, -0.82722546, 0.65995427],
26
- [0.78827592, -1.77375711, -0.55582718],
27
- ]
28
- )
29
- tris = np.array([[0, 3, 4], [0, 2, 3], [2, 1, 3], [1, 4, 3]])
30
-
31
- builder1 = ModelBuilder()
32
- builder2 = ModelBuilder()
33
- for pt in pts:
34
- builder1.add_particle(pt, [0.0, 0.0, 0.0], 1.0)
35
- builder2.add_particle(pt, [0.0, 0.0, 0.0], 1.0)
36
-
37
- # test add_triangle(s) with default arguments:
38
- areas = builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2])
39
- for i, t in enumerate(tris):
40
- area = builder1.add_triangle(t[0], t[1], t[2])
41
- self.assertAlmostEqual(area, areas[i], places=6)
42
-
43
- # test add_triangle(s) with non default arguments:
44
- tri_ke = np.random.randn(pts.shape[0])
45
- tri_ka = np.random.randn(pts.shape[0])
46
- tri_kd = np.random.randn(pts.shape[0])
47
- tri_drag = np.random.randn(pts.shape[0])
48
- tri_lift = np.random.randn(pts.shape[0])
49
- for i, t in enumerate(tris):
50
- builder1.add_triangle(
51
- t[0],
52
- t[1],
53
- t[2],
54
- tri_ke[i],
55
- tri_ka[i],
56
- tri_kd[i],
57
- tri_drag[i],
58
- tri_lift[i],
59
- )
60
- builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2], tri_ke, tri_ka, tri_kd, tri_drag, tri_lift)
61
-
62
- assert_np_equal(np.array(builder1.tri_indices), np.array(builder2.tri_indices))
63
- assert_np_equal(np.array(builder1.tri_poses), np.array(builder2.tri_poses), tol=1.0e-6)
64
- assert_np_equal(np.array(builder1.tri_activations), np.array(builder2.tri_activations))
65
- assert_np_equal(np.array(builder1.tri_materials), np.array(builder2.tri_materials))
66
-
67
- def test_add_edges(self):
68
- pts = np.array(
69
- [
70
- [-0.00585869, 0.34189449, -1.17415233],
71
- [-1.894547, 0.1788074, 0.9251329],
72
- [-1.26141048, 0.16140787, 0.08823282],
73
- [-0.08609255, -0.82722546, 0.65995427],
74
- [0.78827592, -1.77375711, -0.55582718],
75
- ]
19
+ class TestModel(unittest.TestCase):
20
+ def test_add_triangles(self):
21
+ rng = np.random.default_rng(123)
22
+
23
+ pts = np.array(
24
+ [
25
+ [-0.00585869, 0.34189449, -1.17415233],
26
+ [-1.894547, 0.1788074, 0.9251329],
27
+ [-1.26141048, 0.16140787, 0.08823282],
28
+ [-0.08609255, -0.82722546, 0.65995427],
29
+ [0.78827592, -1.77375711, -0.55582718],
30
+ ]
31
+ )
32
+ tris = np.array([[0, 3, 4], [0, 2, 3], [2, 1, 3], [1, 4, 3]])
33
+
34
+ builder1 = ModelBuilder()
35
+ builder2 = ModelBuilder()
36
+ for pt in pts:
37
+ builder1.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
38
+ builder2.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
39
+
40
+ # test add_triangle(s) with default arguments:
41
+ areas = builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2])
42
+ for i, t in enumerate(tris):
43
+ area = builder1.add_triangle(t[0], t[1], t[2])
44
+ self.assertAlmostEqual(area, areas[i], places=6)
45
+
46
+ # test add_triangle(s) with non default arguments:
47
+ tri_ke = rng.standard_normal(size=pts.shape[0])
48
+ tri_ka = rng.standard_normal(size=pts.shape[0])
49
+ tri_kd = rng.standard_normal(size=pts.shape[0])
50
+ tri_drag = rng.standard_normal(size=pts.shape[0])
51
+ tri_lift = rng.standard_normal(size=pts.shape[0])
52
+ for i, t in enumerate(tris):
53
+ builder1.add_triangle(
54
+ t[0],
55
+ t[1],
56
+ t[2],
57
+ tri_ke[i],
58
+ tri_ka[i],
59
+ tri_kd[i],
60
+ tri_drag[i],
61
+ tri_lift[i],
76
62
  )
77
- edges = np.array([[0, 4, 3, 1], [3, 2, 4, 1]])
78
-
79
- builder1 = ModelBuilder()
80
- builder2 = ModelBuilder()
81
- for pt in pts:
82
- builder1.add_particle(pt, [0.0, 0.0, 0.0], 1.0)
83
- builder2.add_particle(pt, [0.0, 0.0, 0.0], 1.0)
84
-
85
- # test defaults:
86
- for i in range(2):
87
- builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3])
88
- builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3])
89
-
90
- # test non defaults:
91
- rest = np.random.randn(2)
92
- edge_ke = np.random.randn(2)
93
- edge_kd = np.random.randn(2)
94
- for i in range(2):
95
- builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3], rest[i], edge_ke[i], edge_kd[i])
96
- builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3], rest, edge_ke, edge_kd)
97
-
98
- assert_np_equal(np.array(builder1.edge_indices), np.array(builder2.edge_indices))
99
- assert_np_equal(np.array(builder1.edge_rest_angle), np.array(builder2.edge_rest_angle), tol=1.0e-4)
100
- assert_np_equal(np.array(builder1.edge_bending_properties), np.array(builder2.edge_bending_properties))
101
-
102
- return TestModel
63
+ builder2.add_triangles(tris[:, 0], tris[:, 1], tris[:, 2], tri_ke, tri_ka, tri_kd, tri_drag, tri_lift)
64
+
65
+ assert_np_equal(np.array(builder1.tri_indices), np.array(builder2.tri_indices))
66
+ assert_np_equal(np.array(builder1.tri_poses), np.array(builder2.tri_poses), tol=1.0e-6)
67
+ assert_np_equal(np.array(builder1.tri_activations), np.array(builder2.tri_activations))
68
+ assert_np_equal(np.array(builder1.tri_materials), np.array(builder2.tri_materials))
69
+
70
+ def test_add_edges(self):
71
+ rng = np.random.default_rng(123)
72
+
73
+ pts = np.array(
74
+ [
75
+ [-0.00585869, 0.34189449, -1.17415233],
76
+ [-1.894547, 0.1788074, 0.9251329],
77
+ [-1.26141048, 0.16140787, 0.08823282],
78
+ [-0.08609255, -0.82722546, 0.65995427],
79
+ [0.78827592, -1.77375711, -0.55582718],
80
+ ]
81
+ )
82
+ edges = np.array([[0, 4, 3, 1], [3, 2, 4, 1]])
83
+
84
+ builder1 = ModelBuilder()
85
+ builder2 = ModelBuilder()
86
+ for pt in pts:
87
+ builder1.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
88
+ builder2.add_particle(wp.vec3(pt), wp.vec3(), 1.0)
89
+
90
+ # test defaults:
91
+ for i in range(2):
92
+ builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3])
93
+ builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3])
94
+
95
+ # test non defaults:
96
+ rest = rng.standard_normal(size=2)
97
+ edge_ke = rng.standard_normal(size=2)
98
+ edge_kd = rng.standard_normal(size=2)
99
+ for i in range(2):
100
+ builder1.add_edge(edges[i, 0], edges[i, 1], edges[i, 2], edges[i, 3], rest[i], edge_ke[i], edge_kd[i])
101
+ builder2.add_edges(edges[:, 0], edges[:, 1], edges[:, 2], edges[:, 3], rest, edge_ke, edge_kd)
102
+
103
+ assert_np_equal(np.array(builder1.edge_indices), np.array(builder2.edge_indices))
104
+ assert_np_equal(np.array(builder1.edge_rest_angle), np.array(builder2.edge_rest_angle), tol=1.0e-4)
105
+ assert_np_equal(np.array(builder1.edge_bending_properties), np.array(builder2.edge_bending_properties))
103
106
 
104
107
 
105
108
  if __name__ == "__main__":
106
- c = register(unittest.TestCase)
109
+ wp.build.clear_kernel_cache()
107
110
  unittest.main(verbosity=2)
warp/tests/test_modules_lite.py ADDED
@@ -0,0 +1,39 @@
1
+ # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+
10
+ import warp as wp
11
+ from warp.tests.unittest_utils import *
12
+
13
+ wp.init()
14
+
15
+
16
+ devices = get_test_devices()
17
+
18
+
19
+ class TestModuleLite(unittest.TestCase):
20
+ def test_module_lite_load(self):
21
+ # Load current module
22
+ wp.load_module()
23
+
24
+ # Load named module
25
+ wp.load_module(wp.config)
26
+
27
+ # Load named module (string)
28
+ wp.load_module(wp.config, recursive=True)
29
+
30
+ def test_module_lite_options(self):
31
+ wp.set_module_options({"max_unroll": 8})
32
+ module_options = wp.get_module_options()
33
+ self.assertIsInstance(module_options, dict)
34
+ self.assertEqual(module_options["max_unroll"], 8)
35
+
36
+
37
+ if __name__ == "__main__":
38
+ wp.build.clear_kernel_cache()
39
+ unittest.main(verbosity=2)
@@ -5,16 +5,12 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
- import numpy as np
9
- import warp as wp
8
+ import unittest
10
9
 
11
- import math
10
+ import numpy as np
12
11
 
13
12
  import warp as wp
14
- from warp.tests.test_base import *
15
-
16
- import unittest
17
-
13
+ from warp.tests.unittest_utils import *
18
14
 
19
15
  wp.init()
20
16
 
@@ -31,156 +27,134 @@ def arange(start: int, step: int, a: wp.array(dtype=int)):
31
27
  a[tid] = start + step * tid
32
28
 
33
29
 
34
- def test_multigpu_set_device(test, device):
35
- assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
36
-
37
- # save default device
38
- saved_device = wp.get_device()
39
-
40
- n = 32
41
-
42
- wp.set_device("cuda:0")
43
- a0 = wp.empty(n, dtype=int)
44
- wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
45
-
46
- wp.set_device("cuda:1")
47
- a1 = wp.empty(n, dtype=int)
48
- wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])
49
-
50
- # restore default device
51
- wp.set_device(saved_device)
30
+ class TestMultiGPU(unittest.TestCase):
31
+ @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
32
+ def test_multigpu_set_device(self):
33
+ # save default device
34
+ saved_device = wp.get_device()
52
35
 
53
- assert a0.device == "cuda:0"
54
- assert a1.device == "cuda:1"
36
+ n = 32
55
37
 
56
- expected = np.arange(n, dtype=int)
57
-
58
- assert_np_equal(a0.numpy(), expected)
59
- assert_np_equal(a1.numpy(), expected)
60
-
61
-
62
- def test_multigpu_scoped_device(test, device):
63
- assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
64
-
65
- n = 32
66
-
67
- with wp.ScopedDevice("cuda:0"):
38
+ wp.set_device("cuda:0")
68
39
  a0 = wp.empty(n, dtype=int)
69
40
  wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
70
41
 
71
- with wp.ScopedDevice("cuda:1"):
42
+ wp.set_device("cuda:1")
72
43
  a1 = wp.empty(n, dtype=int)
73
44
  wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])
74
45
 
75
- assert a0.device == "cuda:0"
76
- assert a1.device == "cuda:1"
77
-
78
- expected = np.arange(n, dtype=int)
46
+ # restore default device
47
+ wp.set_device(saved_device)
79
48
 
80
- assert_np_equal(a0.numpy(), expected)
81
- assert_np_equal(a1.numpy(), expected)
49
+ assert a0.device == "cuda:0"
50
+ assert a1.device == "cuda:1"
82
51
 
52
+ expected = np.arange(n, dtype=int)
83
53
 
84
- def test_multigpu_nesting(test, device):
85
- assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
54
+ assert_np_equal(a0.numpy(), expected)
55
+ assert_np_equal(a1.numpy(), expected)
86
56
 
87
- initial_device = wp.get_device()
88
- initial_cuda_device = wp.get_cuda_device()
89
-
90
- with wp.ScopedDevice("cuda:1"):
91
- assert wp.get_device() == "cuda:1"
92
- assert wp.get_cuda_device() == "cuda:1"
57
+ @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
58
+ def test_multigpu_scoped_device(self):
59
+ n = 32
93
60
 
94
61
  with wp.ScopedDevice("cuda:0"):
95
- assert wp.get_device() == "cuda:0"
96
- assert wp.get_cuda_device() == "cuda:0"
97
-
98
- with wp.ScopedDevice("cpu"):
99
- assert wp.get_device() == "cpu"
100
- assert wp.get_cuda_device() == "cuda:0"
62
+ a0 = wp.empty(n, dtype=int)
63
+ wp.launch(arange, dim=a0.size, inputs=[0, 1, a0])
101
64
 
102
- wp.set_device("cuda:1")
65
+ with wp.ScopedDevice("cuda:1"):
66
+ a1 = wp.empty(n, dtype=int)
67
+ wp.launch(arange, dim=a1.size, inputs=[0, 1, a1])
103
68
 
104
- assert wp.get_device() == "cuda:1"
105
- assert wp.get_cuda_device() == "cuda:1"
69
+ assert a0.device == "cuda:0"
70
+ assert a1.device == "cuda:1"
106
71
 
107
- assert wp.get_device() == "cuda:0"
108
- assert wp.get_cuda_device() == "cuda:0"
72
+ expected = np.arange(n, dtype=int)
109
73
 
110
- assert wp.get_device() == "cuda:1"
111
- assert wp.get_cuda_device() == "cuda:1"
74
+ assert_np_equal(a0.numpy(), expected)
75
+ assert_np_equal(a1.numpy(), expected)
112
76
 
113
- assert wp.get_device() == initial_device
114
- assert wp.get_cuda_device() == initial_cuda_device
77
+ @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
78
+ def test_multigpu_nesting(self):
79
+ initial_device = wp.get_device()
80
+ initial_cuda_device = wp.get_cuda_device()
115
81
 
82
+ with wp.ScopedDevice("cuda:1"):
83
+ assert wp.get_device() == "cuda:1"
84
+ assert wp.get_cuda_device() == "cuda:1"
116
85
 
117
- def test_multigpu_pingpong(test, device):
118
- assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
86
+ with wp.ScopedDevice("cuda:0"):
87
+ assert wp.get_device() == "cuda:0"
88
+ assert wp.get_cuda_device() == "cuda:0"
119
89
 
120
- n = 1024 * 1024
90
+ with wp.ScopedDevice("cpu"):
91
+ assert wp.get_device() == "cpu"
92
+ assert wp.get_cuda_device() == "cuda:0"
121
93
 
122
- a0 = wp.zeros(n, dtype=float, device="cuda:0")
123
- a1 = wp.zeros(n, dtype=float, device="cuda:1")
94
+ wp.set_device("cuda:1")
124
95
 
125
- iters = 10
96
+ assert wp.get_device() == "cuda:1"
97
+ assert wp.get_cuda_device() == "cuda:1"
126
98
 
127
- for _ in range(iters):
128
- wp.launch(inc, dim=a0.size, inputs=[a0], device=a0.device)
129
- wp.synchronize_device(a0.device)
130
- wp.copy(a1, a0)
99
+ assert wp.get_device() == "cuda:0"
100
+ assert wp.get_cuda_device() == "cuda:0"
131
101
 
132
- wp.launch(inc, dim=a1.size, inputs=[a1], device=a1.device)
133
- wp.synchronize_device(a1.device)
134
- wp.copy(a0, a1)
102
+ assert wp.get_device() == "cuda:1"
103
+ assert wp.get_cuda_device() == "cuda:1"
135
104
 
136
- expected = np.full(n, iters * 2, dtype=np.float32)
105
+ assert wp.get_device() == initial_device
106
+ assert wp.get_cuda_device() == initial_cuda_device
137
107
 
138
- assert_np_equal(a0.numpy(), expected)
139
- assert_np_equal(a1.numpy(), expected)
108
+ @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
109
+ def test_multigpu_pingpong(self):
110
+ n = 1024 * 1024
140
111
 
112
+ a0 = wp.zeros(n, dtype=float, device="cuda:0")
113
+ a1 = wp.zeros(n, dtype=float, device="cuda:1")
141
114
 
142
- def test_multigpu_pingpong_streams(test, device):
143
- assert len(wp.get_cuda_devices()) > 1, "At least two CUDA devices are required"
115
+ iters = 10
144
116
 
145
- n = 1024 * 1024
117
+ for _ in range(iters):
118
+ wp.launch(inc, dim=a0.size, inputs=[a0], device=a0.device)
119
+ wp.synchronize_device(a0.device)
120
+ wp.copy(a1, a0)
146
121
 
147
- a0 = wp.zeros(n, dtype=float, device="cuda:0")
148
- a1 = wp.zeros(n, dtype=float, device="cuda:1")
122
+ wp.launch(inc, dim=a1.size, inputs=[a1], device=a1.device)
123
+ wp.synchronize_device(a1.device)
124
+ wp.copy(a0, a1)
149
125
 
150
- stream0 = wp.get_stream("cuda:0")
151
- stream1 = wp.get_stream("cuda:1")
126
+ expected = np.full(n, iters * 2, dtype=np.float32)
152
127
 
153
- iters = 10
128
+ assert_np_equal(a0.numpy(), expected)
129
+ assert_np_equal(a1.numpy(), expected)
154
130
 
155
- for _ in range(iters):
156
- wp.launch(inc, dim=a0.size, inputs=[a0], stream=stream0)
157
- stream1.wait_stream(stream0)
158
- wp.copy(a1, a0, stream=stream1)
131
+ @unittest.skipUnless(len(wp.get_cuda_devices()) > 1, "Requires at least two CUDA devices")
132
+ def test_multigpu_pingpong_streams(self):
133
+ n = 1024 * 1024
159
134
 
160
- wp.launch(inc, dim=a1.size, inputs=[a1], stream=stream1)
161
- stream0.wait_stream(stream1)
162
- wp.copy(a0, a1, stream=stream0)
135
+ a0 = wp.zeros(n, dtype=float, device="cuda:0")
136
+ a1 = wp.zeros(n, dtype=float, device="cuda:1")
163
137
 
164
- expected = np.full(n, iters * 2, dtype=np.float32)
138
+ stream0 = wp.get_stream("cuda:0")
139
+ stream1 = wp.get_stream("cuda:1")
165
140
 
166
- assert_np_equal(a0.numpy(), expected)
167
- assert_np_equal(a1.numpy(), expected)
141
+ iters = 10
168
142
 
143
+ for _ in range(iters):
144
+ wp.launch(inc, dim=a0.size, inputs=[a0], stream=stream0)
145
+ stream1.wait_stream(stream0)
146
+ wp.copy(a1, a0, stream=stream1)
169
147
 
170
- def register(parent):
171
- class TestMultigpu(parent):
172
- pass
148
+ wp.launch(inc, dim=a1.size, inputs=[a1], stream=stream1)
149
+ stream0.wait_stream(stream1)
150
+ wp.copy(a0, a1, stream=stream0)
173
151
 
174
- if wp.get_cuda_device_count() > 1:
175
- add_function_test(TestMultigpu, "test_multigpu_set_device", test_multigpu_set_device)
176
- add_function_test(TestMultigpu, "test_multigpu_scoped_device", test_multigpu_scoped_device)
177
- add_function_test(TestMultigpu, "test_multigpu_nesting", test_multigpu_nesting)
178
- add_function_test(TestMultigpu, "test_multigpu_pingpong", test_multigpu_pingpong)
179
- add_function_test(TestMultigpu, "test_multigpu_pingpong_streams", test_multigpu_pingpong_streams)
152
+ expected = np.full(n, iters * 2, dtype=np.float32)
180
153
 
181
- return TestMultigpu
154
+ assert_np_equal(a0.numpy(), expected)
155
+ assert_np_equal(a1.numpy(), expected)
182
156
 
183
157
 
184
158
  if __name__ == "__main__":
185
- c = register(unittest.TestCase)
159
+ wp.build.clear_kernel_cache()
186
160
  unittest.main(verbosity=2, failfast=False)