warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,141 @@
1
+ # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import math
9
+ import unittest
10
+
11
+ import warp as wp
12
+ from warp.tests.unittest_utils import *
13
+
14
+ wp.init()
15
+
16
+
17
+ @wp.kernel
18
+ def conditional_sum(result: wp.array(dtype=wp.uint64)):
19
+ i, j, k = wp.tid()
20
+
21
+ if i == 0:
22
+ wp.atomic_add(result, 0, wp.uint64(1))
23
+
24
+
25
+ def test_large_launch_large_kernel(test, device):
26
+ """Test tid() on kernel launch of 2**33 threads.
27
+
28
+ The kernel conditional_sum will add 1 to result for every thread that has an i index of 0.
29
+ Due to the size of the grid, this test is not run on CPUs
30
+ """
31
+ test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
32
+
33
+ large_dim_length = 2**16
34
+ half_result = large_dim_length * large_dim_length
35
+
36
+ wp.launch(kernel=conditional_sum, dim=[2, large_dim_length, large_dim_length], inputs=[test_result], device=device)
37
+ test.assertEqual(test_result.numpy()[0], half_result)
38
+
39
+
40
+ @wp.kernel
41
+ def count_elements(result: wp.array(dtype=wp.uint64)):
42
+ wp.atomic_add(result, 0, wp.uint64(1))
43
+
44
+
45
+ def test_large_launch_max_blocks(test, device):
46
+ # Loop over 1000x1x1 elements using a grid of 256 threads
47
+ test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
48
+ wp.launch(count_elements, (1000,), inputs=[test_result], max_blocks=1, device=device)
49
+ test.assertEqual(test_result.numpy()[0], 1000)
50
+
51
+ # Loop over 2x50x10 elements using a grid of 256 threads, using the tid() index to count half the elements
52
+ test_result.zero_()
53
+ wp.launch(
54
+ conditional_sum,
55
+ (
56
+ 2,
57
+ 50,
58
+ 10,
59
+ ),
60
+ inputs=[test_result],
61
+ max_blocks=1,
62
+ device=device,
63
+ )
64
+ test.assertEqual(test_result.numpy()[0], 500)
65
+
66
+
67
+ def test_large_launch_very_large_kernel(test, device):
68
+ """Due to the size of the grid, this test is not run on CPUs"""
69
+
70
+ # Dim is chosen to be larger than the maximum CUDA one-dimensional grid size (total threads)
71
+ dim = (2**31 - 1) * 256 + 1
72
+ test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
73
+ wp.launch(count_elements, (dim,), inputs=[test_result], device=device)
74
+ test.assertEqual(test_result.numpy()[0], dim)
75
+
76
+
77
+ def test_large_arrays_slow(test, device):
78
+ # The goal of this test is to use arrays just large enough to know
79
+ # if there's a flaw in handling arrays with more than 2**31-1 elements
80
+ # Unfortunately, it takes a long time to run so it won't be run automatically
81
+ # without changes to support how frequently a test may be run
82
+ total_elements = 2**31 + 8
83
+
84
+ # 1-D to 4-D arrays: test zero_, fill_, then zero_ for scalar data types:
85
+ for total_dims in range(1, 5):
86
+ dim_x = math.ceil(total_elements ** (1 / total_dims))
87
+ shape_tuple = tuple([dim_x] * total_dims)
88
+
89
+ for nptype, wptype in wp.types.np_dtype_to_warp_type.items():
90
+ a1 = wp.zeros(shape_tuple, dtype=wptype, device=device)
91
+ assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
92
+
93
+ a1.fill_(127)
94
+ assert_np_equal(a1.numpy(), 127 * np.ones_like(a1.numpy()))
95
+
96
+ a1.zero_()
97
+ assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
98
+
99
+
100
+ def test_large_arrays_fast(test, device):
101
+ # A truncated version of test_large_arrays_slow meant to catch basic errors
102
+ total_elements = 2**31 + 8
103
+
104
+ nptype = np.dtype(np.int8)
105
+ wptype = wp.types.np_dtype_to_warp_type[nptype]
106
+
107
+ a1 = wp.zeros((total_elements,), dtype=wptype, device=device)
108
+ assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
109
+
110
+ a1.fill_(127)
111
+ assert_np_equal(a1.numpy(), 127 * np.ones_like(a1.numpy()))
112
+
113
+ a1.zero_()
114
+ assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
115
+
116
+
117
+ devices = get_test_devices()
118
+
119
+
120
+ class TestLarge(unittest.TestCase):
121
+ pass
122
+
123
+
124
+ add_function_test(
125
+ TestLarge, "test_large_launch_large_kernel", test_large_launch_large_kernel, devices=get_unique_cuda_test_devices()
126
+ )
127
+
128
+ add_function_test(TestLarge, "test_large_launch_max_blocks", test_large_launch_max_blocks, devices=devices)
129
+ add_function_test(
130
+ TestLarge,
131
+ "test_large_launch_very_large_kernel",
132
+ test_large_launch_very_large_kernel,
133
+ devices=get_unique_cuda_test_devices(),
134
+ )
135
+
136
+ add_function_test(TestLarge, "test_large_arrays_fast", test_large_arrays_fast, devices=devices)
137
+
138
+
139
+ if __name__ == "__main__":
140
+ wp.build.clear_kernel_cache()
141
+ unittest.main(verbosity=2)
warp/tests/test_launch.py CHANGED
@@ -5,14 +5,12 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
- # include parent path
8
+ import unittest
9
+
9
10
  import numpy as np
10
- import math
11
11
 
12
12
  import warp as wp
13
- from warp.tests.test_base import *
14
-
15
- import unittest
13
+ from warp.tests.unittest_utils import *
16
14
 
17
15
  wp.init()
18
16
 
@@ -74,20 +72,258 @@ def test4d(test, device):
74
72
  wp.launch(kernel4d, dim=a.shape, inputs=[wp.array(a, dtype=int, device=device)], device=device)
75
73
 
76
74
 
77
- def register(parent):
78
- devices = get_test_devices()
75
+ @wp.struct
76
+ class Params:
77
+ a: wp.array(dtype=int)
78
+ i: int
79
+ f: float
80
+
81
+
82
+ @wp.kernel
83
+ def kernel_cmd(params: Params, i: int, f: float, v: wp.vec3, m: wp.mat33, out: wp.array(dtype=int)):
84
+ tid = wp.tid()
85
+
86
+ wp.expect_eq(params.i, i)
87
+ wp.expect_eq(params.f, f)
88
+
89
+ wp.expect_eq(i, int(f))
90
+
91
+ wp.expect_eq(v[0], f)
92
+ wp.expect_eq(v[1], f)
93
+ wp.expect_eq(v[2], f)
94
+
95
+ wp.expect_eq(m[0, 0], f)
96
+ wp.expect_eq(m[1, 1], f)
97
+ wp.expect_eq(m[2, 2], f)
98
+
99
+ out[tid] = tid + i
100
+
101
+
102
+ def test_launch_cmd(test, device):
103
+ n = 1
104
+
105
+ ref = np.arange(0, n)
106
+ out = wp.zeros(n, dtype=int, device=device)
107
+
108
+ params = Params()
109
+ params.i = 1
110
+ params.f = 1.0
111
+
112
+ v = wp.vec3(params.f, params.f, params.f)
113
+
114
+ m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
115
+
116
+ # standard launch
117
+ wp.launch(kernel_cmd, dim=n, inputs=[params, params.i, params.f, v, m, out], device=device)
118
+
119
+ assert_np_equal(out.numpy(), ref + params.i)
120
+
121
+ # cmd launch
122
+ out.zero_()
123
+
124
+ cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, params.i, params.f, v, m, out], device=device, record_cmd=True)
125
+
126
+ cmd.launch()
127
+
128
+ assert_np_equal(out.numpy(), ref + params.i)
129
+
130
+
131
+ def test_launch_cmd_set_param(test, device):
132
+ n = 1
133
+
134
+ ref = np.arange(0, n)
135
+
136
+ params = Params()
137
+ v = wp.vec3()
138
+ m = wp.mat33()
139
+
140
+ cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, 0, 0.0, v, m, None], device=device, record_cmd=True)
141
+
142
+ # cmd param modification
143
+ out = wp.zeros(n, dtype=int, device=device)
144
+
145
+ params.i = 13
146
+ params.f = 13.0
147
+
148
+ v = wp.vec3(params.f, params.f, params.f)
149
+
150
+ m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
151
+
152
+ cmd.set_param_at_index(0, params)
153
+ cmd.set_param_at_index(1, params.i)
154
+ cmd.set_param_at_index(2, params.f)
155
+ cmd.set_param_at_index(3, v)
156
+ cmd.set_param_at_index(4, m)
157
+ cmd.set_param_by_name("out", out)
158
+
159
+ cmd.launch()
160
+
161
+ assert_np_equal(out.numpy(), ref + params.i)
162
+
163
+ # test changing params after launch directly
164
+ # because we now cache the ctypes object inside the wp.struct
165
+ # instance the command buffer will be automatically updated
166
+ params.i = 14
167
+ params.f = 14.0
168
+
169
+ v = wp.vec3(params.f, params.f, params.f)
170
+
171
+ m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
172
+
173
+ # this is the line we explicitly leave out to
174
+ # ensure that param changes are reflected in the launch
175
+ # cmd.set_param_at_index(0, params)
176
+
177
+ cmd.set_param_at_index(1, params.i)
178
+ cmd.set_param_at_index(2, params.f)
179
+ cmd.set_param_at_index(3, v)
180
+ cmd.set_param_at_index(4, m)
181
+ cmd.set_param_by_name("out", out)
182
+
183
+ cmd.launch()
184
+
185
+ assert_np_equal(out.numpy(), ref + params.i)
186
+
187
+
188
+ def test_launch_cmd_set_ctype(test, device):
189
+ n = 1
190
+
191
+ ref = np.arange(0, n)
192
+
193
+ params = Params()
194
+ v = wp.vec3()
195
+ m = wp.mat33()
196
+
197
+ cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, 0, 0.0, v, m, None], device=device, record_cmd=True)
198
+
199
+ # cmd param modification
200
+ out = wp.zeros(n, dtype=int, device=device)
201
+
202
+ # reset the output array before launching
203
+ out.zero_()
204
+
205
+ params.i = 13
206
+ params.f = 13.0
207
+
208
+ v = wp.vec3(params.f, params.f, params.f)
209
+
210
+ m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
211
+
212
+ cmd.set_param_at_index_from_ctype(0, params.__ctype__())
213
+ cmd.set_param_at_index_from_ctype(1, params.i)
214
+ cmd.set_param_at_index_from_ctype(2, params.f)
215
+ cmd.set_param_at_index_from_ctype(3, v)
216
+ cmd.set_param_at_index_from_ctype(4, m)
217
+ cmd.set_param_by_name_from_ctype("out", out.__ctype__())
218
+
219
+ cmd.launch()
220
+
221
+ assert_np_equal(out.numpy(), ref + params.i)
222
+
223
+
224
+ @wp.kernel
225
+ def arange(out: wp.array(dtype=int)):
226
+ tid = wp.tid()
227
+ out[tid] = tid
228
+
229
+
230
+ def test_launch_cmd_set_dim(test, device):
231
+ n = 10
232
+
233
+ ref = np.arange(0, n, dtype=int)
234
+ out = wp.zeros(n, dtype=int, device=device)
235
+
236
+ cmd = wp.launch(arange, dim=n, inputs=[out], device=device, record_cmd=True)
237
+
238
+ cmd.set_dim(5)
239
+ cmd.launch()
240
+
241
+ # check first half the array is filled while rest is still zero
242
+ assert_np_equal(out.numpy()[0:5], ref[0:5])
243
+ assert_np_equal(out.numpy()[5:], np.zeros(5))
244
+
245
+ out.zero_()
246
+
247
+ cmd.set_dim(10)
248
+ cmd.launch()
249
+
250
+ # check the whole array was filled
251
+ assert_np_equal(out.numpy(), ref)
252
+
253
+
254
+ def test_launch_cmd_empty(test, device):
255
+ n = 10
256
+
257
+ ref = np.arange(0, n, dtype=int)
258
+ out = wp.zeros(n, dtype=int, device=device)
259
+
260
+ cmd = wp.Launch(arange, device)
261
+ cmd.set_dim(5)
262
+ cmd.set_param_by_name("out", out)
263
+
264
+ cmd.launch()
265
+
266
+ # check first half the array is filled while rest is still zero
267
+ assert_np_equal(out.numpy()[0:5], ref[0:5])
268
+ assert_np_equal(out.numpy()[5:], np.zeros(5))
269
+
270
+ out.zero_()
271
+
272
+ cmd.set_dim(10)
273
+ cmd.launch()
274
+
275
+ # check the whole array was filled
276
+ assert_np_equal(out.numpy(), ref)
277
+
278
+
279
+ @wp.kernel
280
+ def kernel_mul(
281
+ values: wp.array(dtype=int),
282
+ coeff: int,
283
+ out: wp.array(dtype=int),
284
+ ):
285
+ tid = wp.tid()
286
+ out[tid] = values[tid] * coeff
287
+
288
+
289
+ def test_launch_tuple_args(test, device):
290
+ values = wp.array(np.arange(0, 4), dtype=int, device=device)
291
+ coeff = 3
292
+ out = wp.empty_like(values)
293
+
294
+ wp.launch(
295
+ kernel_mul,
296
+ dim=len(values),
297
+ inputs=(
298
+ values,
299
+ coeff,
300
+ ),
301
+ outputs=(out,),
302
+ device=device,
303
+ )
304
+
305
+ assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
306
+
307
+
308
+ devices = get_test_devices()
309
+
310
+
311
+ class TestLaunch(unittest.TestCase):
312
+ pass
79
313
 
80
- class TestLaunch(parent):
81
- pass
82
314
 
83
- add_function_test(TestLaunch, "test_1d_launch", test1d, devices=devices)
84
- add_function_test(TestLaunch, "test_2d_launch", test2d, devices=devices)
85
- add_function_test(TestLaunch, "test_3d_launch", test3d, devices=devices)
86
- add_function_test(TestLaunch, "test_4d_launch", test4d, devices=devices)
315
+ add_function_test(TestLaunch, "test_launch_1d", test1d, devices=devices)
316
+ add_function_test(TestLaunch, "test_launch_2d", test2d, devices=devices)
317
+ add_function_test(TestLaunch, "test_launch_3d", test3d, devices=devices)
318
+ add_function_test(TestLaunch, "test_launch_4d", test4d, devices=devices)
87
319
 
88
- return TestLaunch
320
+ add_function_test(TestLaunch, "test_launch_cmd", test_launch_cmd, devices=devices)
321
+ add_function_test(TestLaunch, "test_launch_cmd_set_param", test_launch_cmd_set_param, devices=devices)
322
+ add_function_test(TestLaunch, "test_launch_cmd_set_ctype", test_launch_cmd_set_ctype, devices=devices)
323
+ add_function_test(TestLaunch, "test_launch_cmd_set_dim", test_launch_cmd_set_dim, devices=devices)
324
+ add_function_test(TestLaunch, "test_launch_cmd_empty", test_launch_cmd_empty, devices=devices)
89
325
 
90
326
 
91
327
  if __name__ == "__main__":
92
- c = register(unittest.TestCase)
328
+ wp.build.clear_kernel_cache()
93
329
  unittest.main(verbosity=2)
warp/tests/test_lerp.py CHANGED
@@ -5,14 +5,16 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
+ import unittest
8
9
  from dataclasses import dataclass
9
10
  from typing import Any
10
- import unittest
11
11
 
12
12
  import numpy as np
13
13
 
14
14
  import warp as wp
15
- from warp.tests.test_base import *
15
+ from warp.tests.unittest_utils import *
16
+
17
+ wp.init()
16
18
 
17
19
 
18
20
  @dataclass
@@ -162,8 +164,6 @@ TEST_DATA = {
162
164
  ),
163
165
  }
164
166
 
165
- wp.init()
166
-
167
167
 
168
168
  def test_lerp(test, device):
169
169
  def make_kernel_fn(data_type):
@@ -179,84 +179,83 @@ def test_lerp(test, device):
179
179
 
180
180
  for data_type in TEST_DATA:
181
181
  kernel_fn = make_kernel_fn(data_type)
182
- module = wp.get_module(kernel_fn.__module__)
183
182
  kernel = wp.Kernel(
184
183
  func=kernel_fn,
185
184
  key=f"test_lerp_{data_type.__name__}_kernel",
186
- module=module,
187
185
  )
188
186
 
189
- for test_data in TEST_DATA[data_type]:
190
- a = wp.array(
191
- [test_data.a],
192
- dtype=data_type,
193
- device=device,
194
- requires_grad=True,
195
- )
196
- b = wp.array(
197
- [test_data.b],
198
- dtype=data_type,
199
- device=device,
200
- requires_grad=True,
201
- )
202
- t = wp.array(
203
- [test_data.t],
204
- dtype=float,
205
- device=device,
206
- requires_grad=True,
207
- )
208
- out = wp.array(
209
- [0] * wp.types.type_length(data_type),
210
- dtype=data_type,
211
- device=device,
212
- requires_grad=True,
213
- )
214
-
215
- tape = wp.Tape()
216
- with tape:
217
- wp.launch(
218
- kernel,
219
- dim=1,
220
- inputs=[a, b, t, out],
187
+ with test.subTest(data_type=data_type):
188
+ for test_data in TEST_DATA[data_type]:
189
+ a = wp.array(
190
+ [test_data.a],
191
+ dtype=data_type,
221
192
  device=device,
193
+ requires_grad=True,
222
194
  )
223
-
224
- assert_np_equal(
225
- out.numpy(),
226
- np.array([test_data.expected]),
227
- tol=1e-6,
228
- )
229
-
230
- if test_data.check_backwards():
231
- tape.backward(out)
232
-
233
- assert_np_equal(
234
- tape.gradients[a].numpy(),
235
- np.array([test_data.expected_adj_a]),
236
- tol=1e-6,
195
+ b = wp.array(
196
+ [test_data.b],
197
+ dtype=data_type,
198
+ device=device,
199
+ requires_grad=True,
237
200
  )
238
- assert_np_equal(
239
- tape.gradients[b].numpy(),
240
- np.array([test_data.expected_adj_b]),
241
- tol=1e-6,
201
+ t = wp.array(
202
+ [test_data.t],
203
+ dtype=float,
204
+ device=device,
205
+ requires_grad=True,
206
+ )
207
+ out = wp.array(
208
+ [0] * wp.types.type_length(data_type),
209
+ dtype=data_type,
210
+ device=device,
211
+ requires_grad=True,
242
212
  )
213
+
214
+ tape = wp.Tape()
215
+ with tape:
216
+ wp.launch(
217
+ kernel,
218
+ dim=1,
219
+ inputs=[a, b, t, out],
220
+ device=device,
221
+ )
222
+
243
223
  assert_np_equal(
244
- tape.gradients[t].numpy(),
245
- np.array([test_data.expected_adj_t]),
224
+ out.numpy(),
225
+ np.array([test_data.expected]),
246
226
  tol=1e-6,
247
227
  )
248
228
 
229
+ if test_data.check_backwards():
230
+ tape.backward(out)
231
+
232
+ assert_np_equal(
233
+ tape.gradients[a].numpy(),
234
+ np.array([test_data.expected_adj_a]),
235
+ tol=1e-6,
236
+ )
237
+ assert_np_equal(
238
+ tape.gradients[b].numpy(),
239
+ np.array([test_data.expected_adj_b]),
240
+ tol=1e-6,
241
+ )
242
+ assert_np_equal(
243
+ tape.gradients[t].numpy(),
244
+ np.array([test_data.expected_adj_t]),
245
+ tol=1e-6,
246
+ )
247
+
248
+
249
+ devices = get_test_devices()
250
+
249
251
 
250
- def register(parent):
251
- devices = get_test_devices()
252
+ class TestLerp(unittest.TestCase):
253
+ pass
252
254
 
253
- class TestLerp(parent):
254
- pass
255
255
 
256
- add_function_test(TestLerp, "test_lerp", test_lerp, devices=devices)
257
- return TestLerp
256
+ add_function_test(TestLerp, "test_lerp", test_lerp, devices=devices)
258
257
 
259
258
 
260
259
  if __name__ == "__main__":
261
- _ = register(unittest.TestCase)
260
+ wp.build.clear_kernel_cache()
262
261
  unittest.main(verbosity=2)