warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315) hide show
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/build_dll.py ADDED
@@ -0,0 +1,384 @@
1
+ # Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import sys
9
+ import os
10
+ import subprocess
11
+ import platform
12
+
13
+ import warp.config
14
+ from warp.utils import ScopedTimer
15
+
16
+
17
+ # returns a canonical machine architecture string
18
+ # - "x86_64" for x86-64, aka. AMD64, aka. x64
19
+ # - "aarch64" for AArch64, aka. ARM64
20
def machine_architecture() -> str:
    """Return the canonical name of the host machine architecture.

    The string reported by ``platform.machine()`` is mapped as follows:

    - ``"x86_64"`` or ``"AMD64"`` -> ``"x86_64"``
    - ``"aarch64"`` or ``"arm64"`` -> ``"aarch64"``

    Raises:
        RuntimeError: If ``platform.machine()`` reports any other value.
    """
    reported = platform.machine()

    # every spelling we accept, keyed to its canonical name
    aliases = {
        "x86_64": "x86_64",
        "AMD64": "x86_64",
        "aarch64": "aarch64",
        "arm64": "aarch64",
    }

    canonical = aliases.get(reported)
    if canonical is None:
        raise RuntimeError(f"Unrecognized machine architecture {reported}")
    return canonical
27
+
28
+
29
def run_cmd(cmd, capture=False):
    """Run ``cmd`` through the system shell and return its standard output.

    The command is echoed first when ``warp.config.verbose`` is set. If the
    command exits with a non-zero status, any output attached to the error is
    printed and the error is re-raised.

    Args:
        cmd: Command line passed to ``subprocess.check_output`` with ``shell=True``.
        capture: Not used by this function.

    Returns:
        The bytes the command wrote to stdout.

    Raises:
        subprocess.CalledProcessError: If the command returns a non-zero exit code.
    """
    if warp.config.verbose:
        print(cmd)

    try:
        output = subprocess.check_output(cmd, shell=True)
    except subprocess.CalledProcessError as err:
        # NOTE(review): stderr is not redirected by the call above, so err.stderr
        # is presumably always None here and only stdout gets printed - confirm
        for stream in (err.stdout, err.stderr):
            if stream:
                print(stream.decode())
        raise err

    return output
41
+
42
+
43
+ # cut-down version of vcvars64.bat that allows using
44
+ # custom toolchain locations
45
def set_msvc_compiler(msvc_path, sdk_path):
    """Configure the process environment for a custom MSVC / Windows SDK location.

    Appends the toolchain and SDK directories to the ``INCLUDE``, ``LIB`` and
    ``PATH`` environment variables (creating ``INCLUDE`` and ``LIB`` as empty
    strings if they are missing) and stores the path of ``cl.exe`` in
    ``warp.config.host_compiler``.

    Args:
        msvc_path: Root directory of the MSVC toolchain; made absolute before use.
        sdk_path: Root directory of the Windows SDK; made absolute before use.
    """
    msvc_root = os.path.abspath(msvc_path)
    sdk_root = os.path.abspath(sdk_path)

    # (environment variable, root directory, sub-directory), applied in this order
    additions = (
        ("INCLUDE", msvc_root, "include"),
        ("INCLUDE", sdk_root, "include/winrt"),
        ("INCLUDE", sdk_root, "include/um"),
        ("INCLUDE", sdk_root, "include/ucrt"),
        ("INCLUDE", sdk_root, "include/shared"),
        ("LIB", msvc_root, "lib/x64"),
        ("LIB", sdk_root, "lib/ucrt/x64"),
        ("LIB", sdk_root, "lib/um/x64"),
        ("PATH", msvc_root, "bin/HostX64/x64"),
        ("PATH", sdk_root, "bin/x64"),
    )

    # INCLUDE and LIB may be absent; PATH is expected to exist already
    for optional_var in ("INCLUDE", "LIB"):
        os.environ.setdefault(optional_var, "")

    for var_name, root_dir, sub_dir in additions:
        os.environ[var_name] += os.pathsep + os.path.join(root_dir, sub_dir)

    warp.config.host_compiler = os.path.join(msvc_root, "bin", "HostX64", "x64", "cl.exe")
69
+
70
+
71
def find_host_compiler():
    """Locate a host C++ compiler and return its path.

    On Windows (``os.name == "nt"``) this looks up the latest Visual Studio
    installation with ``vswhere.exe``, runs ``vcvars64.bat`` and copies the
    resulting environment variables into ``os.environ``, then resolves
    ``cl.exe`` with ``where``. The MSVC toolset version read from the
    ``VCToolsVersion`` environment variable must be at least 14.29.

    On other platforms the path printed by ``which g++`` is returned.

    Returns:
        The compiler path as a string without surrounding whitespace, or ``""``
        when no compiler is found, the MSVC version is too old, or any step of
        the detection fails.
    """
    if os.name == "nt":
        try:
            # try and find an installed host compiler (msvc)
            # runs vcvars and copies back the build environment

            vswhere_path = r"%ProgramFiles(x86)%/Microsoft Visual Studio/Installer/vswhere.exe"
            vswhere_path = os.path.expandvars(vswhere_path)
            if not os.path.exists(vswhere_path):
                return ""

            vs_path = run_cmd(f'"{vswhere_path}" -latest -property installationPath').decode().rstrip()
            vsvars_path = os.path.join(vs_path, "VC\\Auxiliary\\Build\\vcvars64.bat")

            output = run_cmd(f'"{vsvars_path}" && set').decode()

            for line in output.splitlines():
                name, sep, value = line.partition("=")
                if sep:
                    os.environ[name] = value

            # `where` prints one match per line; keep only the first one so the
            # result is a single usable path even if several cl.exe are on PATH
            cl_path = run_cmd("where cl.exe").decode("utf-8").splitlines()[0].strip()
            cl_version = os.environ["VCToolsVersion"].split(".")

            # ensure at least VS2019 version, see list of MSVC versions here https://en.wikipedia.org/wiki/Microsoft_Visual_C%2B%2B
            cl_required_major = 14
            cl_required_minor = 29

            if (int(cl_version[0]), int(cl_version[1])) < (cl_required_major, cl_required_minor):
                print(
                    f"Warp: MSVC found but compiler version too old, found {cl_version[0]}.{cl_version[1]}, but must be {cl_required_major}.{cl_required_minor} or higher, kernel host compilation will be disabled."
                )
                return ""

            return cl_path

        except Exception:
            # couldn't find host compiler
            return ""
    else:
        # try and find g++
        try:
            # strip the trailing newline printed by `which`
            return run_cmd("which g++").decode().strip()
        except Exception:
            # deliberately best-effort, but do not swallow KeyboardInterrupt/SystemExit
            return ""
120
+
121
+
122
def get_cuda_toolkit_version(cuda_home):
    """Query the CUDA Toolkit version by running ``nvcc --version``.

    Args:
        cuda_home: CUDA Toolkit root directory; ``nvcc`` is looked up in its ``bin`` folder.

    Returns:
        A tuple of ints parsed from the ``release <major>.<minor>`` substring of
        the nvcc output, e.g. ``(11, 5)``. If anything goes wrong, a message is
        printed and ``None`` is returned.
    """
    try:
        nvcc_exe = os.path.join(cuda_home, "bin", "nvcc")
        banner = subprocess.check_output([nvcc_exe, "--version"]).decode("utf-8")

        # look for the release substring (e.g., "release 11.5")
        import re

        release = re.search(r"(?<=release )\d+\.\d+", banner)
        if release is None:
            raise Exception("Failed to parse NVCC output")

        return tuple(map(int, release.group(0).split(".")))

    except Exception as e:
        print(f"Failed to determine CUDA Toolkit version: {e}")
        return None
138
+
139
+
140
def quote(path):
    """Return the string ``path`` wrapped in double quotes."""
    return "".join(('"', path, '"'))
142
+
143
+
144
def build_dll_for_arch(dll_path, cpp_paths, cu_path, libs, mode, arch, verify_fp=False, fast_math=False, quick=False):
    """Compile and link a shared library for a single target architecture.

    Each file in ``cpp_paths`` is compiled with the host compiler (``cl.exe``
    from ``warp.config.host_compiler`` on Windows, ``g++`` elsewhere), the
    optional CUDA source ``cu_path`` is compiled with ``nvcc`` from
    ``warp.config.cuda_path``, and all objects are linked into ``dll_path``.
    On non-Windows platforms the result is stripped afterwards.

    Args:
        dll_path: Output path of the shared library.
        cpp_paths: Iterable of C++ source paths; objects are written next to them.
        cu_path: CUDA source path, or ``None`` to build without CUDA.
        libs: List of extra linker inputs appended to the link command.
        mode: Build configuration, ``"debug"`` or ``"release"``.
        arch: Target architecture string, used in include paths, the macOS
            target triple and to select the extra mobile GPU architectures.
        verify_fp: Define ``WP_VERIFY_FP`` for the host compilation.
        fast_math: Enable fast-math flags for the host and CUDA compilers.
        quick: Disable CUTLASS and CUDA compatibility, and only emit PTX for
            the minimum supported architectures.

    Raises:
        RuntimeError: If no host compiler is configured on Windows, or the
            build configuration is not ``"debug"`` or ``"release"``.
        Exception: If the detected CUDA Toolkit is older than 11.5.
    """
    cuda_home = warp.config.cuda_path

    if quick:
        cutlass_includes = ""
        cutlass_enabled = "WP_ENABLE_CUTLASS=0"
    else:
        cutlass_home = "warp/native/cutlass"
        cutlass_includes = f'-I"{cutlass_home}/include" -I"{cutlass_home}/tools/util/include"'
        cutlass_enabled = "WP_ENABLE_CUTLASS=1"

    if quick or cu_path is None:
        cuda_compat_enabled = "WP_ENABLE_CUDA_COMPATIBILITY=0"
    else:
        cuda_compat_enabled = "WP_ENABLE_CUDA_COMPATIBILITY=1"

    import pathlib

    warp_home_path = pathlib.Path(__file__).parent
    warp_home = warp_home_path.resolve()
    nanovdb_home = warp_home_path.parent / "_build/host-deps/nanovdb/include"

    # output stale, rebuild
    if warp.config.verbose:
        print(f"Building {dll_path}")

    native_dir = os.path.join(warp_home, "native")

    if cu_path:
        # check CUDA Toolkit version
        min_ctk_version = (11, 5)
        # if the version cannot be determined, assume the minimum supported one
        ctk_version = get_cuda_toolkit_version(cuda_home) or min_ctk_version
        if ctk_version < min_ctk_version:
            raise Exception(
                f"CUDA Toolkit version {min_ctk_version[0]}.{min_ctk_version[1]}+ is required (found {ctk_version[0]}.{ctk_version[1]} in {cuda_home})"
            )

        gencode_opts = []

        if quick:
            # minimum supported architectures (PTX)
            gencode_opts += ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
        else:
            # generate code for all supported architectures
            gencode_opts += [
                # SASS for supported desktop/datacenter architectures
                "-gencode=arch=compute_52,code=sm_52",  # Maxwell
                "-gencode=arch=compute_60,code=sm_60",  # Pascal
                "-gencode=arch=compute_61,code=sm_61",
                "-gencode=arch=compute_70,code=sm_70",  # Volta
                "-gencode=arch=compute_75,code=sm_75",  # Turing
                "-gencode=arch=compute_80,code=sm_80",  # Ampere
                "-gencode=arch=compute_86,code=sm_86",
            ]
            if arch == "aarch64" and sys.platform == "linux":
                gencode_opts += [
                    # SASS for supported mobile architectures (e.g. Tegra/Jetson)
                    "-gencode=arch=compute_53,code=sm_53",  # X1
                    "-gencode=arch=compute_62,code=sm_62",  # X2
                    "-gencode=arch=compute_72,code=sm_72",  # Xavier
                    "-gencode=arch=compute_87,code=sm_87",  # Orin
                ]

            # support for Ada and Hopper is available with CUDA Toolkit 11.8+
            if ctk_version >= (11, 8):
                gencode_opts += [
                    "-gencode=arch=compute_89,code=sm_89",  # Ada
                    "-gencode=arch=compute_90,code=sm_90",  # Hopper
                    # PTX for future hardware
                    "-gencode=arch=compute_90,code=compute_90",
                ]
            else:
                gencode_opts += [
                    # PTX for future hardware
                    "-gencode=arch=compute_86,code=compute_86",
                ]

        nvcc_opts = gencode_opts + [
            "-t0",  # multithreaded compilation
            "--extended-lambda",
        ]

        if fast_math:
            nvcc_opts.append("--use_fast_math")

    # is the library being built with CUDA enabled?
    cuda_enabled = "WP_ENABLE_CUDA=1" if (cu_path is not None) else "WP_ENABLE_CUDA=0"

    if os.name == "nt":
        if warp.config.host_compiler:
            host_linker = os.path.join(os.path.dirname(warp.config.host_compiler), "link.exe")
        else:
            raise RuntimeError("Warp build error: No host compiler was found")

        cpp_includes = f' /I"{warp_home_path.parent}/external/llvm-project/out/install/{mode}-{arch}/include"'
        cpp_includes += f' /I"{warp_home_path.parent}/_build/host-deps/llvm-project/release-{arch}/include"'
        cuda_includes = f' /I"{cuda_home}/include"' if cu_path else ""
        includes = cpp_includes + cuda_includes

        # nvrtc_static.lib is built with /MT and _ITERATOR_DEBUG_LEVEL=0 so if we link it in we must match these options
        if cu_path or mode != "debug":
            runtime = "/MT"
            iter_dbg = "_ITERATOR_DEBUG_LEVEL=0"
            debug = "NDEBUG"
        else:
            runtime = "/MTd"
            iter_dbg = "_ITERATOR_DEBUG_LEVEL=2"
            debug = "_DEBUG"

        # NOTE(review): the flag selection below keys off warp.config.mode while the
        # rest of this function (and the error message) uses the `mode` argument -
        # confirm whether this difference is intentional
        if warp.config.mode == "debug":
            cpp_flags = f'/nologo {runtime} /Zi /Od /D "{debug}" /D WP_ENABLE_DEBUG=1 /D "{cuda_enabled}" /D "{cutlass_enabled}" /D "{cuda_compat_enabled}" /D "{iter_dbg}" /I"{native_dir}" /I"{nanovdb_home}" {includes}'
            linkopts = ["/DLL", "/DEBUG"]
        elif warp.config.mode == "release":
            cpp_flags = f'/nologo {runtime} /Ox /D "{debug}" /D WP_ENABLE_DEBUG=0 /D "{cuda_enabled}" /D "{cutlass_enabled}" /D "{cuda_compat_enabled}" /D "{iter_dbg}" /I"{native_dir}" /I"{nanovdb_home}" {includes}'
            linkopts = ["/DLL"]
        else:
            raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {mode}")

        if verify_fp:
            cpp_flags += ' /D "WP_VERIFY_FP"'

        if fast_math:
            cpp_flags += " /fp:fast"

        with ScopedTimer("build", active=warp.config.verbose):
            for cpp_path in cpp_paths:
                cpp_out = cpp_path + ".obj"
                linkopts.append(quote(cpp_out))

                cpp_cmd = f'"{warp.config.host_compiler}" {cpp_flags} -c "{cpp_path}" /Fo"{cpp_out}"'
                run_cmd(cpp_cmd)

        if cu_path:
            cu_out = cu_path + ".o"

            if mode == "debug":
                cuda_cmd = f'"{cuda_home}/bin/nvcc" --compiler-options=/MT,/Zi,/Od -g -G -O0 -DNDEBUG -D_ITERATOR_DEBUG_LEVEL=0 -I"{native_dir}" -I"{nanovdb_home}" -line-info {" ".join(nvcc_opts)} -DWP_ENABLE_CUDA=1 -D{cutlass_enabled} {cutlass_includes} -o "{cu_out}" -c "{cu_path}"'

            elif mode == "release":
                cuda_cmd = f'"{cuda_home}/bin/nvcc" -O3 {" ".join(nvcc_opts)} -I"{native_dir}" -I"{nanovdb_home}" -DNDEBUG -DWP_ENABLE_CUDA=1 -D{cutlass_enabled} {cutlass_includes} -o "{cu_out}" -c "{cu_path}"'

            else:
                # fail clearly instead of handing None to the shell
                raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {mode}")

            with ScopedTimer("build_cuda", active=warp.config.verbose):
                run_cmd(cuda_cmd)
                linkopts.append(quote(cu_out))
                linkopts.append(
                    f'cudart_static.lib nvrtc_static.lib nvrtc-builtins_static.lib nvptxcompiler_static.lib ws2_32.lib user32.lib /LIBPATH:"{cuda_home}/lib/x64"'
                )

        with ScopedTimer("link", active=warp.config.verbose):
            link_cmd = f'"{host_linker}" {" ".join(linkopts + libs)} /out:"{dll_path}"'
            run_cmd(link_cmd)

    else:
        cpp_includes = f' -I"{warp_home_path.parent}/external/llvm-project/out/install/{mode}-{arch}/include"'
        cpp_includes += f' -I"{warp_home_path.parent}/_build/host-deps/llvm-project/release-{arch}/include"'
        cuda_includes = f' -I"{cuda_home}/include"' if cu_path else ""
        includes = cpp_includes + cuda_includes

        if sys.platform == "darwin":
            target = f"--target={arch}-apple-macos11"
        else:
            target = ""

        if mode == "debug":
            cpp_flags = f'{target} -O0 -g -fno-rtti -D_DEBUG -DWP_ENABLE_DEBUG=1 -D{cuda_enabled} -D{cutlass_enabled} -D{cuda_compat_enabled} -fPIC -fvisibility=hidden --std=c++14 -D_GLIBCXX_USE_CXX11_ABI=0 -fkeep-inline-functions -I"{native_dir}" {includes}'

        elif mode == "release":
            cpp_flags = f'{target} -O3 -DNDEBUG -DWP_ENABLE_DEBUG=0 -D{cuda_enabled} -D{cutlass_enabled} -D{cuda_compat_enabled} -fPIC -fvisibility=hidden --std=c++14 -D_GLIBCXX_USE_CXX11_ABI=0 -I"{native_dir}" {includes}'

        else:
            # without this, cpp_flags would be unbound below for any other mode
            raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {mode}")

        if verify_fp:
            cpp_flags += " -DWP_VERIFY_FP"

        if fast_math:
            cpp_flags += " -ffast-math"

        ld_inputs = []

        with ScopedTimer("build", active=warp.config.verbose):
            for cpp_path in cpp_paths:
                cpp_out = cpp_path + ".o"
                ld_inputs.append(quote(cpp_out))

                build_cmd = f'g++ {cpp_flags} -c "{cpp_path}" -o "{cpp_out}"'
                run_cmd(build_cmd)

        if cu_path:
            cu_out = cu_path + ".o"

            # mode was validated above, so it is either "debug" or "release" here
            if mode == "debug":
                cuda_cmd = f'"{cuda_home}/bin/nvcc" -g -G -O0 --compiler-options -fPIC,-fvisibility=hidden -D_DEBUG -D_ITERATOR_DEBUG_LEVEL=0 -line-info {" ".join(nvcc_opts)} -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{cutlass_enabled} {cutlass_includes} -o "{cu_out}" -c "{cu_path}"'

            else:
                cuda_cmd = f'"{cuda_home}/bin/nvcc" -O3 --compiler-options -fPIC,-fvisibility=hidden {" ".join(nvcc_opts)} -DNDEBUG -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{cutlass_enabled} {cutlass_includes} -o "{cu_out}" -c "{cu_path}"'

            with ScopedTimer("build_cuda", active=warp.config.verbose):
                run_cmd(cuda_cmd)

                ld_inputs.append(quote(cu_out))
                ld_inputs.append(
                    f'-L"{cuda_home}/lib64" -lcudart_static -lnvrtc_static -lnvrtc-builtins_static -lnvptxcompiler_static -lpthread -ldl -lrt'
                )

        if sys.platform == "darwin":
            opt_no_undefined = "-Wl,-undefined,error"
            opt_exclude_libs = ""
        else:
            opt_no_undefined = "-Wl,--no-undefined"
            opt_exclude_libs = "-Wl,--exclude-libs,ALL"

        with ScopedTimer("link", active=warp.config.verbose):
            origin = "@loader_path" if (sys.platform == "darwin") else "$ORIGIN"
            link_cmd = f"g++ {target} -shared -Wl,-rpath,'{origin}' {opt_no_undefined} {opt_exclude_libs} -o '{dll_path}' {' '.join(ld_inputs + libs)}"
            run_cmd(link_cmd)

            # Strip symbols to reduce the binary size
            # (dll_path is quoted the same way as in the link command so paths with spaces work)
            if sys.platform == "darwin":
                run_cmd(f"strip -x '{dll_path}'")  # Strip all local symbols
            else:  # Linux
                # Strip all symbols except for those needed to support debugging JIT-compiled code
                run_cmd(
                    f"strip --strip-all --keep-symbol=__jit_debug_register_code --keep-symbol=__jit_debug_descriptor '{dll_path}'"
                )
367
+
368
+
369
def build_dll(dll_path, cpp_paths, cu_path, libs=None, mode="release", verify_fp=False, fast_math=False, quick=False):
    """Build a shared library for the current platform.

    On macOS (``sys.platform == "darwin"``) the library is built once for
    x86-64 and once for AArch64, the two results are merged into a universal
    binary at ``dll_path`` with ``lipo``, and the per-architecture files are
    deleted. On every other platform a single build for
    ``machine_architecture()`` is performed.

    Args:
        dll_path: Output path of the shared library.
        cpp_paths: C++ source paths forwarded to ``build_dll_for_arch``.
        cu_path: CUDA source path forwarded to ``build_dll_for_arch``.
        libs: Extra linker inputs; ``None`` (the default) means no extra inputs.
        mode: Build configuration forwarded to ``build_dll_for_arch``.
        verify_fp: Forwarded to ``build_dll_for_arch``.
        fast_math: Forwarded to ``build_dll_for_arch``.
        quick: Forwarded to ``build_dll_for_arch``.
    """
    # avoid a shared mutable default argument
    if libs is None:
        libs = []

    if sys.platform == "darwin":
        # create a universal binary by combining x86-64 and AArch64 builds
        build_dll_for_arch(dll_path + "-x86_64", cpp_paths, cu_path, libs, mode, "x86_64", verify_fp, fast_math, quick)
        build_dll_for_arch(
            dll_path + "-aarch64", cpp_paths, cu_path, libs, mode, "aarch64", verify_fp, fast_math, quick
        )

        # quote every path so locations containing spaces survive the shell
        run_cmd(f"lipo -create -output '{dll_path}' '{dll_path}-x86_64' '{dll_path}-aarch64'")
        os.remove(f"{dll_path}-x86_64")
        os.remove(f"{dll_path}-aarch64")

    else:
        build_dll_for_arch(
            dll_path, cpp_paths, cu_path, libs, mode, machine_architecture(), verify_fp, fast_math, quick
        )