warp-lang 1.9.1__py3-none-manylinux_2_34_aarch64.whl → 1.10.0rc2__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (346)
  1. warp/__init__.py +301 -287
  2. warp/__init__.pyi +794 -305
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1075 -0
  5. warp/_src/build.py +618 -0
  6. warp/_src/build_dll.py +640 -0
  7. warp/{builtins.py → _src/builtins.py} +1382 -377
  8. warp/_src/codegen.py +4359 -0
  9. warp/{config.py → _src/config.py} +178 -169
  10. warp/_src/constants.py +57 -0
  11. warp/_src/context.py +8294 -0
  12. warp/_src/dlpack.py +462 -0
  13. warp/_src/fabric.py +355 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +508 -0
  16. warp/_src/fem/cache.py +687 -0
  17. warp/_src/fem/dirichlet.py +188 -0
  18. warp/{fem → _src/fem}/domain.py +40 -30
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +701 -0
  21. warp/{fem → _src/fem}/field/nodal_field.py +30 -15
  22. warp/{fem → _src/fem}/field/restriction.py +1 -1
  23. warp/{fem → _src/fem}/field/virtual.py +53 -27
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
  26. warp/_src/fem/geometry/closest_point.py +97 -0
  27. warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
  28. warp/{fem → _src/fem}/geometry/element.py +32 -10
  29. warp/{fem → _src/fem}/geometry/geometry.py +48 -20
  30. warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
  31. warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
  32. warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
  33. warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
  34. warp/{fem → _src/fem}/geometry/partition.py +121 -63
  35. warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
  36. warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
  37. warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
  38. warp/{fem → _src/fem}/integrate.py +164 -158
  39. warp/_src/fem/linalg.py +383 -0
  40. warp/_src/fem/operator.py +396 -0
  41. warp/_src/fem/polynomial.py +229 -0
  42. warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
  43. warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
  44. warp/_src/fem/space/__init__.py +248 -0
  45. warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
  46. warp/_src/fem/space/basis_space.py +679 -0
  47. warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
  48. warp/{fem → _src/fem}/space/function_space.py +14 -13
  49. warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
  50. warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
  51. warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
  52. warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
  53. warp/{fem → _src/fem}/space/partition.py +117 -60
  54. warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
  55. warp/{fem → _src/fem}/space/restriction.py +66 -33
  56. warp/_src/fem/space/shape/__init__.py +152 -0
  57. warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
  58. warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
  59. warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
  60. warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
  61. warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
  62. warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
  63. warp/_src/fem/space/topology.py +459 -0
  64. warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
  65. warp/_src/fem/types.py +112 -0
  66. warp/_src/fem/utils.py +486 -0
  67. warp/_src/jax.py +186 -0
  68. warp/_src/jax_experimental/__init__.py +14 -0
  69. warp/_src/jax_experimental/custom_call.py +387 -0
  70. warp/_src/jax_experimental/ffi.py +1284 -0
  71. warp/_src/jax_experimental/xla_ffi.py +656 -0
  72. warp/_src/marching_cubes.py +708 -0
  73. warp/_src/math.py +414 -0
  74. warp/_src/optim/__init__.py +14 -0
  75. warp/_src/optim/adam.py +163 -0
  76. warp/_src/optim/linear.py +1606 -0
  77. warp/_src/optim/sgd.py +112 -0
  78. warp/_src/paddle.py +406 -0
  79. warp/_src/render/__init__.py +14 -0
  80. warp/_src/render/imgui_manager.py +289 -0
  81. warp/_src/render/render_opengl.py +3636 -0
  82. warp/_src/render/render_usd.py +937 -0
  83. warp/_src/render/utils.py +160 -0
  84. warp/_src/sparse.py +2716 -0
  85. warp/_src/tape.py +1206 -0
  86. warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
  87. warp/_src/torch.py +391 -0
  88. warp/_src/types.py +5870 -0
  89. warp/_src/utils.py +1693 -0
  90. warp/autograd.py +12 -1054
  91. warp/bin/warp-clang.so +0 -0
  92. warp/bin/warp.so +0 -0
  93. warp/build.py +8 -588
  94. warp/build_dll.py +6 -721
  95. warp/codegen.py +6 -4251
  96. warp/constants.py +6 -39
  97. warp/context.py +12 -8062
  98. warp/dlpack.py +6 -444
  99. warp/examples/distributed/example_jacobi_mpi.py +4 -5
  100. warp/examples/fem/example_adaptive_grid.py +1 -1
  101. warp/examples/fem/example_apic_fluid.py +1 -1
  102. warp/examples/fem/example_burgers.py +8 -8
  103. warp/examples/fem/example_diffusion.py +1 -1
  104. warp/examples/fem/example_distortion_energy.py +1 -1
  105. warp/examples/fem/example_mixed_elasticity.py +2 -2
  106. warp/examples/fem/example_navier_stokes.py +1 -1
  107. warp/examples/fem/example_nonconforming_contact.py +7 -7
  108. warp/examples/fem/example_stokes.py +1 -1
  109. warp/examples/fem/example_stokes_transfer.py +1 -1
  110. warp/examples/fem/utils.py +2 -2
  111. warp/examples/interop/example_jax_callable.py +1 -1
  112. warp/examples/interop/example_jax_ffi_callback.py +1 -1
  113. warp/examples/interop/example_jax_kernel.py +1 -1
  114. warp/examples/tile/example_tile_mcgp.py +191 -0
  115. warp/fabric.py +6 -337
  116. warp/fem/__init__.py +159 -97
  117. warp/fem/adaptivity.py +7 -489
  118. warp/fem/cache.py +9 -648
  119. warp/fem/dirichlet.py +6 -184
  120. warp/fem/field/__init__.py +8 -109
  121. warp/fem/field/field.py +7 -652
  122. warp/fem/geometry/__init__.py +7 -18
  123. warp/fem/geometry/closest_point.py +11 -77
  124. warp/fem/linalg.py +18 -366
  125. warp/fem/operator.py +11 -369
  126. warp/fem/polynomial.py +9 -209
  127. warp/fem/space/__init__.py +5 -211
  128. warp/fem/space/basis_space.py +6 -662
  129. warp/fem/space/shape/__init__.py +41 -118
  130. warp/fem/space/topology.py +6 -437
  131. warp/fem/types.py +6 -81
  132. warp/fem/utils.py +11 -444
  133. warp/jax.py +8 -165
  134. warp/jax_experimental/__init__.py +14 -1
  135. warp/jax_experimental/custom_call.py +8 -365
  136. warp/jax_experimental/ffi.py +17 -873
  137. warp/jax_experimental/xla_ffi.py +5 -605
  138. warp/marching_cubes.py +5 -689
  139. warp/math.py +16 -393
  140. warp/native/array.h +385 -37
  141. warp/native/builtin.h +314 -37
  142. warp/native/bvh.cpp +43 -9
  143. warp/native/bvh.cu +62 -27
  144. warp/native/bvh.h +310 -309
  145. warp/native/clang/clang.cpp +102 -97
  146. warp/native/coloring.cpp +0 -1
  147. warp/native/crt.h +208 -0
  148. warp/native/exports.h +156 -0
  149. warp/native/hashgrid.cu +2 -0
  150. warp/native/intersect.h +24 -1
  151. warp/native/intersect_tri.h +44 -35
  152. warp/native/mat.h +1456 -276
  153. warp/native/mesh.cpp +4 -4
  154. warp/native/mesh.cu +4 -2
  155. warp/native/mesh.h +176 -61
  156. warp/native/quat.h +0 -52
  157. warp/native/scan.cu +2 -0
  158. warp/native/sparse.cu +7 -3
  159. warp/native/spatial.h +12 -0
  160. warp/native/tile.h +681 -89
  161. warp/native/tile_radix_sort.h +1 -1
  162. warp/native/tile_reduce.h +394 -46
  163. warp/native/tile_scan.h +4 -4
  164. warp/native/vec.h +469 -0
  165. warp/native/version.h +23 -0
  166. warp/native/volume.cpp +1 -1
  167. warp/native/volume.cu +1 -0
  168. warp/native/volume.h +1 -1
  169. warp/native/volume_builder.cu +2 -0
  170. warp/native/warp.cpp +57 -29
  171. warp/native/warp.cu +253 -171
  172. warp/native/warp.h +11 -8
  173. warp/optim/__init__.py +6 -3
  174. warp/optim/adam.py +6 -145
  175. warp/optim/linear.py +14 -1585
  176. warp/optim/sgd.py +6 -94
  177. warp/paddle.py +6 -388
  178. warp/render/__init__.py +8 -4
  179. warp/render/imgui_manager.py +7 -267
  180. warp/render/render_opengl.py +6 -3618
  181. warp/render/render_usd.py +6 -919
  182. warp/render/utils.py +6 -142
  183. warp/sparse.py +37 -2563
  184. warp/tape.py +6 -1188
  185. warp/tests/__main__.py +1 -1
  186. warp/tests/cuda/test_async.py +4 -4
  187. warp/tests/cuda/test_conditional_captures.py +1 -1
  188. warp/tests/cuda/test_multigpu.py +1 -1
  189. warp/tests/cuda/test_streams.py +58 -1
  190. warp/tests/geometry/test_bvh.py +157 -22
  191. warp/tests/geometry/test_marching_cubes.py +0 -1
  192. warp/tests/geometry/test_mesh.py +5 -3
  193. warp/tests/geometry/test_mesh_query_aabb.py +5 -12
  194. warp/tests/geometry/test_mesh_query_point.py +5 -2
  195. warp/tests/geometry/test_mesh_query_ray.py +15 -3
  196. warp/tests/geometry/test_volume_write.py +5 -5
  197. warp/tests/interop/test_dlpack.py +14 -14
  198. warp/tests/interop/test_jax.py +772 -49
  199. warp/tests/interop/test_paddle.py +1 -1
  200. warp/tests/test_adam.py +0 -1
  201. warp/tests/test_arithmetic.py +9 -9
  202. warp/tests/test_array.py +527 -100
  203. warp/tests/test_array_reduce.py +3 -3
  204. warp/tests/test_atomic.py +12 -8
  205. warp/tests/test_atomic_bitwise.py +209 -0
  206. warp/tests/test_atomic_cas.py +4 -4
  207. warp/tests/test_bool.py +2 -2
  208. warp/tests/test_builtins_resolution.py +5 -571
  209. warp/tests/test_codegen.py +33 -14
  210. warp/tests/test_conditional.py +1 -1
  211. warp/tests/test_context.py +6 -6
  212. warp/tests/test_copy.py +242 -161
  213. warp/tests/test_ctypes.py +3 -3
  214. warp/tests/test_devices.py +24 -2
  215. warp/tests/test_examples.py +16 -84
  216. warp/tests/test_fabricarray.py +35 -35
  217. warp/tests/test_fast_math.py +0 -2
  218. warp/tests/test_fem.py +56 -10
  219. warp/tests/test_fixedarray.py +3 -3
  220. warp/tests/test_func.py +8 -5
  221. warp/tests/test_generics.py +1 -1
  222. warp/tests/test_indexedarray.py +24 -24
  223. warp/tests/test_intersect.py +39 -9
  224. warp/tests/test_large.py +1 -1
  225. warp/tests/test_lerp.py +3 -1
  226. warp/tests/test_linear_solvers.py +1 -1
  227. warp/tests/test_map.py +35 -4
  228. warp/tests/test_mat.py +52 -62
  229. warp/tests/test_mat_constructors.py +4 -5
  230. warp/tests/test_mat_lite.py +1 -1
  231. warp/tests/test_mat_scalar_ops.py +121 -121
  232. warp/tests/test_math.py +34 -0
  233. warp/tests/test_module_aot.py +4 -4
  234. warp/tests/test_modules_lite.py +28 -2
  235. warp/tests/test_print.py +11 -11
  236. warp/tests/test_quat.py +93 -58
  237. warp/tests/test_runlength_encode.py +1 -1
  238. warp/tests/test_scalar_ops.py +38 -10
  239. warp/tests/test_smoothstep.py +1 -1
  240. warp/tests/test_sparse.py +126 -15
  241. warp/tests/test_spatial.py +105 -87
  242. warp/tests/test_special_values.py +6 -6
  243. warp/tests/test_static.py +7 -7
  244. warp/tests/test_struct.py +13 -2
  245. warp/tests/test_triangle_closest_point.py +48 -1
  246. warp/tests/test_types.py +27 -15
  247. warp/tests/test_utils.py +52 -52
  248. warp/tests/test_vec.py +29 -29
  249. warp/tests/test_vec_constructors.py +5 -5
  250. warp/tests/test_vec_scalar_ops.py +97 -97
  251. warp/tests/test_version.py +75 -0
  252. warp/tests/tile/test_tile.py +178 -0
  253. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  254. warp/tests/tile/test_tile_cholesky.py +7 -4
  255. warp/tests/tile/test_tile_load.py +26 -2
  256. warp/tests/tile/test_tile_mathdx.py +3 -3
  257. warp/tests/tile/test_tile_matmul.py +1 -1
  258. warp/tests/tile/test_tile_mlp.py +2 -4
  259. warp/tests/tile/test_tile_reduce.py +214 -13
  260. warp/tests/unittest_suites.py +6 -14
  261. warp/tests/unittest_utils.py +10 -9
  262. warp/tests/walkthrough_debug.py +3 -1
  263. warp/torch.py +6 -373
  264. warp/types.py +29 -5764
  265. warp/utils.py +10 -1659
  266. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +46 -99
  267. warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
  268. warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  269. warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  270. warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  271. warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  272. warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  273. warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  274. warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  275. warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  276. warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  277. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  278. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  279. warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  280. warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  281. warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  282. warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  283. warp/examples/assets/cartpole.urdf +0 -110
  284. warp/examples/assets/crazyflie.usd +0 -0
  285. warp/examples/assets/nv_ant.xml +0 -92
  286. warp/examples/assets/nv_humanoid.xml +0 -183
  287. warp/examples/assets/quadruped.urdf +0 -268
  288. warp/examples/optim/example_bounce.py +0 -266
  289. warp/examples/optim/example_cloth_throw.py +0 -228
  290. warp/examples/optim/example_drone.py +0 -870
  291. warp/examples/optim/example_inverse_kinematics.py +0 -182
  292. warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
  293. warp/examples/optim/example_softbody_properties.py +0 -400
  294. warp/examples/optim/example_spring_cage.py +0 -245
  295. warp/examples/optim/example_trajectory.py +0 -227
  296. warp/examples/sim/example_cartpole.py +0 -143
  297. warp/examples/sim/example_cloth.py +0 -225
  298. warp/examples/sim/example_cloth_self_contact.py +0 -316
  299. warp/examples/sim/example_granular.py +0 -130
  300. warp/examples/sim/example_granular_collision_sdf.py +0 -202
  301. warp/examples/sim/example_jacobian_ik.py +0 -244
  302. warp/examples/sim/example_particle_chain.py +0 -124
  303. warp/examples/sim/example_quadruped.py +0 -203
  304. warp/examples/sim/example_rigid_chain.py +0 -203
  305. warp/examples/sim/example_rigid_contact.py +0 -195
  306. warp/examples/sim/example_rigid_force.py +0 -133
  307. warp/examples/sim/example_rigid_gyroscopic.py +0 -115
  308. warp/examples/sim/example_rigid_soft_contact.py +0 -140
  309. warp/examples/sim/example_soft_body.py +0 -196
  310. warp/examples/tile/example_tile_walker.py +0 -327
  311. warp/sim/__init__.py +0 -74
  312. warp/sim/articulation.py +0 -793
  313. warp/sim/collide.py +0 -2570
  314. warp/sim/graph_coloring.py +0 -307
  315. warp/sim/import_mjcf.py +0 -791
  316. warp/sim/import_snu.py +0 -227
  317. warp/sim/import_urdf.py +0 -579
  318. warp/sim/import_usd.py +0 -898
  319. warp/sim/inertia.py +0 -357
  320. warp/sim/integrator.py +0 -245
  321. warp/sim/integrator_euler.py +0 -2000
  322. warp/sim/integrator_featherstone.py +0 -2101
  323. warp/sim/integrator_vbd.py +0 -2487
  324. warp/sim/integrator_xpbd.py +0 -3295
  325. warp/sim/model.py +0 -4821
  326. warp/sim/particles.py +0 -121
  327. warp/sim/render.py +0 -431
  328. warp/sim/utils.py +0 -431
  329. warp/tests/sim/disabled_kinematics.py +0 -244
  330. warp/tests/sim/test_cloth.py +0 -863
  331. warp/tests/sim/test_collision.py +0 -743
  332. warp/tests/sim/test_coloring.py +0 -347
  333. warp/tests/sim/test_inertia.py +0 -161
  334. warp/tests/sim/test_model.py +0 -226
  335. warp/tests/sim/test_sim_grad.py +0 -287
  336. warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
  337. warp/tests/sim/test_sim_kinematics.py +0 -98
  338. warp/thirdparty/__init__.py +0 -0
  339. warp_lang-1.9.1.dist-info/RECORD +0 -456
  340. /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
  341. /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
  342. /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
  343. /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
  344. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
  345. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  346. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/build_dll.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,727 +13,12 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- from __future__ import annotations
16
+ # TODO: Remove after cleaning up the public API.
17
17
 
18
- import os
19
- import platform
20
- import subprocess
21
- import sys
18
+ from warp._src import build_dll as _build_dll
22
19
 
23
- from warp.utils import ScopedTimer
24
20
 
25
- verbose_cmd = True # print command lines before executing them
21
+ def __getattr__(name):
22
+ from warp._src.utils import get_deprecated_api
26
23
 
27
- MIN_CTK_VERSION = (12, 0)
28
-
29
-
30
- def machine_architecture() -> str:
31
- """Return a canonical machine architecture string.
32
- - "x86_64" for x86-64, aka. AMD64, aka. x64
33
- - "aarch64" for AArch64, aka. ARM64
34
- """
35
- machine = platform.machine()
36
- if machine == "x86_64" or machine == "AMD64":
37
- return "x86_64"
38
- if machine == "aarch64" or machine == "arm64":
39
- return "aarch64"
40
- raise RuntimeError(f"Unrecognized machine architecture {machine}")
41
-
42
-
43
- def run_cmd(cmd):
44
- if verbose_cmd:
45
- print(cmd)
46
-
47
- try:
48
- return subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
49
- except subprocess.CalledProcessError as e:
50
- print("Command failed with exit code:", e.returncode)
51
- print("Command output was:")
52
- print(e.output.decode())
53
- raise e
54
-
55
-
56
- # cut-down version of vcvars64.bat that allows using
57
- # custom toolchain locations, returns the compiler program path
58
- def set_msvc_env(msvc_path, sdk_path):
59
- if "INCLUDE" not in os.environ:
60
- os.environ["INCLUDE"] = ""
61
-
62
- if "LIB" not in os.environ:
63
- os.environ["LIB"] = ""
64
-
65
- msvc_path = os.path.abspath(msvc_path)
66
- sdk_path = os.path.abspath(sdk_path)
67
-
68
- os.environ["INCLUDE"] += os.pathsep + os.path.join(msvc_path, "include")
69
- os.environ["INCLUDE"] += os.pathsep + os.path.join(sdk_path, "include/winrt")
70
- os.environ["INCLUDE"] += os.pathsep + os.path.join(sdk_path, "include/um")
71
- os.environ["INCLUDE"] += os.pathsep + os.path.join(sdk_path, "include/ucrt")
72
- os.environ["INCLUDE"] += os.pathsep + os.path.join(sdk_path, "include/shared")
73
-
74
- os.environ["LIB"] += os.pathsep + os.path.join(msvc_path, "lib/x64")
75
- os.environ["LIB"] += os.pathsep + os.path.join(sdk_path, "lib/ucrt/x64")
76
- os.environ["LIB"] += os.pathsep + os.path.join(sdk_path, "lib/um/x64")
77
-
78
- os.environ["PATH"] += os.pathsep + os.path.join(msvc_path, "bin/HostX64/x64")
79
- os.environ["PATH"] += os.pathsep + os.path.join(sdk_path, "bin/x64")
80
-
81
- return os.path.join(msvc_path, "bin", "HostX64", "x64", "cl.exe")
82
-
83
-
84
- def find_host_compiler():
85
- if os.name == "nt":
86
- # try and find an installed host compiler (msvc)
87
- # runs vcvars and copies back the build environment
88
-
89
- vswhere_path = r"%ProgramFiles(x86)%/Microsoft Visual Studio/Installer/vswhere.exe"
90
- vswhere_path = os.path.expandvars(vswhere_path)
91
- if not os.path.exists(vswhere_path):
92
- return ""
93
-
94
- vs_path = run_cmd(f'"{vswhere_path}" -latest -property installationPath').decode().rstrip()
95
- vsvars_path = os.path.join(vs_path, "VC\\Auxiliary\\Build\\vcvars64.bat")
96
-
97
- output = run_cmd(f'"{vsvars_path}" && set').decode()
98
-
99
- for line in output.splitlines():
100
- pair = line.split("=", 1)
101
- if len(pair) >= 2:
102
- os.environ[pair[0]] = pair[1]
103
-
104
- cl_path = run_cmd("where cl.exe").decode("utf-8").rstrip()
105
- cl_version = os.environ["VCToolsVersion"].split(".")
106
-
107
- # ensure at least VS2019 version, see list of MSVC versions here https://en.wikipedia.org/wiki/Microsoft_Visual_C%2B%2B
108
- cl_required_major = 14
109
- cl_required_minor = 29
110
-
111
- if int(cl_version[0]) < cl_required_major or (
112
- (int(cl_version[0]) == cl_required_major) and (int(cl_version[1]) < cl_required_minor)
113
- ):
114
- print(
115
- f"Warp: MSVC found but compiler version too old, found {cl_version[0]}.{cl_version[1]}, but must be {cl_required_major}.{cl_required_minor} or higher, kernel host compilation will be disabled."
116
- )
117
- return ""
118
-
119
- return cl_path
120
-
121
- else:
122
- # try and find g++
123
- return run_cmd("which g++").decode()
124
-
125
-
126
- def get_cuda_toolkit_version(cuda_home) -> tuple[int, int]:
127
- try:
128
- # the toolkit version can be obtained by running "nvcc --version"
129
- nvcc_path = os.path.join(cuda_home, "bin", "nvcc")
130
- nvcc_version_output = subprocess.check_output([nvcc_path, "--version"]).decode("utf-8")
131
- # search for release substring (e.g., "release 11.5")
132
- import re
133
-
134
- m = re.search(r"release (\d+)\.(\d+)", nvcc_version_output)
135
- if m is not None:
136
- major, minor = map(int, m.groups())
137
- return (major, minor)
138
- else:
139
- raise Exception("Failed to parse NVCC output")
140
-
141
- except Exception as e:
142
- print(f"Warning: Failed to determine CUDA Toolkit version: {e}")
143
- return MIN_CTK_VERSION
144
-
145
-
146
- def quote(path):
147
- return '"' + path + '"'
148
-
149
-
150
- def add_llvm_bin_to_path(args):
151
- """Add the LLVM bin directory to the PATH environment variable if it's set.
152
-
153
- Args:
154
- args: The argument namespace containing llvm_path.
155
-
156
- Returns:
157
- ``True`` if the PATH was updated, ``False`` otherwise.
158
- """
159
- if not hasattr(args, "llvm_path") or not args.llvm_path:
160
- return False
161
-
162
- # Construct the bin directory path
163
- llvm_bin_path = os.path.join(args.llvm_path, "bin")
164
-
165
- # Check if the directory exists
166
- if not os.path.isdir(llvm_bin_path):
167
- print(f"Warning: LLVM bin directory not found at {llvm_bin_path}")
168
- return False
169
-
170
- # Add to PATH environment variable
171
- os.environ["PATH"] = llvm_bin_path + os.pathsep + os.environ.get("PATH", "")
172
-
173
- print(f"Added {llvm_bin_path} to PATH")
174
- return True
175
-
176
-
177
- def _get_architectures_cu12(
178
- ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
179
- ) -> tuple[list[str], list[str]]:
180
- """Get architecture flags for CUDA 12.x."""
181
- gencode_opts = []
182
- clang_arch_flags = []
183
-
184
- if quick_build:
185
- gencode_opts = ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
186
- clang_arch_flags = ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
187
- else:
188
- if arch == "aarch64" and target_platform == "linux" and ctk_version == (12, 9):
189
- # Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
190
- print(
191
- "[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
192
- "See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
193
- "for details."
194
- )
195
- else:
196
- gencode_opts.extend(
197
- [
198
- "-gencode=arch=compute_52,code=sm_52", # Maxwell
199
- "-gencode=arch=compute_60,code=sm_60", # Pascal
200
- "-gencode=arch=compute_61,code=sm_61",
201
- "-gencode=arch=compute_70,code=sm_70", # Volta
202
- ]
203
- )
204
- clang_arch_flags.extend(
205
- [
206
- "--cuda-gpu-arch=sm_52",
207
- "--cuda-gpu-arch=sm_60",
208
- "--cuda-gpu-arch=sm_61",
209
- "--cuda-gpu-arch=sm_70",
210
- ]
211
- )
212
-
213
- # Desktop architectures
214
- gencode_opts.extend(
215
- [
216
- "-gencode=arch=compute_75,code=sm_75", # Turing
217
- "-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
218
- "-gencode=arch=compute_80,code=sm_80", # Ampere
219
- "-gencode=arch=compute_86,code=sm_86",
220
- "-gencode=arch=compute_89,code=sm_89", # Ada
221
- "-gencode=arch=compute_90,code=sm_90", # Hopper
222
- ]
223
- )
224
- clang_arch_flags.extend(
225
- [
226
- "--cuda-gpu-arch=sm_75", # Turing
227
- "--cuda-gpu-arch=sm_80", # Ampere
228
- "--cuda-gpu-arch=sm_86",
229
- "--cuda-gpu-arch=sm_89", # Ada
230
- "--cuda-gpu-arch=sm_90", # Hopper
231
- ]
232
- )
233
-
234
- if ctk_version >= (12, 8):
235
- gencode_opts.extend(["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"])
236
- clang_arch_flags.extend(["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"])
237
-
238
- # Mobile architectures for aarch64 Linux
239
- if arch == "aarch64" and target_platform == "linux":
240
- gencode_opts.extend(
241
- [
242
- "-gencode=arch=compute_87,code=sm_87", # Orin
243
- "-gencode=arch=compute_53,code=sm_53", # X1
244
- "-gencode=arch=compute_62,code=sm_62", # X2
245
- "-gencode=arch=compute_72,code=sm_72", # Xavier
246
- ]
247
- )
248
- clang_arch_flags.extend(
249
- [
250
- "--cuda-gpu-arch=sm_87",
251
- "--cuda-gpu-arch=sm_53",
252
- "--cuda-gpu-arch=sm_62",
253
- "--cuda-gpu-arch=sm_72",
254
- ]
255
- )
256
-
257
- # Thor support in CUDA 12.8+
258
- if ctk_version >= (12, 8):
259
- gencode_opts.append("-gencode=arch=compute_101,code=sm_101") # Thor (CUDA 12 numbering)
260
- clang_arch_flags.append("--cuda-gpu-arch=sm_101")
261
-
262
- if ctk_version >= (12, 9):
263
- gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
264
- clang_arch_flags.append("--cuda-gpu-arch=sm_121")
265
-
266
- # PTX for future hardware (use highest available compute capability)
267
- if ctk_version >= (12, 9):
268
- gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
269
- elif ctk_version >= (12, 8):
270
- gencode_opts.extend(["-gencode=arch=compute_120,code=compute_120"])
271
- else:
272
- gencode_opts.append("-gencode=arch=compute_90,code=compute_90")
273
-
274
- return gencode_opts, clang_arch_flags
275
-
276
-
277
- def _get_architectures_cu13(
278
- ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
279
- ) -> tuple[list[str], list[str]]:
280
- """Get architecture flags for CUDA 13.x."""
281
- gencode_opts = []
282
- clang_arch_flags = []
283
-
284
- if quick_build:
285
- gencode_opts = ["-gencode=arch=compute_75,code=compute_75"]
286
- clang_arch_flags = ["--cuda-gpu-arch=sm_75"]
287
- else:
288
- # Desktop architectures
289
- gencode_opts.extend(
290
- [
291
- "-gencode=arch=compute_75,code=sm_75", # Turing
292
- "-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
293
- "-gencode=arch=compute_80,code=sm_80", # Ampere
294
- "-gencode=arch=compute_86,code=sm_86",
295
- "-gencode=arch=compute_89,code=sm_89", # Ada
296
- "-gencode=arch=compute_90,code=sm_90", # Hopper
297
- "-gencode=arch=compute_100,code=sm_100", # Blackwell
298
- "-gencode=arch=compute_120,code=sm_120", # Blackwell
299
- ]
300
- )
301
- clang_arch_flags.extend(
302
- [
303
- "--cuda-gpu-arch=sm_75", # Turing
304
- "--cuda-gpu-arch=sm_80", # Ampere
305
- "--cuda-gpu-arch=sm_86",
306
- "--cuda-gpu-arch=sm_89", # Ada
307
- "--cuda-gpu-arch=sm_90", # Hopper
308
- "--cuda-gpu-arch=sm_100", # Blackwell
309
- "--cuda-gpu-arch=sm_120", # Blackwell
310
- ]
311
- )
312
-
313
- # Mobile architectures for aarch64 Linux
314
- if arch == "aarch64" and target_platform == "linux":
315
- gencode_opts.extend(
316
- [
317
- "-gencode=arch=compute_87,code=sm_87", # Orin
318
- "-gencode=arch=compute_110,code=sm_110", # Thor
319
- "-gencode=arch=compute_121,code=sm_121", # Spark
320
- ]
321
- )
322
- clang_arch_flags.extend(
323
- [
324
- "--cuda-gpu-arch=sm_87",
325
- "--cuda-gpu-arch=sm_110",
326
- "--cuda-gpu-arch=sm_121",
327
- ]
328
- )
329
-
330
- # PTX for future hardware (use highest available compute capability)
331
- gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
332
-
333
- return gencode_opts, clang_arch_flags
334
-
335
-
336
- def _get_architectures_cu12(
337
- ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
338
- ) -> tuple[list[str], list[str]]:
339
- """Get architecture flags for CUDA 12.x."""
340
- gencode_opts = []
341
- clang_arch_flags = []
342
-
343
- if quick_build:
344
- gencode_opts = ["-gencode=arch=compute_52,code=compute_52", "-gencode=arch=compute_75,code=compute_75"]
345
- clang_arch_flags = ["--cuda-gpu-arch=sm_52", "--cuda-gpu-arch=sm_75"]
346
- else:
347
- if arch == "aarch64" and target_platform == "linux" and ctk_version == (12, 9):
348
- # Skip certain architectures for aarch64 with CUDA 12.9 due to CCCL bug
349
- print(
350
- "[INFO] Skipping sm_52, sm_60, sm_61, and sm_70 targets for ARM due to a CUDA Toolkit bug. "
351
- "See https://nvidia.github.io/warp/installation.html#cuda-12-9-limitation-on-linux-arm-platforms "
352
- "for details."
353
- )
354
- else:
355
- gencode_opts.extend(
356
- [
357
- "-gencode=arch=compute_52,code=sm_52", # Maxwell
358
- "-gencode=arch=compute_60,code=sm_60", # Pascal
359
- "-gencode=arch=compute_61,code=sm_61",
360
- "-gencode=arch=compute_70,code=sm_70", # Volta
361
- ]
362
- )
363
- clang_arch_flags.extend(
364
- [
365
- "--cuda-gpu-arch=sm_52",
366
- "--cuda-gpu-arch=sm_60",
367
- "--cuda-gpu-arch=sm_61",
368
- "--cuda-gpu-arch=sm_70",
369
- ]
370
- )
371
-
372
- # Desktop architectures
373
- gencode_opts.extend(
374
- [
375
- "-gencode=arch=compute_75,code=sm_75", # Turing
376
- "-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
377
- "-gencode=arch=compute_80,code=sm_80", # Ampere
378
- "-gencode=arch=compute_86,code=sm_86",
379
- "-gencode=arch=compute_89,code=sm_89", # Ada
380
- "-gencode=arch=compute_90,code=sm_90", # Hopper
381
- ]
382
- )
383
- clang_arch_flags.extend(
384
- [
385
- "--cuda-gpu-arch=sm_75", # Turing
386
- "--cuda-gpu-arch=sm_80", # Ampere
387
- "--cuda-gpu-arch=sm_86",
388
- "--cuda-gpu-arch=sm_89", # Ada
389
- "--cuda-gpu-arch=sm_90", # Hopper
390
- ]
391
- )
392
-
393
- if ctk_version >= (12, 8):
394
- gencode_opts.extend(["-gencode=arch=compute_100,code=sm_100", "-gencode=arch=compute_120,code=sm_120"])
395
- clang_arch_flags.extend(["--cuda-gpu-arch=sm_100", "--cuda-gpu-arch=sm_120"])
396
-
397
- # Mobile architectures for aarch64 Linux
398
- if arch == "aarch64" and target_platform == "linux":
399
- gencode_opts.extend(
400
- [
401
- "-gencode=arch=compute_87,code=sm_87", # Orin
402
- "-gencode=arch=compute_53,code=sm_53", # X1
403
- "-gencode=arch=compute_62,code=sm_62", # X2
404
- "-gencode=arch=compute_72,code=sm_72", # Xavier
405
- ]
406
- )
407
- clang_arch_flags.extend(
408
- [
409
- "--cuda-gpu-arch=sm_87",
410
- "--cuda-gpu-arch=sm_53",
411
- "--cuda-gpu-arch=sm_62",
412
- "--cuda-gpu-arch=sm_72",
413
- ]
414
- )
415
-
416
- # Thor support in CUDA 12.8+
417
- if ctk_version >= (12, 8):
418
- gencode_opts.append("-gencode=arch=compute_101,code=sm_101") # Thor (CUDA 12 numbering)
419
- clang_arch_flags.append("--cuda-gpu-arch=sm_101")
420
-
421
- if ctk_version >= (12, 9):
422
- gencode_opts.append("-gencode=arch=compute_121,code=sm_121")
423
- clang_arch_flags.append("--cuda-gpu-arch=sm_121")
424
-
425
- # PTX for future hardware (use highest available compute capability)
426
- if ctk_version >= (12, 9):
427
- gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
428
- elif ctk_version >= (12, 8):
429
- gencode_opts.extend(["-gencode=arch=compute_120,code=compute_120"])
430
- else:
431
- gencode_opts.append("-gencode=arch=compute_90,code=compute_90")
432
-
433
- return gencode_opts, clang_arch_flags
434
-
435
-
436
- def _get_architectures_cu13(
437
- ctk_version: tuple[int, int], arch: str, target_platform: str, quick_build: bool = False
438
- ) -> tuple[list[str], list[str]]:
439
- """Get architecture flags for CUDA 13.x."""
440
- gencode_opts = []
441
- clang_arch_flags = []
442
-
443
- if quick_build:
444
- gencode_opts = ["-gencode=arch=compute_75,code=compute_75"]
445
- clang_arch_flags = ["--cuda-gpu-arch=sm_75"]
446
- else:
447
- # Desktop architectures
448
- gencode_opts.extend(
449
- [
450
- "-gencode=arch=compute_75,code=sm_75", # Turing
451
- "-gencode=arch=compute_75,code=compute_75", # Turing (PTX)
452
- "-gencode=arch=compute_80,code=sm_80", # Ampere
453
- "-gencode=arch=compute_86,code=sm_86",
454
- "-gencode=arch=compute_89,code=sm_89", # Ada
455
- "-gencode=arch=compute_90,code=sm_90", # Hopper
456
- "-gencode=arch=compute_100,code=sm_100", # Blackwell
457
- "-gencode=arch=compute_120,code=sm_120", # Blackwell
458
- ]
459
- )
460
- clang_arch_flags.extend(
461
- [
462
- "--cuda-gpu-arch=sm_75", # Turing
463
- "--cuda-gpu-arch=sm_80", # Ampere
464
- "--cuda-gpu-arch=sm_86",
465
- "--cuda-gpu-arch=sm_89", # Ada
466
- "--cuda-gpu-arch=sm_90", # Hopper
467
- "--cuda-gpu-arch=sm_100", # Blackwell
468
- "--cuda-gpu-arch=sm_120", # Blackwell
469
- ]
470
- )
471
-
472
- # Mobile architectures for aarch64 Linux
473
- if arch == "aarch64" and target_platform == "linux":
474
- gencode_opts.extend(
475
- [
476
- "-gencode=arch=compute_87,code=sm_87", # Orin
477
- "-gencode=arch=compute_110,code=sm_110", # Thor
478
- "-gencode=arch=compute_121,code=sm_121", # Spark
479
- ]
480
- )
481
- clang_arch_flags.extend(
482
- [
483
- "--cuda-gpu-arch=sm_87",
484
- "--cuda-gpu-arch=sm_110",
485
- "--cuda-gpu-arch=sm_121",
486
- ]
487
- )
488
-
489
- # PTX for future hardware (use highest available compute capability)
490
- gencode_opts.extend(["-gencode=arch=compute_121,code=compute_121"])
491
-
492
- return gencode_opts, clang_arch_flags
493
-
494
-
495
def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, arch, libs: list[str] | None = None, mode=None):
    """Compile the given C++/CUDA sources and link them into one shared library for ``arch``.

    Args:
        args: Build options object. Attributes read here: ``mode``, ``cuda_path``, ``quick``,
            ``verbose``, ``compile_time_trace``, ``fast_math``, ``libmathdx_path``, ``verify_fp``,
            ``host_compiler`` (Windows only) and, if present, ``clang_build_toolchain``.
        dll_path: Output path of the linked library.
        cpp_paths: Host C++ sources; each is compiled to ``<path>.obj`` (Windows) or ``<path>.o``.
        cu_path: CUDA source file, or a falsy value to build without CUDA.
        arch: Target architecture name; used in include paths, GPU arch selection and the
            macOS target triple.
        libs: Extra linker inputs appended after the object files.
        mode: ``"debug"`` or ``"release"``; falls back to ``args.mode`` when ``None``.

    Raises:
        Exception: The CUDA Toolkit found at ``args.cuda_path`` is older than ``MIN_CTK_VERSION``.
        RuntimeError: No host compiler on Windows, or an unrecognized build configuration.
    """
    mode = args.mode if (mode is None) else mode
    cuda_home = args.cuda_path
    cuda_cmd = None

    # Add LLVM bin directory to PATH
    add_llvm_bin_to_path(args)

    if args.quick or cu_path is None:
        cuda_compat_enabled = "WP_ENABLE_CUDA_COMPATIBILITY=0"
    else:
        cuda_compat_enabled = "WP_ENABLE_CUDA_COMPATIBILITY=1"

    if libs is None:
        libs = []

    import pathlib

    warp_home_path = pathlib.Path(__file__).parent
    warp_home = warp_home_path.resolve()

    if args.verbose:
        print(f"Building {dll_path}")

    native_dir = os.path.join(warp_home, "native")

    if cu_path:
        # check CUDA Toolkit version
        ctk_version = get_cuda_toolkit_version(cuda_home)
        if ctk_version < MIN_CTK_VERSION:
            raise Exception(
                f"CUDA Toolkit version {MIN_CTK_VERSION[0]}.{MIN_CTK_VERSION[1]}+ is required (found {ctk_version[0]}.{ctk_version[1]} in {cuda_home})"
            )

        # Get architecture flags based on CUDA version
        if ctk_version >= (13, 0):
            gencode_opts, clang_arch_flags = _get_architectures_cu13(ctk_version, arch, sys.platform, args.quick)
        else:
            gencode_opts, clang_arch_flags = _get_architectures_cu12(ctk_version, arch, sys.platform, args.quick)

        nvcc_opts = [
            *gencode_opts,
            "-t0",  # multithreaded compilation
            "--extended-lambda",
        ]

        # Clang options
        clang_opts = [
            *clang_arch_flags,
            "-std=c++17",
            "-xcuda",
            f'--cuda-path="{cuda_home}"',
        ]

        if args.compile_time_trace:
            if ctk_version >= (12, 8):
                nvcc_opts.append("--fdevice-time-trace=build_lib_compile-time-trace")
            else:
                print("Warp warning: CUDA version is less than 12.8, compile_time_trace is not supported")

        if args.fast_math:
            nvcc_opts.append("--use_fast_math")

    # is the library being built with CUDA enabled?
    cuda_enabled = "WP_ENABLE_CUDA=1" if (cu_path is not None) else "WP_ENABLE_CUDA=0"

    if args.libmathdx_path:
        libmathdx_includes = f' -I"{args.libmathdx_path}/include"'
        mathdx_enabled = "WP_ENABLE_MATHDX=1"
    else:
        libmathdx_includes = ""
        mathdx_enabled = "WP_ENABLE_MATHDX=0"

    if os.name == "nt":
        if args.host_compiler:
            host_linker = os.path.join(os.path.dirname(args.host_compiler), "link.exe")
        else:
            raise RuntimeError("Warp build error: No host compiler was found")

        cpp_includes = f' /I"{warp_home_path.parent}/external/llvm-project/out/install/{mode}-{arch}/include"'
        cpp_includes += f' /I"{warp_home_path.parent}/_build/host-deps/llvm-project/release-{arch}/include"'
        cuda_includes = f' /I"{cuda_home}/include"' if cu_path else ""
        includes = cpp_includes + cuda_includes

        # nvrtc_static.lib is built with /MT and _ITERATOR_DEBUG_LEVEL=0 so if we link it in we must match these options
        if cu_path or mode != "debug":
            runtime = "/MT"
            iter_dbg = "_ITERATOR_DEBUG_LEVEL=0"
            debug = "NDEBUG"
        else:
            runtime = "/MTd"
            iter_dbg = "_ITERATOR_DEBUG_LEVEL=2"
            debug = "_DEBUG"

        cpp_flags = f'/nologo /std:c++17 /GR- {runtime} /D "{debug}" /D "{cuda_enabled}" /D "{mathdx_enabled}" /D "{cuda_compat_enabled}" /D "{iter_dbg}" /I"{native_dir}" {includes} '

        # NOTE(review): this branch keys off args.mode while the rest of the function uses the
        # resolved `mode` override; left as-is because it may be intentional - confirm.
        if args.mode == "debug":
            cpp_flags += "/Zi /Od /D WP_ENABLE_DEBUG=1"
            linkopts = ["/DLL", "/DEBUG"]
        elif args.mode == "release":
            cpp_flags += "/Ox /D WP_ENABLE_DEBUG=0"
            linkopts = ["/DLL"]
        else:
            raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {args.mode}")

        if args.verify_fp:
            cpp_flags += ' /D "WP_VERIFY_FP"'

        if args.fast_math:
            cpp_flags += " /fp:fast"

        with ScopedTimer("build", active=args.verbose):
            for cpp_path in cpp_paths:
                cpp_out = cpp_path + ".obj"
                linkopts.append(quote(cpp_out))

                cpp_cmd = f'"{args.host_compiler}" {cpp_flags} -c "{cpp_path}" /Fo"{cpp_out}"'
                run_cmd(cpp_cmd)

        if cu_path:
            cu_out = cu_path + ".o"

            if mode == "debug":
                cuda_cmd = f'"{cuda_home}/bin/nvcc" --std=c++17 --compiler-options=/MT,/Zi,/Od -g -G -O0 -DNDEBUG -D_ITERATOR_DEBUG_LEVEL=0 -I"{native_dir}" -line-info {" ".join(nvcc_opts)} -DWP_ENABLE_CUDA=1 -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'

            elif mode == "release":
                cuda_cmd = f'"{cuda_home}/bin/nvcc" --std=c++17 -O3 {" ".join(nvcc_opts)} -I"{native_dir}" -DNDEBUG -DWP_ENABLE_CUDA=1 -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'

            # Without this guard an unknown mode would hand None to run_cmd
            if cuda_cmd is None:
                raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {mode}")

            with ScopedTimer("build_cuda", active=args.verbose):
                run_cmd(cuda_cmd)
                linkopts.append(quote(cu_out))
                linkopts.append(
                    f'cudart_static.lib nvrtc_static.lib nvrtc-builtins_static.lib nvptxcompiler_static.lib ws2_32.lib user32.lib /LIBPATH:"{cuda_home}/lib/x64"'
                )

                if args.libmathdx_path:
                    linkopts.append(f'nvJitLink_static.lib /LIBPATH:"{args.libmathdx_path}/lib/x64" mathdx_static.lib')

        with ScopedTimer("link", active=args.verbose):
            link_cmd = f'"{host_linker}" {" ".join(linkopts + libs)} /out:"{dll_path}"'
            run_cmd(link_cmd)

    else:
        # Unix compilation
        cuda_compiler = "clang++" if getattr(args, "clang_build_toolchain", False) else "nvcc"
        cpp_compiler = "clang++" if getattr(args, "clang_build_toolchain", False) else "g++"

        cpp_includes = f' -I"{warp_home_path.parent}/external/llvm-project/out/install/{mode}-{arch}/include"'
        cpp_includes += f' -I"{warp_home_path.parent}/_build/host-deps/llvm-project/release-{arch}/include"'
        cuda_includes = f' -I"{cuda_home}/include"' if cu_path else ""
        includes = cpp_includes + cuda_includes

        if sys.platform == "darwin":
            version = f"--target={arch}-apple-macos11"
        else:
            if cpp_compiler == "g++":
                version = "-fabi-version=13"  # GCC 8.2+
            else:
                version = ""

        cpp_flags = f'-Werror -Wuninitialized {version} --std=c++17 -fno-rtti -D{cuda_enabled} -D{mathdx_enabled} -D{cuda_compat_enabled} -fPIC -fvisibility=hidden -D_GLIBCXX_USE_CXX11_ABI=0 -I"{native_dir}" {includes} '

        if mode == "debug":
            cpp_flags += "-O0 -g -D_DEBUG -DWP_ENABLE_DEBUG=1 -fkeep-inline-functions"

        if mode == "release":
            cpp_flags += "-O3 -DNDEBUG -DWP_ENABLE_DEBUG=0"

        if args.verify_fp:
            cpp_flags += " -DWP_VERIFY_FP"

        if args.fast_math:
            cpp_flags += " -ffast-math"

        ld_inputs = []

        with ScopedTimer("build", active=args.verbose):
            for cpp_path in cpp_paths:
                cpp_out = cpp_path + ".o"
                ld_inputs.append(quote(cpp_out))

                build_cmd = f'{cpp_compiler} {cpp_flags} -c "{cpp_path}" -o "{cpp_out}"'
                run_cmd(build_cmd)

        if cu_path:
            cu_out = cu_path + ".o"

            if cuda_compiler == "nvcc":
                if mode == "debug":
                    cuda_cmd = f'"{cuda_home}/bin/nvcc" --std=c++17 -g -G -O0 --compiler-options -fPIC,-fvisibility=hidden -D_DEBUG -D_ITERATOR_DEBUG_LEVEL=0 -line-info {" ".join(nvcc_opts)} -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'
                elif mode == "release":
                    cuda_cmd = f'"{cuda_home}/bin/nvcc" --std=c++17 -O3 --compiler-options -fPIC,-fvisibility=hidden {" ".join(nvcc_opts)} -DNDEBUG -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'
            else:
                # Use Clang compiler
                if mode == "debug":
                    cuda_cmd = f'clang++ -Werror -Wuninitialized -Wno-unknown-cuda-version {" ".join(clang_opts)} -g -O0 -fPIC -fvisibility=hidden -D_DEBUG -D_ITERATOR_DEBUG_LEVEL=0 -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'
                elif mode == "release":
                    cuda_cmd = f'clang++ -Werror -Wuninitialized -Wno-unknown-cuda-version {" ".join(clang_opts)} -O3 -fPIC -fvisibility=hidden -DNDEBUG -DWP_ENABLE_CUDA=1 -I"{native_dir}" -D{mathdx_enabled} {libmathdx_includes} -o "{cu_out}" -c "{cu_path}"'

            # Without this guard an unknown mode would hand None to run_cmd
            if cuda_cmd is None:
                raise RuntimeError(f"Unrecognized build configuration (debug, release), got: {mode}")

            with ScopedTimer("build_cuda", active=args.verbose):
                run_cmd(cuda_cmd)

                ld_inputs.append(quote(cu_out))
                ld_inputs.append(
                    f'-L"{cuda_home}/lib64" -lcudart_static -lnvrtc_static -lnvrtc-builtins_static -lnvptxcompiler_static -lpthread -ldl -lrt'
                )

                if args.libmathdx_path:
                    # Library directory is quoted like the CUDA one above so paths with spaces survive
                    ld_inputs.append(f'-lnvJitLink_static -L"{args.libmathdx_path}/lib" -lmathdx_static')

        if sys.platform == "darwin":
            opt_no_undefined = "-Wl,-undefined,error"
            opt_exclude_libs = ""
        else:
            opt_no_undefined = "-Wl,--no-undefined"
            opt_exclude_libs = "-Wl,--exclude-libs,ALL"

        with ScopedTimer("link", active=args.verbose):
            origin = "@loader_path" if (sys.platform == "darwin") else "$ORIGIN"
            link_cmd = f"{cpp_compiler} {version} -shared -Wl,-rpath,'{origin}' {opt_no_undefined} {opt_exclude_libs} -o '{dll_path}' {' '.join(ld_inputs + libs)}"
            run_cmd(link_cmd)

            # Strip symbols to reduce the binary size
            # (dll_path is quoted the same way as in the link command above)
            if mode == "release":
                if sys.platform == "darwin":
                    run_cmd(f"strip -x '{dll_path}'")  # Strip all local symbols
                else:  # Linux
                    # Strip all symbols except for those needed to support debugging JIT-compiled code
                    run_cmd(
                        f"strip --strip-all --keep-symbol=__jit_debug_register_code --keep-symbol=__jit_debug_descriptor '{dll_path}'"
                    )
726
-
727
-
728
def build_dll(args, dll_path, cpp_paths, cu_path, libs=None):
    """Build the shared library at ``dll_path`` for the current platform.

    On macOS, one library per architecture (x86-64 and AArch64) is built next to ``dll_path``,
    the two are merged with ``lipo`` into ``dll_path``, and the per-architecture files are
    removed. On every other platform a single build is done for ``machine_architecture()``.

    Args:
        args: Build options object, forwarded to ``build_dll_for_arch``.
        dll_path: Output path of the final library.
        cpp_paths: Host C++ source files.
        cu_path: CUDA source file, or ``None``.
        libs: Extra linker inputs, forwarded to ``build_dll_for_arch``.
    """
    if sys.platform == "darwin":
        # create a universal binary by combining x86-64 and AArch64 builds
        x86_64_path = dll_path + "-x86_64"
        aarch64_path = dll_path + "-aarch64"

        build_dll_for_arch(args, x86_64_path, cpp_paths, cu_path, "x86_64", libs)
        build_dll_for_arch(args, aarch64_path, cpp_paths, cu_path, "aarch64", libs)

        # Paths are quoted so that locations containing spaces reach lipo as single arguments
        run_cmd(f'lipo -create -output "{dll_path}" "{x86_64_path}" "{aarch64_path}"')
        os.remove(x86_64_path)
        os.remove(aarch64_path)

    else:
        build_dll_for_arch(args, dll_path, cpp_paths, cu_path, machine_architecture(), libs)
24
+ return get_deprecated_api(_build_dll, "wp", name)