warp-lang 1.9.0__py3-none-win_amd64.whl → 1.10.0rc2__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (350) hide show
  1. warp/__init__.py +301 -287
  2. warp/__init__.pyi +2220 -313
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1075 -0
  5. warp/_src/build.py +618 -0
  6. warp/_src/build_dll.py +640 -0
  7. warp/{builtins.py → _src/builtins.py} +1497 -226
  8. warp/_src/codegen.py +4359 -0
  9. warp/{config.py → _src/config.py} +178 -169
  10. warp/_src/constants.py +57 -0
  11. warp/_src/context.py +8294 -0
  12. warp/_src/dlpack.py +462 -0
  13. warp/_src/fabric.py +355 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +508 -0
  16. warp/_src/fem/cache.py +687 -0
  17. warp/_src/fem/dirichlet.py +188 -0
  18. warp/{fem → _src/fem}/domain.py +40 -30
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +701 -0
  21. warp/{fem → _src/fem}/field/nodal_field.py +30 -15
  22. warp/{fem → _src/fem}/field/restriction.py +1 -1
  23. warp/{fem → _src/fem}/field/virtual.py +53 -27
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
  26. warp/_src/fem/geometry/closest_point.py +97 -0
  27. warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
  28. warp/{fem → _src/fem}/geometry/element.py +32 -10
  29. warp/{fem → _src/fem}/geometry/geometry.py +48 -20
  30. warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
  31. warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
  32. warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
  33. warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
  34. warp/{fem → _src/fem}/geometry/partition.py +121 -63
  35. warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
  36. warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
  37. warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
  38. warp/{fem → _src/fem}/integrate.py +164 -158
  39. warp/_src/fem/linalg.py +383 -0
  40. warp/_src/fem/operator.py +396 -0
  41. warp/_src/fem/polynomial.py +229 -0
  42. warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
  43. warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
  44. warp/_src/fem/space/__init__.py +248 -0
  45. warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
  46. warp/_src/fem/space/basis_space.py +679 -0
  47. warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
  48. warp/{fem → _src/fem}/space/function_space.py +14 -13
  49. warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
  50. warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
  51. warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
  52. warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
  53. warp/{fem → _src/fem}/space/partition.py +117 -60
  54. warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
  55. warp/{fem → _src/fem}/space/restriction.py +66 -33
  56. warp/_src/fem/space/shape/__init__.py +152 -0
  57. warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
  58. warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
  59. warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
  60. warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
  61. warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
  62. warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
  63. warp/_src/fem/space/topology.py +459 -0
  64. warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
  65. warp/_src/fem/types.py +112 -0
  66. warp/_src/fem/utils.py +486 -0
  67. warp/_src/jax.py +186 -0
  68. warp/_src/jax_experimental/__init__.py +14 -0
  69. warp/_src/jax_experimental/custom_call.py +387 -0
  70. warp/_src/jax_experimental/ffi.py +1284 -0
  71. warp/_src/jax_experimental/xla_ffi.py +656 -0
  72. warp/_src/marching_cubes.py +708 -0
  73. warp/_src/math.py +414 -0
  74. warp/_src/optim/__init__.py +14 -0
  75. warp/_src/optim/adam.py +163 -0
  76. warp/_src/optim/linear.py +1606 -0
  77. warp/_src/optim/sgd.py +112 -0
  78. warp/_src/paddle.py +406 -0
  79. warp/_src/render/__init__.py +14 -0
  80. warp/_src/render/imgui_manager.py +289 -0
  81. warp/_src/render/render_opengl.py +3636 -0
  82. warp/_src/render/render_usd.py +937 -0
  83. warp/_src/render/utils.py +160 -0
  84. warp/_src/sparse.py +2716 -0
  85. warp/_src/tape.py +1206 -0
  86. warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
  87. warp/_src/torch.py +391 -0
  88. warp/_src/types.py +5870 -0
  89. warp/_src/utils.py +1693 -0
  90. warp/autograd.py +12 -1054
  91. warp/bin/warp-clang.dll +0 -0
  92. warp/bin/warp.dll +0 -0
  93. warp/build.py +8 -588
  94. warp/build_dll.py +6 -471
  95. warp/codegen.py +6 -4246
  96. warp/constants.py +6 -39
  97. warp/context.py +12 -7851
  98. warp/dlpack.py +6 -444
  99. warp/examples/distributed/example_jacobi_mpi.py +4 -5
  100. warp/examples/fem/example_adaptive_grid.py +1 -1
  101. warp/examples/fem/example_apic_fluid.py +1 -1
  102. warp/examples/fem/example_burgers.py +8 -8
  103. warp/examples/fem/example_diffusion.py +1 -1
  104. warp/examples/fem/example_distortion_energy.py +1 -1
  105. warp/examples/fem/example_mixed_elasticity.py +2 -2
  106. warp/examples/fem/example_navier_stokes.py +1 -1
  107. warp/examples/fem/example_nonconforming_contact.py +7 -7
  108. warp/examples/fem/example_stokes.py +1 -1
  109. warp/examples/fem/example_stokes_transfer.py +1 -1
  110. warp/examples/fem/utils.py +2 -2
  111. warp/examples/interop/example_jax_callable.py +1 -1
  112. warp/examples/interop/example_jax_ffi_callback.py +1 -1
  113. warp/examples/interop/example_jax_kernel.py +3 -2
  114. warp/examples/tile/example_tile_mcgp.py +191 -0
  115. warp/fabric.py +6 -337
  116. warp/fem/__init__.py +159 -97
  117. warp/fem/adaptivity.py +7 -489
  118. warp/fem/cache.py +9 -648
  119. warp/fem/dirichlet.py +6 -184
  120. warp/fem/field/__init__.py +8 -109
  121. warp/fem/field/field.py +7 -652
  122. warp/fem/geometry/__init__.py +7 -18
  123. warp/fem/geometry/closest_point.py +11 -77
  124. warp/fem/linalg.py +18 -366
  125. warp/fem/operator.py +11 -369
  126. warp/fem/polynomial.py +9 -209
  127. warp/fem/space/__init__.py +5 -211
  128. warp/fem/space/basis_space.py +6 -662
  129. warp/fem/space/shape/__init__.py +41 -118
  130. warp/fem/space/topology.py +6 -437
  131. warp/fem/types.py +6 -81
  132. warp/fem/utils.py +11 -444
  133. warp/jax.py +8 -165
  134. warp/jax_experimental/__init__.py +14 -1
  135. warp/jax_experimental/custom_call.py +8 -342
  136. warp/jax_experimental/ffi.py +17 -853
  137. warp/jax_experimental/xla_ffi.py +5 -596
  138. warp/marching_cubes.py +5 -689
  139. warp/math.py +16 -393
  140. warp/native/array.h +385 -37
  141. warp/native/builtin.h +316 -39
  142. warp/native/bvh.cpp +43 -9
  143. warp/native/bvh.cu +62 -27
  144. warp/native/bvh.h +310 -309
  145. warp/native/clang/clang.cpp +102 -97
  146. warp/native/coloring.cpp +0 -1
  147. warp/native/crt.h +208 -0
  148. warp/native/exports.h +156 -0
  149. warp/native/hashgrid.cu +2 -0
  150. warp/native/intersect.h +24 -1
  151. warp/native/intersect_tri.h +44 -35
  152. warp/native/mat.h +1456 -276
  153. warp/native/mesh.cpp +4 -4
  154. warp/native/mesh.cu +4 -2
  155. warp/native/mesh.h +176 -61
  156. warp/native/quat.h +0 -52
  157. warp/native/scan.cu +2 -0
  158. warp/native/sort.cu +22 -13
  159. warp/native/sort.h +2 -0
  160. warp/native/sparse.cu +7 -3
  161. warp/native/spatial.h +12 -0
  162. warp/native/tile.h +837 -70
  163. warp/native/tile_radix_sort.h +1 -1
  164. warp/native/tile_reduce.h +394 -46
  165. warp/native/tile_scan.h +4 -4
  166. warp/native/vec.h +469 -53
  167. warp/native/version.h +23 -0
  168. warp/native/volume.cpp +1 -1
  169. warp/native/volume.cu +1 -0
  170. warp/native/volume.h +1 -1
  171. warp/native/volume_builder.cu +2 -0
  172. warp/native/warp.cpp +60 -32
  173. warp/native/warp.cu +313 -201
  174. warp/native/warp.h +14 -11
  175. warp/optim/__init__.py +6 -3
  176. warp/optim/adam.py +6 -145
  177. warp/optim/linear.py +14 -1585
  178. warp/optim/sgd.py +6 -94
  179. warp/paddle.py +6 -388
  180. warp/render/__init__.py +8 -4
  181. warp/render/imgui_manager.py +7 -267
  182. warp/render/render_opengl.py +6 -3616
  183. warp/render/render_usd.py +6 -918
  184. warp/render/utils.py +6 -142
  185. warp/sparse.py +37 -2563
  186. warp/tape.py +6 -1188
  187. warp/tests/__main__.py +1 -1
  188. warp/tests/cuda/test_async.py +4 -4
  189. warp/tests/cuda/test_conditional_captures.py +1 -1
  190. warp/tests/cuda/test_multigpu.py +1 -1
  191. warp/tests/cuda/test_streams.py +58 -1
  192. warp/tests/geometry/test_bvh.py +157 -22
  193. warp/tests/geometry/test_hash_grid.py +38 -0
  194. warp/tests/geometry/test_marching_cubes.py +0 -1
  195. warp/tests/geometry/test_mesh.py +5 -3
  196. warp/tests/geometry/test_mesh_query_aabb.py +5 -12
  197. warp/tests/geometry/test_mesh_query_point.py +5 -2
  198. warp/tests/geometry/test_mesh_query_ray.py +15 -3
  199. warp/tests/geometry/test_volume_write.py +5 -5
  200. warp/tests/interop/test_dlpack.py +14 -14
  201. warp/tests/interop/test_jax.py +1382 -79
  202. warp/tests/interop/test_paddle.py +1 -1
  203. warp/tests/test_adam.py +0 -1
  204. warp/tests/test_arithmetic.py +9 -9
  205. warp/tests/test_array.py +529 -100
  206. warp/tests/test_array_reduce.py +3 -3
  207. warp/tests/test_atomic.py +12 -8
  208. warp/tests/test_atomic_bitwise.py +209 -0
  209. warp/tests/test_atomic_cas.py +4 -4
  210. warp/tests/test_bool.py +2 -2
  211. warp/tests/test_builtins_resolution.py +5 -571
  212. warp/tests/test_codegen.py +34 -15
  213. warp/tests/test_conditional.py +1 -1
  214. warp/tests/test_context.py +6 -6
  215. warp/tests/test_copy.py +242 -161
  216. warp/tests/test_ctypes.py +3 -3
  217. warp/tests/test_devices.py +24 -2
  218. warp/tests/test_examples.py +16 -84
  219. warp/tests/test_fabricarray.py +35 -35
  220. warp/tests/test_fast_math.py +0 -2
  221. warp/tests/test_fem.py +60 -14
  222. warp/tests/test_fixedarray.py +3 -3
  223. warp/tests/test_func.py +8 -5
  224. warp/tests/test_generics.py +1 -1
  225. warp/tests/test_indexedarray.py +24 -24
  226. warp/tests/test_intersect.py +39 -9
  227. warp/tests/test_large.py +1 -1
  228. warp/tests/test_lerp.py +3 -1
  229. warp/tests/test_linear_solvers.py +1 -1
  230. warp/tests/test_map.py +49 -4
  231. warp/tests/test_mat.py +52 -62
  232. warp/tests/test_mat_constructors.py +4 -5
  233. warp/tests/test_mat_lite.py +1 -1
  234. warp/tests/test_mat_scalar_ops.py +121 -121
  235. warp/tests/test_math.py +34 -0
  236. warp/tests/test_module_aot.py +4 -4
  237. warp/tests/test_modules_lite.py +28 -2
  238. warp/tests/test_print.py +11 -11
  239. warp/tests/test_quat.py +93 -58
  240. warp/tests/test_runlength_encode.py +1 -1
  241. warp/tests/test_scalar_ops.py +38 -10
  242. warp/tests/test_smoothstep.py +1 -1
  243. warp/tests/test_sparse.py +126 -15
  244. warp/tests/test_spatial.py +105 -87
  245. warp/tests/test_special_values.py +6 -6
  246. warp/tests/test_static.py +7 -7
  247. warp/tests/test_struct.py +13 -2
  248. warp/tests/test_triangle_closest_point.py +48 -1
  249. warp/tests/test_tuple.py +96 -0
  250. warp/tests/test_types.py +82 -9
  251. warp/tests/test_utils.py +52 -52
  252. warp/tests/test_vec.py +29 -29
  253. warp/tests/test_vec_constructors.py +5 -5
  254. warp/tests/test_vec_scalar_ops.py +97 -97
  255. warp/tests/test_version.py +75 -0
  256. warp/tests/tile/test_tile.py +239 -0
  257. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  258. warp/tests/tile/test_tile_cholesky.py +7 -4
  259. warp/tests/tile/test_tile_load.py +26 -2
  260. warp/tests/tile/test_tile_mathdx.py +3 -3
  261. warp/tests/tile/test_tile_matmul.py +1 -1
  262. warp/tests/tile/test_tile_mlp.py +2 -4
  263. warp/tests/tile/test_tile_reduce.py +214 -13
  264. warp/tests/unittest_suites.py +6 -14
  265. warp/tests/unittest_utils.py +10 -9
  266. warp/tests/walkthrough_debug.py +3 -1
  267. warp/torch.py +6 -373
  268. warp/types.py +29 -5750
  269. warp/utils.py +10 -1659
  270. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +47 -103
  271. warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
  272. warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  273. warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  274. warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  275. warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  276. warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  277. warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  278. warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  279. warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  280. warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  281. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  282. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  283. warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  284. warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  285. warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  286. warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  287. warp/examples/assets/cartpole.urdf +0 -110
  288. warp/examples/assets/crazyflie.usd +0 -0
  289. warp/examples/assets/nv_ant.xml +0 -92
  290. warp/examples/assets/nv_humanoid.xml +0 -183
  291. warp/examples/assets/quadruped.urdf +0 -268
  292. warp/examples/optim/example_bounce.py +0 -266
  293. warp/examples/optim/example_cloth_throw.py +0 -228
  294. warp/examples/optim/example_drone.py +0 -870
  295. warp/examples/optim/example_inverse_kinematics.py +0 -182
  296. warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
  297. warp/examples/optim/example_softbody_properties.py +0 -400
  298. warp/examples/optim/example_spring_cage.py +0 -245
  299. warp/examples/optim/example_trajectory.py +0 -227
  300. warp/examples/sim/example_cartpole.py +0 -143
  301. warp/examples/sim/example_cloth.py +0 -225
  302. warp/examples/sim/example_cloth_self_contact.py +0 -316
  303. warp/examples/sim/example_granular.py +0 -130
  304. warp/examples/sim/example_granular_collision_sdf.py +0 -202
  305. warp/examples/sim/example_jacobian_ik.py +0 -244
  306. warp/examples/sim/example_particle_chain.py +0 -124
  307. warp/examples/sim/example_quadruped.py +0 -203
  308. warp/examples/sim/example_rigid_chain.py +0 -203
  309. warp/examples/sim/example_rigid_contact.py +0 -195
  310. warp/examples/sim/example_rigid_force.py +0 -133
  311. warp/examples/sim/example_rigid_gyroscopic.py +0 -115
  312. warp/examples/sim/example_rigid_soft_contact.py +0 -140
  313. warp/examples/sim/example_soft_body.py +0 -196
  314. warp/examples/tile/example_tile_walker.py +0 -327
  315. warp/sim/__init__.py +0 -74
  316. warp/sim/articulation.py +0 -793
  317. warp/sim/collide.py +0 -2570
  318. warp/sim/graph_coloring.py +0 -307
  319. warp/sim/import_mjcf.py +0 -791
  320. warp/sim/import_snu.py +0 -227
  321. warp/sim/import_urdf.py +0 -579
  322. warp/sim/import_usd.py +0 -898
  323. warp/sim/inertia.py +0 -357
  324. warp/sim/integrator.py +0 -245
  325. warp/sim/integrator_euler.py +0 -2000
  326. warp/sim/integrator_featherstone.py +0 -2101
  327. warp/sim/integrator_vbd.py +0 -2487
  328. warp/sim/integrator_xpbd.py +0 -3295
  329. warp/sim/model.py +0 -4821
  330. warp/sim/particles.py +0 -121
  331. warp/sim/render.py +0 -431
  332. warp/sim/utils.py +0 -431
  333. warp/tests/sim/disabled_kinematics.py +0 -244
  334. warp/tests/sim/test_cloth.py +0 -863
  335. warp/tests/sim/test_collision.py +0 -743
  336. warp/tests/sim/test_coloring.py +0 -347
  337. warp/tests/sim/test_inertia.py +0 -161
  338. warp/tests/sim/test_model.py +0 -226
  339. warp/tests/sim/test_sim_grad.py +0 -287
  340. warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
  341. warp/tests/sim/test_sim_kinematics.py +0 -98
  342. warp/thirdparty/__init__.py +0 -0
  343. warp_lang-1.9.0.dist-info/RECORD +0 -456
  344. /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
  345. /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
  346. /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
  347. /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
  348. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
  349. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  350. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/bin/warp-clang.dll CHANGED
Binary file
warp/bin/warp.dll CHANGED
Binary file
warp/build.py CHANGED
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -13,597 +13,17 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- import ctypes
17
- import errno
18
- import hashlib
19
- import json
20
- import os
21
- import time
22
- from pathlib import Path
16
+ # isort: skip_file
23
17
 
24
- import warp.config
25
- from warp.thirdparty import appdirs
26
- from warp.types import *
18
+ from warp._src.build import clear_kernel_cache as clear_kernel_cache
27
19
 
28
- # From nvJitLink.h
29
- nvJitLink_input_type = {"cubin": 1, "ptx": 2, "ltoir": 3, "fatbin": 4, "object": 5, "library": 6}
30
20
 
21
+ # TODO: Remove after cleaning up the public API.
31
22
 
32
- # builds cuda source to PTX or CUBIN using NVRTC (output type determined by output_path extension)
33
- def build_cuda(
34
- cu_path,
35
- arch,
36
- output_path,
37
- config="release",
38
- verify_fp=False,
39
- fast_math=False,
40
- fuse_fp=True,
41
- lineinfo=False,
42
- compile_time_trace=False,
43
- ltoirs=None,
44
- fatbins=None,
45
- ) -> None:
46
- with open(cu_path, "rb") as src_file:
47
- src = src_file.read()
48
- cu_path_bytes = cu_path.encode("utf-8")
49
- program_name_bytes = os.path.basename(cu_path).encode("utf-8")
50
- inc_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "native").encode("utf-8")
51
- output_path = output_path.encode("utf-8")
23
+ from warp._src import build as _build
52
24
 
53
- if warp.config.llvm_cuda:
54
- warp.context.runtime.llvm.wp_compile_cuda(src, cu_path_bytes, inc_path, output_path, False)
55
25
 
56
- else:
57
- if ltoirs is None:
58
- ltoirs = []
59
- if fatbins is None:
60
- fatbins = []
26
+ def __getattr__(name):
27
+ from warp._src.utils import get_deprecated_api
61
28
 
62
- link_data = list(ltoirs) + list(fatbins)
63
- num_link = len(link_data)
64
- arr_link = (ctypes.c_char_p * num_link)(*link_data)
65
- arr_link_sizes = (ctypes.c_size_t * num_link)(*[len(l) for l in link_data])
66
- link_input_types = [nvJitLink_input_type["ltoir"]] * len(ltoirs) + [nvJitLink_input_type["fatbin"]] * len(
67
- fatbins
68
- )
69
- arr_link_input_types = (ctypes.c_int * num_link)(*link_input_types)
70
- err = warp.context.runtime.core.wp_cuda_compile_program(
71
- src,
72
- program_name_bytes,
73
- arch,
74
- inc_path,
75
- 0,
76
- None,
77
- config == "debug",
78
- warp.config.verbose,
79
- verify_fp,
80
- fast_math,
81
- fuse_fp,
82
- lineinfo,
83
- compile_time_trace,
84
- output_path,
85
- num_link,
86
- arr_link,
87
- arr_link_sizes,
88
- arr_link_input_types,
89
- )
90
- if err != 0:
91
- raise Exception(f"CUDA kernel build failed with error code {err}")
92
-
93
-
94
- # load PTX or CUBIN as a CUDA runtime module (input type determined by input_path extension)
95
- def load_cuda(input_path, device):
96
- if not device.is_cuda:
97
- raise RuntimeError("Not a CUDA device")
98
-
99
- return warp.context.runtime.core.wp_cuda_load_module(device.context, input_path.encode("utf-8"))
100
-
101
-
102
- def build_cpu(obj_path, cpp_path, mode="release", verify_fp=False, fast_math=False, fuse_fp=True):
103
- with open(cpp_path, "rb") as cpp:
104
- src = cpp.read()
105
- cpp_path = cpp_path.encode("utf-8")
106
- inc_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "native").encode("utf-8")
107
- obj_path = obj_path.encode("utf-8")
108
-
109
- err = warp.context.runtime.llvm.wp_compile_cpp(
110
- src, cpp_path, inc_path, obj_path, mode == "debug", verify_fp, fuse_fp
111
- )
112
- if err != 0:
113
- raise Exception(f"CPU kernel build failed with error code {err}")
114
-
115
-
116
- def init_kernel_cache(path=None):
117
- """Initialize kernel cache directory.
118
-
119
- This function is used during Warp initialization, but it can also be called directly to change the cache location.
120
- If the path is not explicitly specified, a default location will be chosen based on OS-specific conventions.
121
-
122
- To change the default cache location, set warp.config.kernel_cache_dir before calling warp.init().
123
- """
124
-
125
- if path is not None:
126
- cache_root_dir = os.path.realpath(path)
127
- elif "WARP_CACHE_PATH" in os.environ:
128
- cache_root_dir = os.path.realpath(os.environ.get("WARP_CACHE_PATH"))
129
- else:
130
- cache_root_dir = appdirs.user_cache_dir(appname="warp", appauthor="NVIDIA", version=warp.config.version)
131
-
132
- if os.name == "nt" and os.path.isabs(cache_root_dir) and not cache_root_dir.startswith("\\\\?\\"):
133
- # Add Windows long-path prefix, accounting for UNC shares.
134
- if cache_root_dir.startswith("\\\\"):
135
- # UNC path \\server\share\… → \\?\UNC\server\share\…
136
- cache_root_dir = "\\\\?\\UNC\\" + cache_root_dir.lstrip("\\")
137
- else:
138
- # Drive-letter path C:\… → \\?\C:\…
139
- cache_root_dir = "\\\\?\\" + cache_root_dir
140
-
141
- warp.config.kernel_cache_dir = cache_root_dir
142
-
143
- os.makedirs(warp.config.kernel_cache_dir, exist_ok=True)
144
-
145
-
146
- def clear_kernel_cache() -> None:
147
- """Clear the kernel cache directory of previously generated source code and compiler artifacts.
148
-
149
- Only directories beginning with ``wp_`` will be deleted.
150
- This function only clears the cache for the current Warp version.
151
- LTO artifacts are not affected.
152
- """
153
-
154
- warp.context.init()
155
-
156
- import shutil
157
-
158
- is_intialized = warp.context.runtime is not None
159
- assert is_intialized, "The kernel cache directory is not configured; wp.init() has not been called yet or failed."
160
-
161
- for item in os.listdir(warp.config.kernel_cache_dir):
162
- item_path = os.path.join(warp.config.kernel_cache_dir, item)
163
- if os.path.isdir(item_path) and item.startswith("wp_"):
164
- # Remove the directory and its contents
165
- shutil.rmtree(item_path, ignore_errors=True)
166
-
167
-
168
- def clear_lto_cache() -> None:
169
- """Clear the LTO cache directory of previously generated LTO code.
170
-
171
- The LTO cache is stored within a subdirectory of the kernel cache directory.
172
- This function only clears the cache for the current Warp version.
173
- """
174
-
175
- warp.context.init()
176
-
177
- import shutil
178
-
179
- is_intialized = warp.context.runtime is not None
180
- assert is_intialized, "The kernel cache directory is not configured; wp.init() has not been called yet or failed."
181
-
182
- lto_path = os.path.join(warp.config.kernel_cache_dir, "lto")
183
- if os.path.isdir(lto_path):
184
- # Remove the lto directory and its contents
185
- shutil.rmtree(lto_path, ignore_errors=True)
186
-
187
-
188
- def safe_rename(src, dst, attempts=5, delay=0.1):
189
- for i in range(attempts):
190
- try:
191
- os.rename(src, dst)
192
- return
193
- except FileExistsError:
194
- return
195
- except OSError as e:
196
- if e.errno == errno.ENOTEMPTY:
197
- # if directory exists we assume another process
198
- # got there first, in which case we will copy
199
- # our output to the directory manually in second step
200
- return
201
- else:
202
- # otherwise assume directory creation failed e.g.: access denied
203
- # on Windows we see occasional failures to rename directories due to
204
- # some process holding a lock on a file to be moved to workaround
205
- # this we make multiple attempts to rename with some delay
206
- if i < attempts - 1:
207
- time.sleep(delay)
208
- else:
209
- print(
210
- f"Could not update Warp cache with compiled binaries, trying to rename {src} to {dst}, error {e}"
211
- )
212
- raise e
213
-
214
-
215
- def hash_symbol(symbol):
216
- ch = hashlib.sha256()
217
- ch.update(symbol.encode("utf-8"))
218
- return ch.hexdigest()
219
-
220
-
221
- def get_lto_cache_dir():
222
- lto_dir = os.path.join(warp.config.kernel_cache_dir, "lto")
223
- return lto_dir
224
-
225
-
226
- def get_cached_lto(path):
227
- if os.path.exists(path):
228
- with open(path, "rb") as f:
229
- lto_code_data = f.read()
230
- return lto_code_data
231
- else:
232
- return None
233
-
234
-
235
- def get_cached_lto_meta(path, symbol):
236
- if os.path.exists(path):
237
- with open(path) as f:
238
- keys = json.load(f)
239
- value = keys[symbol]
240
- return value
241
- else:
242
- return None
243
-
244
-
245
- def _build_lto_base(lto_symbol, compile_func, builder, extra_files=None):
246
- """Generic LTO build function that handles caching, file operations and process management.
247
-
248
- Args:
249
- lto_symbol: Unique identifier for the LTO operation
250
- compile_func: Function to compile the specific LTO
251
- (receives a dictionary of build paths)
252
- builder: Builder object to store results
253
- extra_files: Dictionary of additional file types to handle (e.g.,
254
- {".meta": None, ".fatbin": None}). Values are the functions to get
255
- the cached file data.
256
-
257
- Returns:
258
- Tuple where the first element is a success flag (``bool``). The second
259
- element is the LTO code as bytes (or ``None`` on failure).
260
- If ``extra_files`` is provided, additional elements follow in the same
261
- order as the keys in ``extra_files``:
262
- - ``".meta"``: int (shared memory bytes).
263
- - ``"_fatbin.lto"``: bytes (universal fatbin).
264
- """
265
- if extra_files is None:
266
- extra_files = {}
267
-
268
- # Hash symbol and set up paths
269
- h = hash_symbol(lto_symbol)
270
- lto_dir = get_lto_cache_dir()
271
- lto_name = f"{h[:7]}.lto"
272
- lto_path = os.path.join(lto_dir, lto_name)
273
-
274
- # Set up paths for extra files
275
- file_paths = {".lto": lto_path}
276
- temp_file_paths = {}
277
-
278
- for ext, _ in extra_files.items():
279
- name = f"{h[:7]}{ext}"
280
- file_paths[ext] = os.path.join(lto_dir, name)
281
-
282
- # Check if already built but not cached
283
- lto_code_data = get_cached_lto(lto_path)
284
- if lto_code_data is not None:
285
- # Get the cached data for the extra files and early return
286
- all_files_cached = True
287
- for ext, getter in extra_files.items():
288
- if getter and os.path.exists(file_paths[ext]):
289
- cached_data = getter(file_paths[ext])
290
- if cached_data is None:
291
- all_files_cached = False
292
- break
293
- extra_files[ext] = cached_data
294
- elif getter: # If there's a getter but file doesn't exist
295
- all_files_cached = False
296
- break
297
-
298
- if all_files_cached:
299
- if not extra_files:
300
- return (True, lto_code_data)
301
- else:
302
- return (True, lto_code_data, *[extra_files[ext] for ext in extra_files.keys()])
303
-
304
- # Create process-dependent temporary build directory
305
- build_dir = f"{lto_dir}_p{os.getpid()}"
306
- Path(build_dir).mkdir(parents=True, exist_ok=True)
307
-
308
- # Set up temporary paths for the build outputs
309
- for ext, path in file_paths.items():
310
- temp_file_paths[ext] = os.path.join(build_dir, os.path.basename(path))
311
-
312
- # Compile LTO with the specialized function
313
- result, outputs = compile_func(temp_file_paths)
314
-
315
- if not result:
316
- # Clean up and fail
317
- for path in temp_file_paths.values():
318
- if Path(path).exists():
319
- Path(path).unlink()
320
-
321
- outputs[".lto"] = None
322
- for ext in extra_files.keys():
323
- outputs[ext] = None
324
- else:
325
- # Move outputs to cache
326
- safe_rename(build_dir, lto_dir)
327
-
328
- # If build_dir couldn't be moved by a rename, move the outputs one-by-one to lto_dir
329
- if os.path.exists(lto_dir):
330
- for ext, path in file_paths.items():
331
- if not os.path.exists(path):
332
- try:
333
- # copy output file to the destination lto dir
334
- os.rename(temp_file_paths[ext], path)
335
- except (OSError, FileExistsError):
336
- # another process likely updated the lto dir first
337
- pass
338
-
339
- # Clean up the temporary build directory
340
- if build_dir:
341
- import shutil
342
-
343
- shutil.rmtree(build_dir, ignore_errors=True)
344
-
345
- if not extra_files:
346
- return (result, outputs[".lto"])
347
- else:
348
- return (result, outputs[".lto"], *[outputs[ext] for ext in extra_files.keys()])
349
-
350
-
351
- def build_lto_dot(M, N, K, adtype, bdtype, cdtype, alayout, blayout, clayout, arch, num_threads, builder):
352
- arch = 120 if arch > 121 else arch
353
-
354
- # Maps Python/Warp types to C++ types and enums
355
- def cublasdx_type_map(dtype):
356
- if dtype == float16:
357
- return ("wp::float16", 3, 0)
358
- if dtype == float32:
359
- return ("wp::float32", 5, 0)
360
- if dtype == float64:
361
- return ("wp::float64", 6, 0)
362
- if dtype == vec2h:
363
- return ("wp::vec2h", 3, 1)
364
- if dtype == vec2f:
365
- return ("wp::vec2f", 5, 1)
366
- if dtype == vec2d:
367
- return ("wp::vec2d", 6, 1)
368
- raise TypeError("Unsupported input type in tile_matmul")
369
-
370
- def cublasdx_arrangement_map(layout):
371
- if layout == "colmajor":
372
- return 0 # CUBLASDX_ARRANGEMENT_COL_MAJOR
373
- if layout == "rowmajor":
374
- return 1 # CUBLASDX_ARRANGEMENT_ROW_MAJOR
375
- raise ValueError("Unsupported layout in tile_matmul")
376
-
377
- (a_dtype, a_prec, a_type) = cublasdx_type_map(adtype)
378
- (b_dtype, b_prec, b_type) = cublasdx_type_map(bdtype)
379
- (c_dtype, c_prec, c_type) = cublasdx_type_map(cdtype)
380
- a_arrangement = cublasdx_arrangement_map(alayout)
381
- b_arrangement = cublasdx_arrangement_map(blayout)
382
- c_arrangement = cublasdx_arrangement_map(clayout)
383
-
384
- if a_type != b_type or a_type != c_type:
385
- raise TypeError("tile_matmul(A, B, C) requires all inputs to be real or complex")
386
-
387
- element_type = a_type
388
-
389
- lto_symbol = f"dot_{M}_{N}_{K}_{arch}_{num_threads}_{a_arrangement}_{b_arrangement}_{c_arrangement}_{a_prec}_{b_prec}_{c_prec}_{element_type}"
390
-
391
- def compile_lto_dot(temp_paths):
392
- result = warp.context.runtime.core.wp_cuda_compile_dot(
393
- temp_paths[".lto"].encode("utf-8"),
394
- lto_symbol.encode("utf-8"),
395
- 0,
396
- None,
397
- None,
398
- arch,
399
- M,
400
- N,
401
- K,
402
- a_prec,
403
- b_prec,
404
- c_prec,
405
- element_type,
406
- a_arrangement,
407
- b_arrangement,
408
- c_arrangement,
409
- num_threads,
410
- )
411
-
412
- if result:
413
- with open(temp_paths[".lto"], "rb") as f:
414
- lto_code_data = f.read()
415
- return True, {".lto": lto_code_data}
416
- return False, {}
417
-
418
- # Early out if already cached in module
419
- if lto_symbol in builder.ltoirs:
420
- lto_code_data = builder.ltoirs[lto_symbol]
421
- else:
422
- (result, lto_code_data) = _build_lto_base(lto_symbol, compile_lto_dot, builder, {})
423
-
424
- if not result:
425
- raise RuntimeError(
426
- f"Failed to compile LTO '{lto_symbol}'. "
427
- "Set the environment variable LIBMATHDX_LOG_LEVEL=5 and rerun for more details."
428
- )
429
-
430
- # Update builder
431
- builder.ltoirs[lto_symbol] = lto_code_data
432
- builder.ltoirs_decl[lto_symbol] = (
433
- f"void {lto_symbol}({c_dtype}*, {a_dtype}*, {b_dtype}*, {c_dtype}*, {c_dtype}*);"
434
- )
435
-
436
- return lto_symbol, lto_code_data
437
-
438
-
439
def build_lto_solver(
    M,
    N,
    NRHS,
    solver,
    solver_enum,
    side_enum,
    diag_enum,
    alayout,
    blayout,
    fill_mode,
    arch,
    precision_enum,
    num_threads,
    parameter_list,
    builder,
    smem_estimate_bytes=None,
):
    """Build, or fetch from the builder's cache, the LTO code for a solver routine.

    The routine is identified by a symbol name derived from the problem
    dimensions, target architecture, thread count, layouts and the various
    enum values. If the symbol is already registered in ``builder.ltoirs`` the
    cached data is returned; otherwise it is compiled through
    ``_build_lto_base`` and registered on the builder together with its C
    declaration and fatbin.

    Args:
        M, N, NRHS: Problem dimensions; embedded in the symbol name and
            forwarded unchanged to the native compile call.
        solver: Solver name used as the symbol prefix.
        solver_enum, side_enum, diag_enum, precision_enum, fill_mode: Integer
            selectors forwarded to the native compile call. A negative
            ``side_enum`` / ``diag_enum`` is rendered as ``x`` in the symbol.
        alayout, blayout: ``"colmajor"`` or ``"rowmajor"``.
        arch: Target architecture; values above 121 are compiled as 120.
        num_threads: Thread count forwarded to the native compile call.
        parameter_list: Parenthesised C parameter list appended to the symbol
            to form the function declaration.
        builder: Object exposing ``ltoirs``, ``ltoirs_decl`` and ``fatbins``
            mappings that are updated on a successful build.
        smem_estimate_bytes: Optional estimate of the shared memory the
            routine needs; only used to enrich the failure message.

    Returns:
        A ``(lto_symbol, lto_code_data)`` tuple.

    Raises:
        ValueError: If ``alayout`` or ``blayout`` is not a supported layout.
        RuntimeError: If compilation fails.
    """
    # Architectures above 121 are compiled for 120.
    arch = 120 if arch > 121 else arch

    def cusolverdx_arrangement_map(layout):
        if layout == "colmajor":
            return 0  # CUSOLVERDX_ARRANGEMENT_COL_MAJOR
        if layout == "rowmajor":
            return 1  # CUSOLVERDX_ARRANGEMENT_ROW_MAJOR
        # Name the solver (not tile_matmul) so the error points at the right call.
        raise ValueError(
            f"Unsupported layout {layout!r} in solver '{solver}', expected 'colmajor' or 'rowmajor'"
        )

    a_arrangement = cusolverdx_arrangement_map(alayout)
    b_arrangement = cusolverdx_arrangement_map(blayout)

    lto_symbol = f"{solver}_{M}_{N}_{NRHS}_{arch}_{num_threads}_{a_arrangement}_{b_arrangement}_{precision_enum}_{side_enum if side_enum >= 0 else 'x'}_{diag_enum if diag_enum >= 0 else 'x'}_{fill_mode}"

    def compile_lto_solver(temp_paths):
        # compile LTO
        result = warp.context.runtime.core.wp_cuda_compile_solver(
            temp_paths["_fatbin.lto"].encode("utf-8"),
            temp_paths[".lto"].encode("utf-8"),
            lto_symbol.encode("utf-8"),
            0,
            None,
            None,
            arch,
            M,
            N,
            NRHS,
            solver_enum,
            side_enum,
            diag_enum,
            precision_enum,
            a_arrangement,
            b_arrangement,
            fill_mode,
            num_threads,
        )

        if result:
            with open(temp_paths[".lto"], "rb") as f:
                lto_code_data = f.read()
            with open(temp_paths["_fatbin.lto"], "rb") as f:
                universal_fatbin_code_data = f.read()
            return True, {".lto": lto_code_data, "_fatbin.lto": universal_fatbin_code_data}
        return False, {}

    # Early out if already cached in module
    if lto_symbol in builder.ltoirs:
        lto_code_data = builder.ltoirs[lto_symbol]
    else:
        (result, lto_code_data, universal_fatbin_code_data) = _build_lto_base(
            lto_symbol, compile_lto_solver, builder, {"_fatbin.lto": get_cached_lto}
        )

        if not result:
            # An outdated toolkit takes precedence over any other diagnosis, so
            # check it before querying devices for the shared-memory hint.
            toolkit_version = warp.context.runtime.toolkit_version
            if toolkit_version < (12, 6):
                raise RuntimeError(
                    "cuSolverDx requires CUDA Toolkit 12.6.3 or later. This version of Warp was built against CUDA Toolkit "
                    f"{toolkit_version[0]}.{toolkit_version[1]}. "
                    "Upgrade your CUDA Toolkit and rebuild Warp, or install a Warp wheel built with CUDA >= 12.6.3."
                )

            hint = ""
            if smem_estimate_bytes:
                # Fallback limit used when no matching device reports one.
                max_smem_bytes = 232448
                max_smem_is_estimate = True
                for d in warp.get_cuda_devices():
                    if d.arch == arch:
                        # We can directly query the max shared memory for this device
                        queried_bytes = warp.context.runtime.core.wp_cuda_get_max_shared_memory(d.context)
                        if queried_bytes > 0:
                            max_smem_bytes = queried_bytes
                            max_smem_is_estimate = False
                            break
                if smem_estimate_bytes > max_smem_bytes:
                    source = "estimated limit" if max_smem_is_estimate else "device-reported limit"
                    hint = (
                        f"Estimated shared memory requirement is {smem_estimate_bytes}B, "
                        f"but the {source} is {max_smem_bytes}B. "
                        "The tile size(s) may be too large for this device."
                    )

            # Join the sentences so an empty hint does not leave a double space.
            message_parts = [f"Failed to compile LTO '{lto_symbol}'."]
            if hint:
                message_parts.append(hint)
            message_parts.append(
                "Set the environment variable LIBMATHDX_LOG_LEVEL=5 and rerun for more details."
            )
            raise RuntimeError(" ".join(message_parts))

        # Update builder
        builder.ltoirs[lto_symbol] = lto_code_data
        builder.ltoirs_decl[lto_symbol] = f"void {lto_symbol}{parameter_list};"
        builder.fatbins[lto_symbol] = universal_fatbin_code_data

    return lto_symbol, lto_code_data
549
-
550
-
551
def build_lto_fft(arch, size, ept, direction, dir, precision, builder):
    """Build, or fetch from the builder's cache, the LTO code for an FFT routine.

    The routine is identified by a symbol derived from ``size``, ``ept``,
    the (clamped) ``arch``, ``direction`` and ``precision``. If both the LTO
    data and its shared-memory requirement are already registered on
    ``builder`` they are returned directly; otherwise the routine is compiled
    through ``_build_lto_base`` and the results are registered on the builder.

    Args:
        arch: Target architecture; values above 121 are compiled as 120.
        size: FFT size, forwarded to the native compile call.
        ept: Forwarded to the native compile call (presumably elements per
            thread; confirm against the caller).
        direction: Direction label used only in the symbol name.
        dir: Direction value forwarded to the native compile call. The name
            shadows the builtin but is part of the existing signature.
        precision: Precision selector forwarded to the native compile call.
        builder: Object exposing ``ltoirs`` and ``shared_memory_bytes``
            mappings that are updated on a successful build.

    Returns:
        A ``(lto_symbol, lto_code_data, shared_memory_bytes)`` tuple.

    Raises:
        RuntimeError: If compilation fails.
    """
    # Architectures above 121 are compiled for 120.
    arch = 120 if arch > 121 else arch

    lto_symbol = f"fft_{size}_{ept}_{arch}_{direction}_{precision}"

    def compile_lto_fft(temp_paths):
        # Filled in by the native call with the routine's shared memory size.
        shared_memory_size = ctypes.c_int(0)

        result = warp.context.runtime.core.wp_cuda_compile_fft(
            temp_paths[".lto"].encode("utf-8"),
            lto_symbol.encode("utf-8"),
            0,
            None,
            None,
            arch,
            size,
            ept,
            dir,
            precision,
            ctypes.byref(shared_memory_size),
        )

        if not result:
            return False, {}

        with open(temp_paths[".lto"], "rb") as f:
            lto_code_data = f.read()

        shared_memory_bytes = tile.round_up(shared_memory_size.value)

        # output meta file with shared memory requirements for this lto_symbol
        meta = {lto_symbol: shared_memory_bytes}

        with open(temp_paths[".meta"], "w") as meta_file:
            json.dump(meta, meta_file)

        return True, {".lto": lto_code_data, ".meta": shared_memory_bytes}

    # Early out if already cached in module
    if lto_symbol in builder.ltoirs and lto_symbol in builder.shared_memory_bytes:
        lto_code_data = builder.ltoirs[lto_symbol]
        shared_memory_bytes = builder.shared_memory_bytes[lto_symbol]
    else:
        (result, lto_code_data, shared_memory_bytes) = _build_lto_base(
            lto_symbol, compile_lto_fft, builder, {".meta": lambda path: get_cached_lto_meta(path, lto_symbol)}
        )

        if not result:
            # Trailing space keeps the two sentences from running together.
            raise RuntimeError(
                f"Failed to compile LTO '{lto_symbol}'. "
                "Set the environment variable LIBMATHDX_LOG_LEVEL=5 and rerun for more details."
            )

        # Update builder
        builder.ltoirs[lto_symbol] = lto_code_data
        builder.shared_memory_bytes[lto_symbol] = shared_memory_bytes

    return lto_symbol, lto_code_data, shared_memory_bytes
29
+ return get_deprecated_api(_build, "wp", name)