warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,658 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import ctypes
17
+ import enum
18
+
19
+ import jax.numpy as jnp
20
+ import numpy as np
21
+
22
+ import warp as wp
23
+
24
+ _wp_module_name_ = "warp.jax_experimental.xla_ffi"
25
+
26
+ #######################################################################
27
+ # ctypes structures and enums for XLA's FFI API:
28
+ # https://github.com/openxla/xla/blob/a1a5e62fbffa3a3b6c409d72607456cf5b353a22/xla/ffi/api/c_api.h
29
+ #######################################################################
30
+
31
+
32
+ # typedef enum {
33
+ # XLA_FFI_Extension_Metadata = 1,
34
+ # } XLA_FFI_Extension_Type;
35
class XLA_FFI_Extension_Type(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_Extension_Type``.

    Only the ``Metadata`` kind (value 1) is declared, matching the C
    definition quoted in the comment above this class.
    """

    Metadata = 1
37
+
38
+
39
+ # typedef struct XLA_FFI_Extension_Base {
40
+ # size_t struct_size;
41
+ # XLA_FFI_Extension_Type type;
42
+ # struct XLA_FFI_Extension_Base* next;
43
+ # } XLA_FFI_Extension_Base;
44
class XLA_FFI_Extension_Base(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Extension_Base``.

    The struct is self-referential (``next`` points at another
    ``XLA_FFI_Extension_Base``), so the field list is attached after the
    class object exists rather than inside the class body.
    """


# Deferred field definition: POINTER(XLA_FFI_Extension_Base) can only be
# built once the class name is bound.
XLA_FFI_Extension_Base._fields_ = [
    # size_t struct_size
    ("struct_size", ctypes.c_size_t),
    # XLA_FFI_Extension_Type type (stored as a plain C int)
    ("type", ctypes.c_int),
    # struct XLA_FFI_Extension_Base* next
    ("next", ctypes.POINTER(XLA_FFI_Extension_Base)),
]
53
+
54
+
55
+ # typedef enum {
56
+ # XLA_FFI_ExecutionStage_INSTANTIATE = 0,
57
+ # XLA_FFI_ExecutionStage_PREPARE = 1,
58
+ # XLA_FFI_ExecutionStage_INITIALIZE = 2,
59
+ # XLA_FFI_ExecutionStage_EXECUTE = 3,
60
+ # } XLA_FFI_ExecutionStage;
61
class XLA_FFI_ExecutionStage(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_ExecutionStage``.

    Member values are copied from the C definition quoted in the comment
    above this class.
    """

    INSTANTIATE = 0
    PREPARE = 1
    INITIALIZE = 2
    EXECUTE = 3
66
+
67
+
68
+ # typedef enum {
69
+ # XLA_FFI_DataType_INVALID = 0,
70
+ # XLA_FFI_DataType_PRED = 1,
71
+ # XLA_FFI_DataType_S8 = 2,
72
+ # XLA_FFI_DataType_S16 = 3,
73
+ # XLA_FFI_DataType_S32 = 4,
74
+ # XLA_FFI_DataType_S64 = 5,
75
+ # XLA_FFI_DataType_U8 = 6,
76
+ # XLA_FFI_DataType_U16 = 7,
77
+ # XLA_FFI_DataType_U32 = 8,
78
+ # XLA_FFI_DataType_U64 = 9,
79
+ # XLA_FFI_DataType_F16 = 10,
80
+ # XLA_FFI_DataType_F32 = 11,
81
+ # XLA_FFI_DataType_F64 = 12,
82
+ # XLA_FFI_DataType_BF16 = 16,
83
+ # XLA_FFI_DataType_C64 = 15,
84
+ # XLA_FFI_DataType_C128 = 18,
85
+ # XLA_FFI_DataType_TOKEN = 17,
86
+ # XLA_FFI_DataType_F8E5M2 = 19,
87
+ # XLA_FFI_DataType_F8E3M4 = 29,
88
+ # XLA_FFI_DataType_F8E4M3 = 28,
89
+ # XLA_FFI_DataType_F8E4M3FN = 20,
90
+ # XLA_FFI_DataType_F8E4M3B11FNUZ = 23,
91
+ # XLA_FFI_DataType_F8E5M2FNUZ = 24,
92
+ # XLA_FFI_DataType_F8E4M3FNUZ = 25,
93
+ # XLA_FFI_DataType_F4E2M1FN = 32,
94
+ # XLA_FFI_DataType_F8E8M0FNU = 33,
95
+ # } XLA_FFI_DataType;
96
class XLA_FFI_DataType(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_DataType``.

    The numeric codes are not monotonic (for example ``BF16`` is 16 while
    ``C64`` is 15); they are copied verbatim from the C definition quoted
    in the comment above this class, and the declaration order follows
    that definition as well.
    """

    INVALID = 0

    # Predicate and integer types.
    PRED = 1
    S8 = 2
    S16 = 3
    S32 = 4
    S64 = 5
    U8 = 6
    U16 = 7
    U32 = 8
    U64 = 9

    # Floating-point, complex and token types.
    F16 = 10
    F32 = 11
    F64 = 12
    BF16 = 16
    C64 = 15
    C128 = 18
    TOKEN = 17

    # Sub-byte and 8-bit floating-point formats.
    F8E5M2 = 19
    F8E3M4 = 29
    F8E4M3 = 28
    F8E4M3FN = 20
    F8E4M3B11FNUZ = 23
    F8E5M2FNUZ = 24
    F8E4M3FNUZ = 25
    F4E2M1FN = 32
    F8E8M0FNU = 33
123
+
124
+
125
+ # struct XLA_FFI_Buffer {
126
+ # size_t struct_size;
127
+ # XLA_FFI_Extension_Base* extension_start;
128
+ #
129
+ # XLA_FFI_DataType dtype;
130
+ # void* data;
131
+ # int64_t rank;
132
+ # int64_t* dims; // length == rank
133
+ # };
134
class XLA_FFI_Buffer(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Buffer``.

    Field order and types follow the C definition quoted in the comment
    above this class.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Extension_Base* extension_start
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        # XLA_FFI_DataType dtype (stored as a plain C int)
        ("dtype", ctypes.c_int),
        # void* data
        ("data", ctypes.c_void_p),
        # int64_t rank
        ("rank", ctypes.c_int64),
        # int64_t* dims; the C header notes length == rank
        ("dims", ctypes.POINTER(ctypes.c_int64)),
    )
143
+
144
+
145
+ # typedef enum {
146
+ # XLA_FFI_ArgType_BUFFER = 1,
147
+ # } XLA_FFI_ArgType;
148
class XLA_FFI_ArgType(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_ArgType``.

    ``BUFFER`` (value 1) is the only argument kind declared in the C
    definition quoted in the comment above this class.
    """

    BUFFER = 1
150
+
151
+
152
+ # typedef enum {
153
+ # XLA_FFI_RetType_BUFFER = 1,
154
+ # } XLA_FFI_RetType;
155
class XLA_FFI_RetType(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_RetType``.

    ``BUFFER`` (value 1) is the only result kind declared in the C
    definition quoted in the comment above this class.
    """

    BUFFER = 1
157
+
158
+
159
+ # struct XLA_FFI_Args {
160
+ # size_t struct_size;
161
+ # XLA_FFI_Extension_Base* extension_start;
162
+ # int64_t size;
163
+ # XLA_FFI_ArgType* types; // length == size
164
+ # void** args; // length == size
165
+ # };
166
class XLA_FFI_Args(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Args``.

    ``types`` and ``args`` are parallel arrays; the C header quoted above
    this class notes that both have length ``size``.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Extension_Base* extension_start
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        # int64_t size
        ("size", ctypes.c_int64),
        # XLA_FFI_ArgType* types (enum elements stored as plain C ints)
        ("types", ctypes.POINTER(ctypes.c_int)),
        # void** args
        ("args", ctypes.POINTER(ctypes.c_void_p)),
    )
174
+
175
+
176
+ # struct XLA_FFI_Rets {
177
+ # size_t struct_size;
178
+ # XLA_FFI_Extension_Base* extension_start;
179
+ # int64_t size;
180
+ # XLA_FFI_RetType* types; // length == size
181
+ # void** rets; // length == size
182
+ # };
183
class XLA_FFI_Rets(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Rets``.

    ``types`` and ``rets`` are parallel arrays; the C header quoted above
    this class notes that both have length ``size``.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Extension_Base* extension_start
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        # int64_t size
        ("size", ctypes.c_int64),
        # XLA_FFI_RetType* types (enum elements stored as plain C ints)
        ("types", ctypes.POINTER(ctypes.c_int)),
        # void** rets
        ("rets", ctypes.POINTER(ctypes.c_void_p)),
    )
191
+
192
+
193
+ # typedef struct XLA_FFI_ByteSpan {
194
+ # const char* ptr;
195
+ # size_t len;
196
+ # } XLA_FFI_ByteSpan;
197
class XLA_FFI_ByteSpan(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_ByteSpan``.

    A pointer/length pair. The pointer is typed as ``POINTER(c_char)``,
    so the bytes are read by slicing, e.g. ``span.ptr[:span.len]``.
    """

    _fields_ = (
        # const char* ptr
        ("ptr", ctypes.POINTER(ctypes.c_char)),
        # size_t len
        ("len", ctypes.c_size_t),
    )
202
+
203
+
204
+ # typedef struct XLA_FFI_Scalar {
205
+ # XLA_FFI_DataType dtype;
206
+ # void* value;
207
+ # } XLA_FFI_Scalar;
208
class XLA_FFI_Scalar(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Scalar``.

    Pairs a data-type code with an untyped pointer to the value, as in
    the C definition quoted in the comment above this class.
    """

    _fields_ = (
        # XLA_FFI_DataType dtype (stored as a plain C int)
        ("dtype", ctypes.c_int),
        # void* value
        ("value", ctypes.c_void_p),
    )
213
+
214
+
215
+ # typedef struct XLA_FFI_Array {
216
+ # XLA_FFI_DataType dtype;
217
+ # size_t size;
218
+ # void* data;
219
+ # } XLA_FFI_Array;
220
class XLA_FFI_Array(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Array``.

    Holds a data-type code, a size and an untyped data pointer, in the
    order given by the C definition quoted in the comment above this
    class.
    """

    _fields_ = (
        # XLA_FFI_DataType dtype (stored as a plain C int)
        ("dtype", ctypes.c_int),
        # size_t size
        ("size", ctypes.c_size_t),
        # void* data
        ("data", ctypes.c_void_p),
    )
226
+
227
+
228
+ # typedef enum {
229
+ # XLA_FFI_AttrType_ARRAY = 1,
230
+ # XLA_FFI_AttrType_DICTIONARY = 2,
231
+ # XLA_FFI_AttrType_SCALAR = 3,
232
+ # XLA_FFI_AttrType_STRING = 4,
233
+ # } XLA_FFI_AttrType;
234
class XLA_FFI_AttrType(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_AttrType``.

    Member values are copied from the C definition quoted in the comment
    above this class.
    """

    ARRAY = 1
    DICTIONARY = 2
    SCALAR = 3
    STRING = 4
239
+
240
+
241
+ # struct XLA_FFI_Attrs {
242
+ # size_t struct_size;
243
+ # XLA_FFI_Extension_Base* extension_start;
244
+ # int64_t size;
245
+ # XLA_FFI_AttrType* types; // length == size
246
+ # XLA_FFI_ByteSpan** names; // length == size
247
+ # void** attrs; // length == size
248
+ # };
249
class XLA_FFI_Attrs(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Attrs``.

    ``types``, ``names`` and ``attrs`` are parallel arrays; the C header
    quoted above this class notes that each has length ``size``.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Extension_Base* extension_start
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        # int64_t size
        ("size", ctypes.c_int64),
        # XLA_FFI_AttrType* types (enum elements stored as plain C ints)
        ("types", ctypes.POINTER(ctypes.c_int)),
        # XLA_FFI_ByteSpan** names
        ("names", ctypes.POINTER(ctypes.POINTER(XLA_FFI_ByteSpan))),
        # void** attrs
        ("attrs", ctypes.POINTER(ctypes.c_void_p)),
    )
258
+
259
+
260
+ # struct XLA_FFI_Api_Version {
261
+ # size_t struct_size;
262
+ # XLA_FFI_Extension_Base* extension_start;
263
+ # int major_version; // out
264
+ # int minor_version; // out
265
+ # };
266
class XLA_FFI_Api_Version(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Api_Version``.

    The C header quoted above this class marks both version fields as
    "out" values.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Extension_Base* extension_start
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        # int major_version (out)
        ("major_version", ctypes.c_int),
        # int minor_version (out)
        ("minor_version", ctypes.c_int),
    )
273
+
274
+
275
+ # enum XLA_FFI_Handler_TraitsBits {
276
+ # // Calls to FFI handler are safe to trace into the command buffer. It means
277
+ # // that calls to FFI handler always launch exactly the same device operations
278
+ # // (can depend on attribute values) that can be captured and then replayed.
279
+ # XLA_FFI_HANDLER_TRAITS_COMMAND_BUFFER_COMPATIBLE = 1u << 0,
280
+ # };
281
class XLA_FFI_Handler_TraitsBits(enum.IntEnum):
    """Python mirror of the C enum ``XLA_FFI_Handler_TraitsBits``.

    Each member is a single bit. Per the C header comment quoted above
    this class, ``COMMAND_BUFFER_COMPATIBLE`` marks a handler whose calls
    are safe to trace into a command buffer.
    """

    # Written as a shift to match the C definition (1u << 0).
    COMMAND_BUFFER_COMPATIBLE = 1 << 0
283
+
284
+
285
+ # struct XLA_FFI_Metadata {
286
+ # size_t struct_size;
287
+ # XLA_FFI_Api_Version api_version;
288
+ # XLA_FFI_Handler_Traits traits;
289
+ # };
290
class XLA_FFI_Metadata(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Metadata``.

    Embeds an ``XLA_FFI_Api_Version`` by value, followed by the handler
    traits stored as an unsigned 32-bit integer.
    """

    _fields_ = (
        # size_t struct_size
        ("struct_size", ctypes.c_size_t),
        # XLA_FFI_Api_Version api_version (embedded struct, not a pointer)
        ("api_version", XLA_FFI_Api_Version),
        # XLA_FFI_Handler_Traits traits
        ("traits", ctypes.c_uint32),
    )
296
+
297
+
298
+ # struct XLA_FFI_Metadata_Extension {
299
+ # XLA_FFI_Extension_Base extension_base;
300
+ # XLA_FFI_Metadata* metadata;
301
+ # };
302
class XLA_FFI_Metadata_Extension(ctypes.Structure):
    """ctypes layout of the C struct ``XLA_FFI_Metadata_Extension``.

    Begins with an embedded ``XLA_FFI_Extension_Base`` and adds a pointer
    to an ``XLA_FFI_Metadata`` record.
    """

    _fields_ = (
        # XLA_FFI_Extension_Base extension_base (embedded by value)
        ("extension_base", XLA_FFI_Extension_Base),
        # XLA_FFI_Metadata* metadata
        ("metadata", ctypes.POINTER(XLA_FFI_Metadata)),
    )
307
+
308
+
309
# typedef enum {
#   XLA_FFI_Error_Code_OK = 0,
#   XLA_FFI_Error_Code_CANCELLED = 1,
#   XLA_FFI_Error_Code_UNKNOWN = 2,
#   XLA_FFI_Error_Code_INVALID_ARGUMENT = 3,
#   XLA_FFI_Error_Code_DEADLINE_EXCEEDED = 4,
#   XLA_FFI_Error_Code_NOT_FOUND = 5,
#   XLA_FFI_Error_Code_ALREADY_EXISTS = 6,
#   XLA_FFI_Error_Code_PERMISSION_DENIED = 7,
#   XLA_FFI_Error_Code_RESOURCE_EXHAUSTED = 8,
#   XLA_FFI_Error_Code_FAILED_PRECONDITION = 9,
#   XLA_FFI_Error_Code_ABORTED = 10,
#   XLA_FFI_Error_Code_OUT_OF_RANGE = 11,
#   XLA_FFI_Error_Code_UNIMPLEMENTED = 12,
#   XLA_FFI_Error_Code_INTERNAL = 13,
#   XLA_FFI_Error_Code_UNAVAILABLE = 14,
#   XLA_FFI_Error_Code_DATA_LOSS = 15,
#   XLA_FFI_Error_Code_UNAUTHENTICATED = 16
# } XLA_FFI_Error_Code;
class XLA_FFI_Error_Code(enum.IntEnum):
    """Integer error codes mirroring the C ``XLA_FFI_Error_Code`` enum shown above.

    Member names drop the ``XLA_FFI_Error_Code_`` prefix; the numeric values
    are identical to the C enumerators.
    """

    OK = 0
    CANCELLED = 1
    UNKNOWN = 2
    INVALID_ARGUMENT = 3
    DEADLINE_EXCEEDED = 4
    NOT_FOUND = 5
    ALREADY_EXISTS = 6
    PERMISSION_DENIED = 7
    RESOURCE_EXHAUSTED = 8
    FAILED_PRECONDITION = 9
    ABORTED = 10
    OUT_OF_RANGE = 11
    UNIMPLEMENTED = 12
    INTERNAL = 13
    UNAVAILABLE = 14
    DATA_LOSS = 15
    UNAUTHENTICATED = 16
346
+
347
+
348
# struct XLA_FFI_Error_Create_Args {
#   size_t struct_size;
#   XLA_FFI_Extension_Base* extension_start;
#   const char* message;
#   XLA_FFI_Error_Code errc;
# };
class XLA_FFI_Error_Create_Args(ctypes.Structure):
    """ctypes mirror of the C ``XLA_FFI_Error_Create_Args`` struct shown above."""

    _fields_ = (
        ("struct_size", ctypes.c_size_t),
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        ("message", ctypes.c_char_p),
        ("errc", ctypes.c_int),  # XLA_FFI_Error_Code
    )


# Function-pointer type for the error-creation API entry: it takes a pointer
# to the argument struct and its result is declared as an opaque ``c_void_p``.
XLA_FFI_Error_Create = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.POINTER(XLA_FFI_Error_Create_Args))
364
+
365
+
366
# struct XLA_FFI_Stream_Get_Args {
#   size_t struct_size;
#   XLA_FFI_Extension_Base* extension_start;
#   XLA_FFI_ExecutionContext* ctx;
#   void* stream;  // out
# };
class XLA_FFI_Stream_Get_Args(ctypes.Structure):
    """ctypes mirror of the C ``XLA_FFI_Stream_Get_Args`` struct shown above.

    ``stream`` is the output slot of the call; ``ctx`` is kept as an opaque
    pointer.
    """

    _fields_ = (
        ("struct_size", ctypes.c_size_t),
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        ("ctx", ctypes.c_void_p),  # XLA_FFI_ExecutionContext*
        ("stream", ctypes.c_void_p),  # out
    )


# Function-pointer type for the stream query: it takes a pointer to the
# argument struct and its result is declared as an opaque ``c_void_p``.
XLA_FFI_Stream_Get = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.POINTER(XLA_FFI_Stream_Get_Args))
382
+
383
+
384
# struct XLA_FFI_DeviceOrdinal_Get_Args {
#   size_t struct_size;
#   XLA_FFI_Extension_Base* extension_start;
#   XLA_FFI_ExecutionContext* ctx;
#   int32_t device_ordinal;  // out
# };
class XLA_FFI_DeviceOrdinal_Get_Args(ctypes.Structure):
    """ctypes mirror of the C argument struct for ``XLA_FFI_DeviceOrdinal_Get``.

    ``device_ordinal`` is the output slot of the call; ``ctx`` is kept as an
    opaque pointer.
    """

    _fields_ = (
        ("struct_size", ctypes.c_size_t),
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        ("ctx", ctypes.c_void_p),  # XLA_FFI_ExecutionContext*
        ("device_ordinal", ctypes.c_int32),  # out
    )


# Function-pointer type for the device-ordinal query: it takes a pointer to
# the argument struct and its result is declared as an opaque ``c_void_p``.
XLA_FFI_DeviceOrdinal_Get = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.POINTER(XLA_FFI_DeviceOrdinal_Get_Args))
400
+
401
+
402
# struct XLA_FFI_Api {
#   size_t struct_size;
#   XLA_FFI_Extension_Base* extension_start;
#
#   XLA_FFI_Api_Version api_version;
#   XLA_FFI_InternalApi* internal_api;
#
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Error_Create);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Error_GetMessage);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Error_Destroy);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Handler_Register);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Stream_Get);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_TypeId_Register);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_ExecutionContext_Get);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_State_Set);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_State_Get);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_DeviceMemory_Allocate);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_DeviceMemory_Free);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_ThreadPool_Schedule);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_ThreadPool_NumThreads);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Future_Create);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Future_SetAvailable);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_Future_SetError);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_RunId_Get);
#   _XLA_FFI_API_STRUCT_FIELD(XLA_FFI_DeviceOrdinal_Get);
# };
class XLA_FFI_Api(ctypes.Structure):
    """ctypes mirror of the C ``XLA_FFI_Api`` function table shown above.

    Each ``_XLA_FFI_API_STRUCT_FIELD`` entry of the C declaration has one
    field here, in the same order. Entries declared with a ``CFUNCTYPE``
    (``XLA_FFI_Error_Create``, ``XLA_FFI_Stream_Get`` and
    ``XLA_FFI_DeviceOrdinal_Get``) can be called through ctypes; the others
    are declared as opaque ``c_void_p`` slots.
    """

    _fields_ = (
        ("struct_size", ctypes.c_size_t),
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        ("api_version", XLA_FFI_Api_Version),
        ("internal_api", ctypes.c_void_p),  # XLA_FFI_InternalApi*
        # Function table, in C declaration order.
        ("XLA_FFI_Error_Create", XLA_FFI_Error_Create),
        ("XLA_FFI_Error_GetMessage", ctypes.c_void_p),
        ("XLA_FFI_Error_Destroy", ctypes.c_void_p),
        ("XLA_FFI_Handler_Register", ctypes.c_void_p),
        ("XLA_FFI_Stream_Get", XLA_FFI_Stream_Get),
        ("XLA_FFI_TypeId_Register", ctypes.c_void_p),
        ("XLA_FFI_ExecutionContext_Get", ctypes.c_void_p),
        ("XLA_FFI_State_Set", ctypes.c_void_p),
        ("XLA_FFI_State_Get", ctypes.c_void_p),
        ("XLA_FFI_DeviceMemory_Allocate", ctypes.c_void_p),
        ("XLA_FFI_DeviceMemory_Free", ctypes.c_void_p),
        ("XLA_FFI_ThreadPool_Schedule", ctypes.c_void_p),
        ("XLA_FFI_ThreadPool_NumThreads", ctypes.c_void_p),
        ("XLA_FFI_Future_Create", ctypes.c_void_p),
        ("XLA_FFI_Future_SetAvailable", ctypes.c_void_p),
        ("XLA_FFI_Future_SetError", ctypes.c_void_p),
        # TODO(chaserileyroberts): Make this return the correct value and not a c_void_p.
        ("XLA_FFI_RunId_Get", ctypes.c_void_p),
        ("XLA_FFI_DeviceOrdinal_Get", XLA_FFI_DeviceOrdinal_Get),
    )
454
+
455
+
456
# struct XLA_FFI_CallFrame {
#   size_t struct_size;
#   XLA_FFI_Extension_Base* extension_start;
#   const XLA_FFI_Api* api;
#   XLA_FFI_ExecutionContext* ctx;
#   XLA_FFI_ExecutionStage stage;
#   XLA_FFI_Args args;
#   XLA_FFI_Rets rets;
#   XLA_FFI_Attrs attrs;
#
#   // XLA FFI handler implementation can use `future` to signal a result of
#   // asynchronous computation to the XLA runtime. XLA runtime will keep all
#   // arguments, results and attributes alive until `future` is completed.
#   XLA_FFI_Future* future;  // out
# };
class XLA_FFI_CallFrame(ctypes.Structure):
    """ctypes mirror of the C ``XLA_FFI_CallFrame`` struct shown above.

    ``args``, ``rets`` and ``attrs`` are embedded by value; ``ctx`` and
    ``future`` are kept as opaque pointers and ``stage`` as a plain C int.
    """

    _fields_ = (
        ("struct_size", ctypes.c_size_t),
        ("extension_start", ctypes.POINTER(XLA_FFI_Extension_Base)),
        ("api", ctypes.POINTER(XLA_FFI_Api)),
        ("ctx", ctypes.c_void_p),  # XLA_FFI_ExecutionContext*
        ("stage", ctypes.c_int),  # XLA_FFI_ExecutionStage
        ("args", XLA_FFI_Args),
        ("rets", XLA_FFI_Rets),
        ("attrs", XLA_FFI_Attrs),
        ("future", ctypes.c_void_p),  # XLA_FFI_Future*, out
    )
483
+
484
+
485
# Maps XLA_FFI_DataType members to the matching jax.numpy scalar type.
# INVALID and TOKEN have no entry.
_xla_data_type_to_constructor = {
    XLA_FFI_DataType.PRED: jnp.bool,
    # signed integers
    XLA_FFI_DataType.S8: jnp.int8,
    XLA_FFI_DataType.S16: jnp.int16,
    XLA_FFI_DataType.S32: jnp.int32,
    XLA_FFI_DataType.S64: jnp.int64,
    # unsigned integers
    XLA_FFI_DataType.U8: jnp.uint8,
    XLA_FFI_DataType.U16: jnp.uint16,
    XLA_FFI_DataType.U32: jnp.uint32,
    XLA_FFI_DataType.U64: jnp.uint64,
    # floating point
    XLA_FFI_DataType.F16: jnp.float16,
    XLA_FFI_DataType.F32: jnp.float32,
    XLA_FFI_DataType.F64: jnp.float64,
    XLA_FFI_DataType.BF16: jnp.bfloat16,
    # complex
    XLA_FFI_DataType.C64: jnp.complex64,
    XLA_FFI_DataType.C128: jnp.complex128,
    # Not mapped:
    # XLA_FFI_DataType.F4E2M1FN: jnp.float4_e2m1fn.dtype,
    # XLA_FFI_DataType.F8E8M0FNU: jnp.float8_e8m0fnu.dtype,
}

# newer types not supported by older versions: register each 8-bit float only
# when the installed jax.numpy exposes it
for _ffi_member, _jnp_attr in (
    ("F8E5M2", "float8_e5m2"),
    ("F8E3M4", "float8_e3m4"),
    ("F8E4M3", "float8_e4m3"),
    ("F8E4M3FN", "float8_e4m3fn"),
    ("F8E4M3B11FNUZ", "float8_e4m3b11fnuz"),
    ("F8E5M2FNUZ", "float8_e5m2fnuz"),
    ("F8E4M3FNUZ", "float8_e4m3fnuz"),
):
    if hasattr(jnp, _jnp_attr):
        _xla_data_type_to_constructor[getattr(XLA_FFI_DataType, _ffi_member)] = getattr(jnp, _jnp_attr)
# Do not leave the loop variables behind as module attributes.
del _ffi_member, _jnp_attr
522
+
523
+
524
+ ########################################################################
525
+ # Helpers for translating between ctypes and python types
526
+ #######################################################################
527
+
528
+
529
def decode_bytespan(span: "XLA_FFI_ByteSpan"):
    """Decode a length-delimited byte span into a Python string.

    Exactly ``span.len`` bytes starting at ``span.ptr`` are copied and decoded
    as UTF-8. The span carries an explicit length, so the copy is done by
    size rather than by scanning for a terminator: bytes after an embedded
    NUL are kept instead of being silently dropped.

    Args:
        span: Object exposing ``ptr`` (address of the first byte) and ``len``
            (number of bytes).

    Returns:
        The decoded ``str``; an empty string when ``span.len`` is 0.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    return ctypes.string_at(span.ptr, span.len).decode("utf-8")
533
+
534
+
535
def decode_scalar(scalar: XLA_FFI_Scalar):
    """Decode an ``XLA_FFI_Scalar`` into a zero-dimensional NumPy array.

    The element type is looked up from ``scalar.dtype``; a data type without
    an entry in ``_xla_data_type_to_constructor`` raises ``KeyError``. One
    element's worth of bytes is copied from the address in ``scalar.value``.
    """
    # TODO validate if dtype supported
    element_type = jnp.dtype(_xla_data_type_to_constructor[scalar.dtype])
    raw = ctypes.string_at(scalar.value, element_type.itemsize)
    return np.frombuffer(raw, dtype=element_type).reshape(())
540
+
541
+
542
def decode_array(array: XLA_FFI_Array):
    """Decode an ``XLA_FFI_Array`` into a one-dimensional NumPy array.

    The element type is looked up from ``array.dtype``; a data type without
    an entry in ``_xla_data_type_to_constructor`` raises ``KeyError``.
    ``array.size`` elements are copied from the address in ``array.data``.
    """
    # TODO validate if dtype supported
    element_type = jnp.dtype(_xla_data_type_to_constructor[array.dtype])
    payload_size = element_type.itemsize * array.size
    raw = ctypes.string_at(array.data, payload_size)
    return np.frombuffer(raw, dtype=element_type)
547
+
548
+
549
def decode_attrs(attrs: XLA_FFI_Attrs):
    """Decode an ``XLA_FFI_Attrs`` collection into a ``dict`` keyed by attribute name.

    Each attribute is decoded according to its type tag: strings with
    ``decode_bytespan``, scalars with ``decode_scalar``, arrays with
    ``decode_array`` and nested dictionaries by recursing into this function.

    Raises:
        Exception: If an attribute carries a type tag that is none of the above.
    """
    # (type tag, struct the attribute pointer refers to, decoder for that struct)
    decoders = (
        (XLA_FFI_AttrType.STRING, XLA_FFI_ByteSpan, decode_bytespan),
        (XLA_FFI_AttrType.SCALAR, XLA_FFI_Scalar, decode_scalar),
        (XLA_FFI_AttrType.ARRAY, XLA_FFI_Array, decode_array),
        (XLA_FFI_AttrType.DICTIONARY, XLA_FFI_Attrs, decode_attrs),
    )

    decoded = {}
    for index in range(attrs.size):
        name = decode_bytespan(attrs.names[index].contents)
        kind = attrs.types[index]
        for tag, struct_type, decoder in decoders:
            if kind == tag:
                # The attribute slot is untyped; reinterpret it as the struct
                # that matches the tag before decoding.
                typed_ptr = ctypes.cast(attrs.attrs[index], ctypes.POINTER(struct_type))
                decoded[name] = decoder(typed_ptr.contents)
                break
        else:
            raise Exception("Unexpected attr type")
    return decoded
570
+
571
+
572
# error-string to XLA_FFI_Error
def create_ffi_error(api, errc, message):
    """Build an error object through the ``XLA_FFI_Error_Create`` API entry.

    Args:
        api: Pointer to the ``XLA_FFI_Api`` table (dereferenced via ``.contents``).
        errc: Error code stored in the ``errc`` field of the argument struct.
        message: Error text; it is encoded as UTF-8 before being passed on.

    Returns:
        Whatever ``XLA_FFI_Error_Create`` returns (declared as ``c_void_p``).
    """
    no_extension = ctypes.POINTER(XLA_FFI_Extension_Base)()  # NULL pointer
    request = XLA_FFI_Error_Create_Args(
        struct_size=ctypes.sizeof(XLA_FFI_Error_Create_Args),
        extension_start=no_extension,
        message=ctypes.c_char_p(message.encode("utf-8")),
        errc=errc,
    )
    # The entry is declared with a pointer parameter; ctypes passes the struct
    # instance by reference automatically.
    return api.contents.XLA_FFI_Error_Create(request)
581
+
582
+
583
def create_invalid_argument_ffi_error(api, message):
    """Shorthand for ``create_ffi_error`` with the ``INVALID_ARGUMENT`` error code."""
    invalid_argument = XLA_FFI_Error_Code.INVALID_ARGUMENT
    return create_ffi_error(api, invalid_argument, message)
585
+
586
+
587
# Extract CUDA stream from XLA_FFI_CallFrame.
def get_stream_from_callframe(call_frame):
    """Query the stream for ``call_frame`` through the ``XLA_FFI_Stream_Get`` API entry.

    Args:
        call_frame: An ``XLA_FFI_CallFrame``; its ``api`` and ``ctx`` fields are used.

    Returns:
        The value of the ``stream`` output field after the call.
    """
    request = XLA_FFI_Stream_Get_Args(
        struct_size=ctypes.sizeof(XLA_FFI_Stream_Get_Args),
        extension_start=ctypes.POINTER(XLA_FFI_Extension_Base)(),  # NULL pointer
        ctx=call_frame.ctx,
        stream=None,  # output slot, filled in by the call below
    )
    call_frame.api.contents.XLA_FFI_Stream_Get(request)
    # TODO check result
    return request.stream
596
+
597
+
598
def get_device_ordinal_from_callframe(call_frame):
    """Query the device ordinal for ``call_frame`` through ``XLA_FFI_DeviceOrdinal_Get``.

    Args:
        call_frame: An ``XLA_FFI_CallFrame``; its ``api`` and ``ctx`` fields are used.

    Returns:
        The value of the ``device_ordinal`` output field after the call. The
        value returned by the API entry itself is not inspected.
    """
    request = XLA_FFI_DeviceOrdinal_Get_Args(
        struct_size=ctypes.sizeof(XLA_FFI_DeviceOrdinal_Get_Args),
        extension_start=ctypes.POINTER(XLA_FFI_Extension_Base)(),  # NULL pointer
        ctx=call_frame.ctx,
        device_ordinal=0,  # output slot, filled in by the call below
    )
    call_frame.api.contents.XLA_FFI_DeviceOrdinal_Get(request)
    return request.device_ordinal
605
+
606
+
607
# Maps XLA_FFI_DataType members to Warp scalar types. Only the integer and
# floating-point members listed here have an entry.
_dtype_from_ffi = {
    # signed integers
    XLA_FFI_DataType.S8: wp.int8,
    XLA_FFI_DataType.S16: wp.int16,
    XLA_FFI_DataType.S32: wp.int32,
    XLA_FFI_DataType.S64: wp.int64,
    # unsigned integers
    XLA_FFI_DataType.U8: wp.uint8,
    XLA_FFI_DataType.U16: wp.uint16,
    XLA_FFI_DataType.U32: wp.uint32,
    XLA_FFI_DataType.U64: wp.uint64,
    # floating point
    XLA_FFI_DataType.F16: wp.float16,
    XLA_FFI_DataType.F32: wp.float32,
    XLA_FFI_DataType.F64: wp.float64,
}
620
+
621
+
622
def dtype_from_ffi(ffi_dtype):
    """Return the Warp scalar type mapped to ``ffi_dtype``, or ``None`` if it has no entry."""
    warp_type = _dtype_from_ffi.get(ffi_dtype, None)
    return warp_type
624
+
625
+
626
def jax_dtype_from_ffi(ffi_dtype):
    """Return the jax.numpy type mapped to ``ffi_dtype``, or ``None`` if it has no entry."""
    jax_type = _xla_data_type_to_constructor.get(ffi_dtype, None)
    return jax_type
628
+
629
+
630
# Execution context (stream, stage)
class ExecutionContext:
    """Execution stage and stream extracted from an ``XLA_FFI_CallFrame``."""

    stage: XLA_FFI_ExecutionStage
    stream: int

    def __init__(self, callframe: XLA_FFI_CallFrame):
        # The call frame stores the stage as a plain integer; convert it to
        # the enum before querying the stream through the frame's API table.
        raw_stage = callframe.stage
        self.stage = XLA_FFI_ExecutionStage(raw_stage)
        self.stream = get_stream_from_callframe(callframe)
638
+
639
+
640
class FfiBuffer:
    """Lightweight view of an XLA FFI buffer exposing ``__cuda_array_interface__``.

    The constructor copies the element type, shape and data address out of
    the XLA buffer descriptor; the buffer memory itself is neither copied nor
    owned by this object.
    """

    dtype: np.dtype  # NumPy dtype of one element
    data: int  # address of the first element, as read from the descriptor
    shape: tuple[int, ...]  # one extent per dimension

    def __init__(self, xla_buffer):
        """Capture dtype, shape and data address from ``xla_buffer``.

        Raises:
            KeyError: If ``xla_buffer.dtype`` has no entry in
                ``_xla_data_type_to_constructor``.
        """
        # TODO check if valid
        self.dtype = jnp.dtype(_xla_data_type_to_constructor[xla_buffer.dtype])
        self.shape = tuple(xla_buffer.dims[i] for i in range(xla_buffer.rank))
        self.data = xla_buffer.data

    @property
    def __cuda_array_interface__(self):
        """Describe the buffer as a CUDA Array Interface (version 2) dictionary."""
        return {
            "shape": self.shape,
            # The interface expects the array-interface type string
            # (byte order + kind + item size, e.g. "<f4"), which is
            # ``dtype.str``; ``dtype.char`` is only the one-letter type code.
            "typestr": self.dtype.str,
            # (address, read_only): the buffer is exposed as writable.
            "data": (self.data, False),
            "version": 2,
        }