warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/crt.h ADDED
@@ -0,0 +1,568 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ // This file declares a subset of the C runtime (CRT) functions and macros for
21
+ // use by compute kernel modules. There are three environments in which this
22
+ // file gets included:
23
+ // - CUDA kernel modules (WP_NO_CRT and __CUDACC__). CUDA already has implicitly
24
+ // declared builtins for most functions. printf() and macro definitions are
25
+ // the notable exceptions.
26
+ // - C++ kernel modules (WP_NO_CRT and !__CUDACC__). These can't use the CRT
27
+ // directly when using a standalone compiler, so the functions are resolved
28
+ // from the compiler library (clang.dll) instead.
29
+ // - Warp runtime (!WP_NO_CRT). When building warp.dll it's fine to include the
30
+ // standard C library headers, and it avoids mismatched redefinitions.
31
+
32
+ #if !defined(__CUDA_ARCH__) // WP_API: symbol-export decoration, only non-empty when __CUDA_ARCH__ is not defined
33
+ #if defined(_WIN32)
34
+ #define WP_API __declspec(dllexport) // Windows toolchains: export the symbol from the DLL
35
+ #else
36
+ #define WP_API __attribute__ ((visibility ("default"))) // other toolchains: force default (exported) symbol visibility
37
+ #endif
38
+ #else
39
+ #define WP_API // __CUDA_ARCH__ defined (presumably a CUDA device pass): expands to nothing
40
+ #endif
41
+
42
+ #if !defined(__CUDA_ARCH__)
43
+
44
+ // Helper for implementing assert() macro
45
+ extern "C" WP_API void _wp_assert(const char* message, const char* file, unsigned int line);
46
+
47
+ // Helper for implementing isfinite()
48
+ extern "C" WP_API int _wp_isfinite(double);
49
+
50
+ // Helper for implementing isnan()
51
+ extern "C" WP_API int _wp_isnan(double);
52
+
53
+ // Helper for implementing isinf()
54
+ extern "C" WP_API int _wp_isinf(double);
55
+
56
+ #endif // !__CUDA_ARCH__
57
+
58
+ #if !defined(WP_NO_CRT)
59
+
60
+ #include <stdint.h>
61
+ #include <stdlib.h>
62
+ #include <stdio.h>
63
+ #include <math.h>
64
+ #include <assert.h>
65
+ #include <float.h>
66
+ #include <string.h>
67
+
68
+ #else
69
+
70
+ // These definitions are taken from Jitify: https://github.com/NVIDIA/jitify
71
+
72
+ /// float.h
73
+ #define FLT_RADIX 2
74
+ #define FLT_MANT_DIG 24
75
+ #define DBL_MANT_DIG 53
76
+ #define FLT_DIG 6
77
+ #define DBL_DIG 15
78
+ #define FLT_MIN_EXP -125
79
+ #define DBL_MIN_EXP -1021
80
+ #define FLT_MIN_10_EXP -37
81
+ #define DBL_MIN_10_EXP -307
82
+ #define FLT_MAX_EXP 128
83
+ #define DBL_MAX_EXP 1024
84
+ #define FLT_MAX_10_EXP 38
85
+ #define DBL_MAX_10_EXP 308
86
+ #define FLT_MAX 3.4028234e38f
87
+ #define DBL_MAX 1.7976931348623157e308
88
+ #define FLT_EPSILON 1.19209289e-7f
89
+ #define DBL_EPSILON 2.2204460492503131e-16 // 2^-52: difference between 1.0 and the next representable double
90
+ #define FLT_MIN 1.1754943e-38f
91
+ #define DBL_MIN 2.2250738585072013e-308
92
+ #define FLT_ROUNDS 1
93
+ #if defined __cplusplus && __cplusplus >= 201103L
94
+ #define FLT_EVAL_METHOD 0
95
+ #define DECIMAL_DIG 21
96
+ #endif
97
+
98
+ /// limits.h
99
+ #if defined _WIN32 || defined _WIN64 // __WORDSIZE selects the width of long for LONG_MAX / ULONG_MAX below
100
+ #define __WORDSIZE 32 // Windows keeps long at 32 bits, even on 64-bit targets (LLP64)
101
+ #else
102
+ #if (defined __x86_64__ && !defined __ILP32__) || defined __LP64__ // any LP64 target (x86-64, arm64, ...), not only x86-64
103
+ #define __WORDSIZE 64
104
+ #else
105
+ #define __WORDSIZE 32
106
+ #endif
107
+ #endif
108
+ #define MB_LEN_MAX 16
109
+ #define CHAR_BIT 8
110
+ #define SCHAR_MIN (-128)
111
+ #define SCHAR_MAX 127
112
+ #define UCHAR_MAX 255
113
+ #define _JITIFY_CHAR_IS_UNSIGNED ((char)-1 >= 0)
114
+ #define CHAR_MIN (_JITIFY_CHAR_IS_UNSIGNED ? 0 : SCHAR_MIN)
115
+ #define CHAR_MAX (_JITIFY_CHAR_IS_UNSIGNED ? UCHAR_MAX : SCHAR_MAX)
116
+ #define SHRT_MIN (-32768)
117
+ #define SHRT_MAX 32767
118
+ #define USHRT_MAX 65535
119
+ #define INT_MIN (-INT_MAX - 1)
120
+ #define INT_MAX 2147483647
121
+ #define UINT_MAX 4294967295U
122
+ #if __WORDSIZE == 64
123
+ # define LONG_MAX 9223372036854775807L
124
+ #else
125
+ # define LONG_MAX 2147483647L
126
+ #endif
127
+ #define LONG_MIN (-LONG_MAX - 1L)
128
+ #if __WORDSIZE == 64
129
+ #define ULONG_MAX 18446744073709551615UL
130
+ #else
131
+ #define ULONG_MAX 4294967295UL
132
+ #endif
133
+ #define LLONG_MAX 9223372036854775807LL
134
+ #define LLONG_MIN (-LLONG_MAX - 1LL)
135
+ #define ULLONG_MAX 18446744073709551615ULL
136
+
137
+ #define INFINITY ((float)(DBL_MAX * DBL_MAX)) // product exceeds the double range; presumably evaluates to +inf under IEEE 754 arithmetic
138
+ #define HUGE_VAL ((double)INFINITY) // double-typed alias of INFINITY
139
+ #define HUGE_VALF ((float)INFINITY) // float-typed alias of INFINITY
140
+ #define NAN ((float)(0.0 / 0.0)) // 0.0 / 0.0 narrowed to float; presumably a quiet NaN under IEEE 754 arithmetic
141
+
142
+ /// stdint.h
143
+ typedef signed char int8_t;
144
+ typedef signed short int16_t;
145
+ typedef signed int int32_t;
146
+ typedef signed long long int64_t;
147
+ //typedef signed char int_fast8_t;
148
+ //typedef signed short int_fast16_t;
149
+ //typedef signed int int_fast32_t;
150
+ //typedef signed long long int_fast64_t;
151
+ //typedef signed char int_least8_t;
152
+ //typedef signed short int_least16_t;
153
+ //typedef signed int int_least32_t;
154
+ //typedef signed long long int_least64_t;
155
+ //typedef signed long long intmax_t;
156
+ //typedef signed long intptr_t;
157
+ typedef unsigned char uint8_t;
158
+ typedef unsigned short uint16_t;
159
+ typedef unsigned int uint32_t;
160
+ typedef unsigned long long uint64_t;
161
+ //typedef unsigned char uint_fast8_t;
162
+ //typedef unsigned short uint_fast16_t;
163
+ //typedef unsigned int uint_fast32_t;
164
+ //typedef unsigned long long uint_fast64_t;
165
+ //typedef unsigned char uint_least8_t;
166
+ //typedef unsigned short uint_least16_t;
167
+ //typedef unsigned int uint_least32_t;
168
+ //typedef unsigned long long uint_least64_t;
169
+ //typedef unsigned long long uintmax_t;
170
+
171
+
172
+ /// math.h
173
+
174
+ // #if __cplusplus >= 201103L
175
+ // #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
176
+ // inline double f(double x) { return ::f(x); } \
177
+ // inline float f##f(float x) { return ::f(x); } \
178
+ // /*inline long double f##l(long double x) { return ::f(x); }*/ \
179
+ // inline float f(float x) { return ::f(x); } \
180
+ // /*inline long double f(long double x) { return ::f(x); }*/
181
+ // #else
182
+ // #define DEFINE_MATH_UNARY_FUNC_WRAPPER(f) \
183
+ // inline double f(double x) { return ::f(x); } \
184
+ // inline float f##f(float x) { return ::f(x); } \
185
+ // /*inline long double f##l(long double x) { return ::f(x); }*/
186
+ // #endif
187
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(cos)
188
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(sin)
189
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(tan)
190
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(acos)
191
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(asin)
192
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(atan)
193
+ // template<typename T> inline T atan2(T y, T x) { return ::atan2(y, x); }
194
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(cosh)
195
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(sinh)
196
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(tanh)
197
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(exp)
198
+ // template<typename T> inline T frexp(T x, int* exp) { return ::frexp(x, exp); }
199
+ // template<typename T> inline T ldexp(T x, int exp) { return ::ldexp(x, exp); }
200
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(log)
201
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(log10)
202
+ // template<typename T> inline T modf(T x, T* intpart) { return ::modf(x, intpart); }
203
+ // template<typename T> inline T pow(T x, T y) { return ::pow(x, y); }
204
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(sqrt)
205
+ // template<typename T> inline T fmod(T n, T d) { return ::fmod(n, d); }
206
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(fabs)
207
+ // template<typename T> inline T abs(T x) { return ::abs(x); }
208
+ // #if __cplusplus >= 201103L
209
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(acosh)
210
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(asinh)
211
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(atanh)
212
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(exp2)
213
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(expm1)
214
+ // template<typename T> inline int ilogb(T x) { return ::ilogb(x); }
215
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(log1p)
216
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(log2)
217
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(logb)
218
+ // template<typename T> inline T scalbn (T x, int n) { return ::scalbn(x, n); }
219
+ // template<typename T> inline T scalbln(T x, long n) { return ::scalbln(x, n); }
220
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(cbrt)
221
+ // template<typename T> inline T hypot(T x, T y) { return ::hypot(x, y); }
222
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(erf)
223
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(erfc)
224
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(tgamma)
225
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(lgamma)
226
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(round)
227
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
228
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(trunc)
229
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(floor)
230
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(ceil)
231
+ // template<typename T> inline long lround(T x) { return ::lround(x); }
232
+ // template<typename T> inline long long llround(T x) { return ::llround(x); }
233
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(rint)
234
+ // template<typename T> inline long lrint(T x) { return ::lrint(x); }
235
+ // template<typename T> inline long long llrint(T x) { return ::llrint(x); }
236
+ // DEFINE_MATH_UNARY_FUNC_WRAPPER(nearbyint)
237
+ // //DEFINE_MATH_UNARY_FUNC_WRAPPER(isfinite)
238
+ // // TODO: remainder, remquo, copysign, nan, nextafter, nexttoward, fdim,
239
+ // // fmax, fmin, fma
240
+ // #endif
241
+ // #undef DEFINE_MATH_UNARY_FUNC_WRAPPER
242
+
243
// Pi as a double-precision literal. Defined only when no earlier header has
// already provided M_PI, so a toolchain definition with different spelling
// does not trigger a macro-redefinition diagnostic.
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
244
+
245
+ #if defined(__CUDACC__)
246
+
247
+ #if defined(__clang__)
248
+ // When compiling CUDA with barebones Clang we need to define its builtins and runtime functions ourselves.
249
+ #include "cuda_crt.h"
250
+ #endif
251
+
252
+ #else
253
+
254
// Hand-written prototypes for the C library routines used by this header,
// grouped by the standard header that normally declares them. All of them
// have C linkage.
extern "C" {

// ---- stdio.h ----
int printf(const char * format, ... );

// ---- stddef.h ----
// size_t has to be declared before the memory routines that use it.
#if defined(_WIN32)
typedef unsigned __int64 size_t;
#else
typedef unsigned long size_t;
#endif

// ---- stdlib.h ----
int abs(int);
long long llabs(long long);
void* malloc(size_t);
void free(void*);

// ---- string.h ----
void* memset(void*, int, size_t);
void* memcpy(void*, const void*, size_t);

// ---- math.h: remainder, logarithms, exponential ----
double fmod(double, double);
float fmodf(float, float);
double log(double);
float logf(float);
double log2(double);
float log2f(float);
double log10(double);
float log10f(float);
double exp(double);
float expf(float);

// ---- math.h: roots, powers, fused multiply-add ----
double sqrt(double);
float sqrtf(float);
double cbrt(double);
float cbrtf(float);
double pow(double, double);
float powf(float, float);
double fma(double, double, double);
float fmaf(float, float, float);

// ---- math.h: rounding and absolute value ----
double floor(double);
float floorf(float);
double ceil(double);
float ceilf(float);
double fabs(double);
float fabsf(float);
double round(double);
float roundf(float);
double trunc(double);
float truncf(float);
double rint(double);
float rintf(float);

// ---- math.h: trigonometric ----
double acos(double);
float acosf(float);
double asin(double);
float asinf(float);
double atan(double);
float atanf(float);
double atan2(double, double);
float atan2f(float, float);
double cos(double);
float cosf(float);
double sin(double);
float sinf(float);
double tan(double);
float tanf(float);

// ---- math.h: hyperbolic ----
double sinh(double);
float sinhf(float);
double cosh(double);
float coshf(float);
double tanh(double);
float tanhf(float);

// ---- error functions and their inverses ----
double erf(double);
float erff(float);
double erfc(double);
float erfcf(float);
double erfinv(double);
float erfinvf(float);
double erfcinv(double);
float erfcinvf(float);

}  // extern "C"
339
+
340
+ // cmath
341
+ inline bool isfinite(double x)
342
+ {
343
+ return _wp_isfinite(x);
344
+ }
345
+
346
+ inline bool isnan(double x)
347
+ {
348
+ return _wp_isnan(x);
349
+ }
350
+
351
+ inline bool isinf(double x)
352
+ {
353
+ return _wp_isinf(x);
354
+ }
355
+
356
+ // assert.h
357
// Replacement for the <assert.h> macro.
//  - Without NDEBUG: evaluates the expression; when it is false, calls
//    _wp_assert with the stringized expression, the file name and the line
//    number. The whole expansion is a void expression.
//  - With NDEBUG: expands to a no-op and the expression is not evaluated.
#ifndef NDEBUG
#define assert(expression) \
    (void)((!!(expression)) || (_wp_assert((#expression), (__FILE__), (unsigned)(__LINE__)), 0))
#else
#define assert(expression) ((void)0)
#endif
365
+
366
+ #endif // !__CUDACC__
367
+
368
+ #endif // WP_NO_CRT
369
+
370
+ #if !defined(__CUDACC__)
371
+
372
+ /*
373
+ * From Cephes Library polevl.c
374
+ * Original source: https://www.netlib.org/cephes/
375
+ * Copyright (c) 1984 by Stephen L. Moshier.
376
+ * All rights reserved.
377
+ */
378
// Evaluate a degree-N polynomial at x with Horner's method.
//
// coefs holds N + 1 coefficients ordered from the highest power down to the
// constant term:
//   coefs[0] * x^N + coefs[1] * x^(N-1) + ... + coefs[N]
// coefs[0] is always read; for N <= 0 it is returned unchanged.
static inline double polevl(double x, const double* coefs, int N)
{
    const double* next = coefs;
    double acc = *next++;
    for (int remaining = N; remaining > 0; --remaining)
    {
        acc = acc * x + *next++;
    }
    return acc;
}
388
+
389
+ /*
390
+ * From Cephes Library polevl.c
391
+ * Original source: https://www.netlib.org/cephes/
392
+ * Copyright (c) 1984 by Stephen L. Moshier.
393
+ * All rights reserved.
394
+ */
395
// Evaluate a degree-N polynomial whose leading coefficient is an implicit 1,
// using Horner's method.
//
// coefs holds the N remaining coefficients from x^(N-1) down to the constant:
//   x^N + coefs[0] * x^(N-1) + ... + coefs[N-1]
// coefs[0] is always read; for N <= 1 the result is x + coefs[0].
static inline double p1evl(double x, const double* coefs, int N)
{
    const double* next = coefs;
    double acc = x + *next++;
    for (int left = N; left > 1; --left)
    {
        acc = acc * x + *next++;
    }
    return acc;
}
405
+
406
+ /*
407
+ * From Cephes Library ndtri.c
408
+ * Original source: https://www.netlib.org/cephes/
409
+ * Copyright (c) 1984 by Stephen L. Moshier.
410
+ * All rights reserved.
411
+ */
412
+ // inverse normal distribution function (ndtri)
413
+ static inline double ndtri(double y)
414
+ {
415
+ // domain check
416
+ if (y <= 0.0 || y >= 1.0)
417
+ {
418
+ return (y <= 0.0) ? -HUGE_VAL : HUGE_VAL;
419
+ }
420
+
421
+ // constants from Cephes
422
+ const double s2pi = 2.50662827463100050242E0; // sqrt(2*pi)
423
+ const double exp_neg2 = 0.13533528323661269189; // exp(-2)
424
+
425
+ // approximation for 0 <= abs(z - 0.5) <= 3/8
426
+ static const double P0[5] = {
427
+ -5.99633501014107895267e1,
428
+ 9.80010754185999661536e1,
429
+ -5.66762857469070293439e1,
430
+ 1.39312609387279679503e1,
431
+ -1.23916583867381258016e0
432
+ };
433
+
434
+ static const double Q0[8] = {
435
+ 1.95448858338141759834e0,
436
+ 4.67627912898881538453e0,
437
+ 8.63602421390890590575e1,
438
+ -2.25462687854119370527e2,
439
+ 2.00260212380060660359e2,
440
+ -8.20372256168333339912e1,
441
+ 1.59056225126211695515e1,
442
+ -1.18331621121330003142e0
443
+ };
444
+
445
+ // approximation for interval z = sqrt(-2 log y) between 2 and 8
446
+ static const double P1[9] = {
447
+ 4.05544892305962419923e0,
448
+ 3.15251094599893866154e1,
449
+ 5.71628192246421288162e1,
450
+ 4.40805073893200834700e1,
451
+ 1.46849561928858024014e1,
452
+ 2.18663306850790267539e0,
453
+ -1.40256079171354495875e-1,
454
+ -3.50424626827848203418e-2,
455
+ -8.57456785154685413611e-4
456
+ };
457
+
458
+ static const double Q1[8] = {
459
+ 1.57799883256466749731e1,
460
+ 4.53907635128879210584e1,
461
+ 4.13172038254672030440e1,
462
+ 1.50425385692907503408e1,
463
+ 2.50464946208309415979e0,
464
+ -1.42182922854787788574e-1,
465
+ -3.80806407691578277194e-2,
466
+ -9.33259480895457427372e-4
467
+ };
468
+
469
+ // approximation for interval z = sqrt(-2 log y) between 8 and 64
470
+ static const double P2[9] = {
471
+ 3.23774891776946035970e0,
472
+ 6.91522889068984211695e0,
473
+ 3.93881025292474443415e0,
474
+ 1.33303460815807542389e0,
475
+ 2.01485389549179081538e-1,
476
+ 1.23716634817820021358e-2,
477
+ 3.01581553508235416007e-4,
478
+ 2.65806974686737550832e-6,
479
+ 6.23974539184983293730e-9
480
+ };
481
+
482
+ static const double Q2[8] = {
483
+ 6.02427039364742014255e0,
484
+ 3.67983563856160859403e0,
485
+ 1.37702099489081330271e0,
486
+ 2.16236993594496635890e-1,
487
+ 1.34204006088543189037e-2,
488
+ 3.28014464682127739104e-4,
489
+ 2.89247864745380683936e-6,
490
+ 6.79019408009981274425e-9
491
+ };
492
+
493
+ int code = 1;
494
+ double y_work = y;
495
+
496
+ if (y_work > (1.0 - exp_neg2))
497
+ {
498
+ y_work = 1.0 - y_work;
499
+ code = 0;
500
+ }
501
+
502
+ // middle region: 0 <= |y - 0.5| <= 3/8
503
+ if (y_work > exp_neg2)
504
+ {
505
+ y_work -= 0.5;
506
+ double y2 = y_work * y_work;
507
+ double x = y_work + y_work * (y2 * polevl(y2, P0, 4) / p1evl(y2, Q0, 8));
508
+ x = x * s2pi;
509
+ return x;
510
+ }
511
+
512
+ double x = ::sqrt(-2.0 * ::log(y_work));
513
+ double x0 = x - ::log(x) / x;
514
+
515
+ double z = 1.0 / x;
516
+ double x1;
517
+ if (x < 8.0)
518
+ {
519
+ x1 = z * polevl(z, P1, 8) / p1evl(z, Q1, 8);
520
+ }
521
+ else
522
+ {
523
+ x1 = z * polevl(z, P2, 8) / p1evl(z, Q2, 8);
524
+ }
525
+
526
+ x = x0 - x1;
527
+ if (code != 0)
528
+ {
529
+ x = -x;
530
+ }
531
+
532
+ return x;
533
+ }
534
+
535
+ // inverse error function (not in standard C library)
536
+ // only compiled for non-CUDA builds - CUDA provides these in its math headers
537
+ inline double erfinv(double z)
538
+ {
539
+ // handle special cases
540
+ if (z == 0.0)
541
+ return 0.0;
542
+ if (z == 1.0)
543
+ return HUGE_VAL; // infinity
544
+ if (z == -1.0)
545
+ return -HUGE_VAL; // -infinity
546
+ if (z < -1.0 || z > 1.0)
547
+ return NAN; // outside valid range
548
+
549
+ // erfinv(z) = ndtri((z + 1) / 2) / sqrt(2)
550
+ return ndtri((z + 1.0) / 2.0) / ::sqrt(2.0);
551
+ }
552
+
553
+ inline float erfinvf(float x)
554
+ {
555
+ return (float)erfinv((double)x);
556
+ }
557
+
558
+ inline double erfcinv(double x)
559
+ {
560
+ return erfinv(1.0 - x);
561
+ }
562
+
563
+ inline float erfcinvf(float x)
564
+ {
565
+ return (float)erfcinv((double)x);
566
+ }
567
+
568
+ #endif // !defined(__CUDACC__)