warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/fabric.h ADDED
@@ -0,0 +1,246 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #pragma once
19
+
20
+ #include "builtin.h"
21
+
22
+ namespace wp
23
+ {
24
+ // A contiguous run of fabric array elements covering the half-open index range [index_start, index_end).
25
+ struct fabricbucket_t
26
+ {
27
+ size_t index_start; // first array index held by this bucket (inclusive)
28
+ size_t index_end; // one past the last array index held by this bucket (exclusive)
29
+ void* ptr; // bucket storage: read as packed elements by index(fa, i), or as one data pointer per element by the array-of-arrays accessors
30
+ size_t* lengths; // one inner-array length per element; the array-of-arrays accessors assert that it is non-null
31
+ };
32
+
33
+ // Descriptor of a fabric array: a list of buckets that together hold elements of type T.
34
+ template <typename T>
35
+ struct fabricarray_t
36
+ {
37
+ CUDA_CALLABLE inline fabricarray_t() // default state: no buckets and zero size
38
+ : buckets(nullptr),
39
+ nbuckets(0),
40
+ size(0)
41
+ {}
42
+
43
+ CUDA_CALLABLE inline bool empty() const { return !size; } // true when size is zero
44
+
45
+ fabricbucket_t* buckets; // array of fabricbucket_t on the correct device
46
+
47
+ size_t nbuckets; // number of entries in buckets
48
+ size_t size; // presumably the total element count across all buckets - TODO confirm against the code that fills this struct
49
+ };
50
+
51
+ // A fabric array accessed through an index list: position i maps to element indices[i] of fa.
52
+ template <typename T>
53
+ struct indexedfabricarray_t
54
+ {
55
+ CUDA_CALLABLE inline indexedfabricarray_t() // fa is default-constructed, indices is value-initialized to null
56
+ : indices(),
57
+ size(0)
58
+ {}
59
+
60
+ CUDA_CALLABLE inline bool empty() const { return !size; } // true when size is zero
61
+
62
+ fabricarray_t<T> fa; // the underlying fabric array that the indices refer to
63
+
64
+ // TODO: we use 32-bit indices for consistency with other Warp indexed arrays,
65
+ // but Fabric uses 64-bit indexing.
66
+ int* indices; // indices[i] is converted to size_t and used as an index into fa
67
+ size_t size; // number of valid entries in indices; the accessors assert i < size
68
+ };
69
+
70
+ // Selects the lookup strategy of fabricarray_find_bucket(): non-zero for binary search, zero for linear search.
71
+ #ifndef FABRICARRAY_USE_BINARY_SEARCH
72
+ #define FABRICARRAY_USE_BINARY_SEARCH 1 // default to binary search unless the macro was defined before this header
73
+ #endif
74
+
75
+ template <typename T>
76
+ CUDA_CALLABLE inline const fabricbucket_t* fabricarray_find_bucket(const fabricarray_t<T>& fa, size_t i)
77
+ {
78
+ #if FABRICARRAY_USE_BINARY_SEARCH
79
+ // use binary search to find the right bucket
80
+ const fabricbucket_t* bucket = nullptr;
81
+ size_t lo = 0;
82
+ size_t hi = fa.nbuckets - 1;
83
+ while (hi >= lo)
84
+ {
85
+ size_t mid = (lo + hi) >> 1;
86
+ bucket = fa.buckets + mid;
87
+ if (i >= bucket->index_end)
88
+ lo = mid + 1;
89
+ else if (i < bucket->index_start)
90
+ hi = mid - 1;
91
+ else
92
+ return bucket;
93
+ }
94
+ return nullptr;
95
+ #else
96
+ // use linear search to find the right bucket
97
+ const fabricbucket_t* bucket = fa.buckets;
98
+ const fabricbucket_t* bucket_end = bucket + fa.nbuckets;
99
+ for (; bucket < bucket_end; ++bucket)
100
+ {
101
+ if (i < bucket->index_end)
102
+ return bucket;
103
+ }
104
+ return nullptr;
105
+ #endif
106
+ }
107
+
108
+
109
+ // Compute the pointer to a fabricarray element at index i.
110
+ // This function is similar to wp::index(), but the array data type doesn't need to be known at compile time.
111
+ CUDA_CALLABLE inline void* fabricarray_element_ptr(const fabricarray_t<void>& fa, size_t i, size_t elem_size)
112
+ {
113
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
114
+
115
+ size_t index_in_bucket = i - bucket->index_start;
116
+
117
+ return (char*)bucket->ptr + index_in_bucket * elem_size;
118
+ }
119
+
120
+ // Return a reference to element i of a fabric array; asserts if no bucket contains i.
121
+ template <typename T>
122
+ CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i)
123
+ {
124
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
125
+ assert(bucket && "Fabric array index out of range");
126
+
127
+ size_t index_in_bucket = i - bucket->index_start; // offset of the element within its bucket
128
+
129
+ T& result = *((T*)bucket->ptr + index_in_bucket); // bucket storage is read as a packed array of T
130
+ // FP_VERIFY_FWD_1 is defined outside this header; presumably a floating-point validity check - TODO confirm
131
+ FP_VERIFY_FWD_1(result)
132
+
133
+ return result;
134
+ }
135
+
136
+
137
+ // Indexing for a fabric array of arrays: return a reference to element j of the inner array at outer index i.
138
+ template <typename T>
139
+ CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i, size_t j)
140
+ {
141
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
142
+ assert(bucket && "Fabric array index out of range");
143
+
144
+ assert(bucket->lengths && "Missing inner array lengths");
145
+
146
+ size_t index_in_bucket = i - bucket->index_start; // offset of the outer element within its bucket
147
+ // the bucket is read as one data pointer and one length per outer element
148
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
149
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
150
+
151
+ assert(j < length && "Fabric array inner index out of range");
152
+
153
+ T& result = *((T*)ptr + j); // the inner array is read as a packed array of T
154
+ // FP_VERIFY_FWD_1 is defined outside this header; presumably a floating-point validity check - TODO confirm
155
+ FP_VERIFY_FWD_1(result)
156
+
157
+ return result;
158
+ }
159
+
160
+
161
+ template <typename T>
162
+ CUDA_CALLABLE inline array_t<T> view(fabricarray_t<T>& fa, size_t i)
163
+ {
164
+ const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
165
+ assert(bucket && "Fabric array index out of range");
166
+
167
+ assert(bucket->lengths && "Missing inner array lengths");
168
+
169
+ size_t index_in_bucket = i - bucket->index_start;
170
+
171
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
172
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
173
+
174
+ return array_t<T>((T*)ptr, int(length));
175
+ }
176
+
177
+ // Return a reference to the element of ifa.fa selected by position i of the index list.
178
+ template <typename T>
179
+ CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i)
180
+ {
181
+ // index lookup
182
+ assert(i < ifa.size);
183
+ i = size_t(ifa.indices[i]); // from here on i is an index into the underlying fabric array
184
+
185
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
186
+ assert(bucket && "Fabric array index out of range");
187
+
188
+ size_t index_in_bucket = i - bucket->index_start; // offset of the element within its bucket
189
+
190
+ T& result = *((T*)bucket->ptr + index_in_bucket); // bucket storage is read as a packed array of T
191
+ // FP_VERIFY_FWD_1 is defined outside this header; presumably a floating-point validity check - TODO confirm
192
+ FP_VERIFY_FWD_1(result)
193
+
194
+ return result;
195
+ }
196
+
197
+
198
+ // Indexing for an indexed fabric array of arrays: element j of the inner array selected by position i of the index list.
199
+ template <typename T>
200
+ CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i, size_t j)
201
+ {
202
+ // index lookup
203
+ assert(i < ifa.size);
204
+ i = size_t(ifa.indices[i]); // from here on i is an index into the underlying fabric array
205
+
206
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
207
+ assert(bucket && "Fabric array index out of range");
208
+
209
+ assert(bucket->lengths && "Missing inner array lengths");
210
+
211
+ size_t index_in_bucket = i - bucket->index_start; // offset of the outer element within its bucket
212
+ // the bucket is read as one data pointer and one length per outer element
213
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
214
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
215
+
216
+ assert(j < length && "Fabric array inner index out of range");
217
+
218
+ T& result = *((T*)ptr + j); // the inner array is read as a packed array of T
219
+ // FP_VERIFY_FWD_1 is defined outside this header; presumably a floating-point validity check - TODO confirm
220
+ FP_VERIFY_FWD_1(result)
221
+
222
+ return result;
223
+ }
224
+
225
+
226
+ template <typename T>
227
+ CUDA_CALLABLE inline array_t<T> view(indexedfabricarray_t<T>& ifa, size_t i)
228
+ {
229
+ // index lookup
230
+ assert(i < ifa.size);
231
+ i = size_t(ifa.indices[i]);
232
+
233
+ const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
234
+ assert(bucket && "Fabric array index out of range");
235
+
236
+ assert(bucket->lengths && "Missing inner array lengths");
237
+
238
+ size_t index_in_bucket = i - bucket->index_start;
239
+
240
+ void* ptr = *((void**)bucket->ptr + index_in_bucket);
241
+ size_t length = *((size_t*)bucket->lengths + index_in_bucket);
242
+
243
+ return array_t<T>((T*)ptr, int(length));
244
+ }
245
+
246
+ } // namespace wp
@@ -0,0 +1,311 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #include "warp.h"
19
+ #include "cuda_util.h"
20
+ #include "hashgrid.h"
21
+ #include "sort.h"
22
+ #include "string.h"
23
+
24
+ using namespace wp;
25
+
26
+ #include <map>
27
+
28
+ namespace
29
+ {
30
+ // host-side copy of hash grid descriptors, maps GPU hash grid address (id) to a CPU desc
31
+ std::map<uint64_t, HashGrid> g_hash_grid_descriptors;
32
+
33
+ } // anonymous namespace
34
+
35
+
36
+ namespace wp
37
+ {
38
+
39
+ bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
40
+ {
41
+ const auto& iter = g_hash_grid_descriptors.find(id);
42
+ if (iter == g_hash_grid_descriptors.end())
43
+ return false;
44
+ else
45
+ grid = iter->second;
46
+ return true;
47
+ }
48
+
49
+ void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
50
+ {
51
+ g_hash_grid_descriptors[id] = grid;
52
+ }
53
+
54
+ void hash_grid_rem_descriptor(uint64_t id)
55
+ {
56
+ g_hash_grid_descriptors.erase(id);
57
+
58
+ }
59
+
60
+ // implemented in hashgrid.cu
61
+ void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points);
62
+
63
+ } // namespace wp
64
+
65
+
66
+ // host methods
67
+ uint64_t wp_hash_grid_create_host(int dim_x, int dim_y, int dim_z)
68
+ {
69
+ HashGrid* grid = new HashGrid();
70
+ memset(grid, 0, sizeof(HashGrid));
71
+
72
+ grid->dim_x = dim_x;
73
+ grid->dim_y = dim_y;
74
+ grid->dim_z = dim_z;
75
+
76
+ const int num_cells = dim_x*dim_y*dim_z;
77
+ grid->cell_starts = (int*)wp_alloc_host(num_cells*sizeof(int));
78
+ grid->cell_ends = (int*)wp_alloc_host(num_cells*sizeof(int));
79
+
80
+ return (uint64_t)(grid);
81
+ }
82
+
83
+ void wp_hash_grid_destroy_host(uint64_t id)
84
+ {
85
+ HashGrid* grid = (HashGrid*)(id);
86
+
87
+ wp_free_host(grid->point_ids);
88
+ wp_free_host(grid->point_cells);
89
+ wp_free_host(grid->cell_starts);
90
+ wp_free_host(grid->cell_ends);
91
+
92
+ delete grid;
93
+ }
94
+
95
+ void wp_hash_grid_reserve_host(uint64_t id, int num_points)
96
+ {
97
+ HashGrid* grid = (HashGrid*)(id);
98
+
99
+ if (num_points > grid->max_points)
100
+ {
101
+ wp_free_host(grid->point_cells);
102
+ wp_free_host(grid->point_ids);
103
+
104
+ const int num_to_alloc = num_points*3/2;
105
+ grid->point_cells = (int*)wp_alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
106
+ grid->point_ids = (int*)wp_alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
107
+
108
+ grid->max_points = num_to_alloc;
109
+ }
110
+
111
+ grid->num_points = num_points;
112
+ }
113
+
114
+ void wp_hash_grid_update_host(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
115
+ {
116
+ // Python enforces this, but let's be defensive anyways
117
+ if (!points || points->ndim != 1)
118
+ {
119
+ fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
120
+ return;
121
+ }
122
+
123
+ if (!id)
124
+ {
125
+ fprintf(stderr, "Warp error: Invalid grid passed to %s\n", __FUNCTION__);
126
+ return;
127
+ }
128
+
129
+ HashGrid* grid = (HashGrid*)(id);
130
+ int num_points = points->shape[0];
131
+
132
+ wp_hash_grid_reserve_host(id, num_points);
133
+
134
+ grid->cell_width = cell_width;
135
+ grid->cell_width_inv = 1.0f / cell_width;
136
+
137
+ // calculate cell for each position
138
+ for (int i=0; i < num_points; ++i)
139
+ {
140
+ const vec3& point = wp::index(*points, i);
141
+ grid->point_cells[i] = hash_grid_index(*grid, point);
142
+ grid->point_ids[i] = i;
143
+ }
144
+
145
+ // sort indices
146
+ radix_sort_pairs_host(grid->point_cells, grid->point_ids, num_points);
147
+
148
+ const int num_cells = grid->dim_x * grid->dim_y * grid->dim_z;
149
+ memset(grid->cell_starts, 0, sizeof(int) * num_cells);
150
+ memset(grid->cell_ends, 0, sizeof(int) * num_cells);
151
+
152
+ // compute cell start / end
153
+ for (int i=0; i < num_points; ++i)
154
+ {
155
+ // scan the particle-cell array to find the start and end
156
+ const int c = grid->point_cells[i];
157
+
158
+ if (i == 0)
159
+ grid->cell_starts[c] = 0;
160
+ else
161
+ {
162
+ const int p = grid->point_cells[i-1];
163
+
164
+ if (c != p)
165
+ {
166
+ grid->cell_starts[c] = i;
167
+ grid->cell_ends[p] = i;
168
+ }
169
+ }
170
+
171
+ if (i == num_points - 1)
172
+ {
173
+ grid->cell_ends[c] = i + 1;
174
+ }
175
+ }
176
+ }
177
+
178
+ // device methods
179
+ uint64_t wp_hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z)
180
+ {
181
+ ContextGuard guard(context);
182
+
183
+ HashGrid grid;
184
+ memset(&grid, 0, sizeof(HashGrid));
185
+
186
+ grid.context = context ? context : wp_cuda_context_get_current();
187
+
188
+ grid.dim_x = dim_x;
189
+ grid.dim_y = dim_y;
190
+ grid.dim_z = dim_z;
191
+
192
+ const int num_cells = dim_x*dim_y*dim_z;
193
+ grid.cell_starts = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
194
+ grid.cell_ends = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
195
+
196
+ // upload to device
197
+ HashGrid* grid_device = (HashGrid*)(wp_alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid)));
198
+ wp_memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid));
199
+
200
+ uint64_t grid_id = (uint64_t)(grid_device);
201
+ hash_grid_add_descriptor(grid_id, grid);
202
+
203
+ return grid_id;
204
+ }
205
+
206
+ void wp_hash_grid_destroy_device(uint64_t id)
207
+ {
208
+ HashGrid grid;
209
+ if (hash_grid_get_descriptor(id, grid))
210
+ {
211
+ ContextGuard guard(grid.context);
212
+
213
+ wp_free_device(WP_CURRENT_CONTEXT, grid.point_ids);
214
+ wp_free_device(WP_CURRENT_CONTEXT, grid.point_cells);
215
+ wp_free_device(WP_CURRENT_CONTEXT, grid.cell_starts);
216
+ wp_free_device(WP_CURRENT_CONTEXT, grid.cell_ends);
217
+
218
+ wp_free_device(WP_CURRENT_CONTEXT, (HashGrid*)id);
219
+
220
+ hash_grid_rem_descriptor(id);
221
+ }
222
+ }
223
+
224
+
225
+ void wp_hash_grid_reserve_device(uint64_t id, int num_points)
226
+ {
227
+ HashGrid grid;
228
+
229
+ if (hash_grid_get_descriptor(id, grid))
230
+ {
231
+ if (num_points > grid.max_points)
232
+ {
233
+ ContextGuard guard(grid.context);
234
+
235
+ wp_free_device(WP_CURRENT_CONTEXT, grid.point_cells);
236
+ wp_free_device(WP_CURRENT_CONTEXT, grid.point_ids);
237
+
238
+ const int num_to_alloc = num_points*3/2;
239
+ grid.point_cells = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
240
+ grid.point_ids = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
241
+ grid.max_points = num_to_alloc;
242
+
243
+ // ensure we pre-size our sort routine to avoid
244
+ // allocations during graph capture
245
+ radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc);
246
+
247
+ // update device side grid descriptor, todo: this is
248
+ // slightly redundant since it is performed again
249
+ // inside wp_hash_grid_update_device(), but since
250
+ // reserve can be called from Python we need to make
251
+ // sure it is consistent
252
+ wp_memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));
253
+
254
+ // update host side grid descriptor
255
+ hash_grid_add_descriptor(id, grid);
256
+ }
257
+ }
258
+ }
259
+
260
+ void wp_hash_grid_update_device(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
261
+ {
262
+ // Python enforces this, but let's be defensive anyways
263
+ if (!points || points->ndim != 1)
264
+ {
265
+ fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
266
+ return;
267
+ }
268
+
269
+ int num_points = points->shape[0];
270
+
271
+ // ensure we have enough memory reserved for update
272
+ // this must be done before retrieving the descriptor
273
+ // below since it may update it
274
+ wp_hash_grid_reserve_device(id, num_points);
275
+
276
+ // host grid must be static so that we can
277
+ // perform host->device memcpy from this variable
278
+ // and have it safely recorded inside CUDA graphs
279
+ static HashGrid grid;
280
+
281
+ if (hash_grid_get_descriptor(id, grid))
282
+ {
283
+ ContextGuard guard(grid.context);
284
+
285
+ grid.num_points = num_points;
286
+ grid.cell_width = cell_width;
287
+ grid.cell_width_inv = 1.0f / cell_width;
288
+
289
+ hash_grid_rebuild_device(grid, *points);
290
+
291
+ // update device side grid descriptor
292
+ wp_memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));
293
+
294
+ // update host side grid descriptor
295
+ hash_grid_add_descriptor(id, grid);
296
+ }
297
+ }
298
+
299
+ #if !WP_ENABLE_CUDA
300
+
301
+ namespace wp
302
+ {
303
+
304
+ void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points)
305
+ {
306
+
307
+ }
308
+
309
+ } // namespace wp
310
+
311
+ #endif // !WP_ENABLE_CUDA
@@ -0,0 +1,89 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #include "warp.h"
19
+ #include "cuda_util.h"
20
+ #include "hashgrid.h"
21
+ #include "sort.h"
22
+
23
+ extern CUcontext get_current_context();
24
+
25
namespace wp
{

// One thread per point: stores the point's cell index in grid.point_cells
// and the point's own index in grid.point_ids.
__global__ void compute_cell_indices(HashGrid grid, wp::array_t<wp::vec3> points)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;

    // surplus threads beyond the point count have nothing to do
    if (tid >= points.shape[0])
        return;

    grid.point_cells[tid] = hash_grid_index(grid, wp::index(points, tid));
    grid.point_ids[tid] = tid;
}

// One thread per entry of point_cells: wherever the cell index differs from
// the previous entry, the new cell's start and the previous cell's end are
// recorded. The first entry opens its cell at 0 and the last entry closes
// its cell at num_points.
__global__ void compute_cell_offsets(int* cell_starts, int* cell_ends, const int* point_cells, int num_points)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;

    // compute cell start / end
    if (tid >= num_points)
        return;

    // compare this entry's cell with its predecessor to find the start and end
    const int cell = point_cells[tid];

    if (tid != 0)
    {
        const int prev_cell = point_cells[tid-1];

        if (prev_cell != cell)
        {
            cell_starts[cell] = tid;
            cell_ends[prev_cell] = tid;
        }
    }
    else
    {
        cell_starts[cell] = 0;
    }

    if (tid + 1 == num_points)
    {
        cell_ends[cell] = num_points;
    }
}

// Rebuilds the device-side contents of `grid` from `points`: computes a cell
// index per point, sorts the (cell, point id) pairs, clears the per-cell
// tables, and fills in the start/end offsets of every occupied cell.
void hash_grid_rebuild_device(const wp::HashGrid& grid, const wp::array_t<wp::vec3>& points)
{
    ContextGuard guard(grid.context);

    const int num_points = points.shape[0];
    const int num_cells = grid.dim_x * grid.dim_y * grid.dim_z;
    const size_t table_bytes = sizeof(int) * num_cells;

    // assign each point to a cell
    wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_cell_indices, num_points, (grid, points));

    // sort point ids by cell index
    radix_sort_pairs_device(WP_CURRENT_CONTEXT, grid.point_cells, grid.point_ids, num_points);

    // reset both per-cell tables before recording the new offsets
    wp_memset_device(WP_CURRENT_CONTEXT, grid.cell_starts, 0, table_bytes);
    wp_memset_device(WP_CURRENT_CONTEXT, grid.cell_ends, 0, table_bytes);

    wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_cell_offsets, num_points, (grid.cell_starts, grid.cell_ends, grid.point_cells, num_points));
}


} // namespace wp