warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +334 -0
- warp/__init__.pyi +5856 -0
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1077 -0
- warp/_src/build.py +620 -0
- warp/_src/build_dll.py +642 -0
- warp/_src/builtins.py +10555 -0
- warp/_src/codegen.py +4361 -0
- warp/_src/config.py +178 -0
- warp/_src/constants.py +59 -0
- warp/_src/context.py +8352 -0
- warp/_src/dlpack.py +464 -0
- warp/_src/fabric.py +362 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +510 -0
- warp/_src/fem/cache.py +689 -0
- warp/_src/fem/dirichlet.py +190 -0
- warp/_src/fem/domain.py +553 -0
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +703 -0
- warp/_src/fem/field/nodal_field.py +403 -0
- warp/_src/fem/field/restriction.py +39 -0
- warp/_src/fem/field/virtual.py +1021 -0
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
- warp/_src/fem/geometry/closest_point.py +99 -0
- warp/_src/fem/geometry/deformed_geometry.py +277 -0
- warp/_src/fem/geometry/element.py +854 -0
- warp/_src/fem/geometry/geometry.py +693 -0
- warp/_src/fem/geometry/grid_2d.py +478 -0
- warp/_src/fem/geometry/grid_3d.py +539 -0
- warp/_src/fem/geometry/hexmesh.py +956 -0
- warp/_src/fem/geometry/nanogrid.py +660 -0
- warp/_src/fem/geometry/partition.py +483 -0
- warp/_src/fem/geometry/quadmesh.py +597 -0
- warp/_src/fem/geometry/tetmesh.py +762 -0
- warp/_src/fem/geometry/trimesh.py +588 -0
- warp/_src/fem/integrate.py +2507 -0
- warp/_src/fem/linalg.py +385 -0
- warp/_src/fem/operator.py +398 -0
- warp/_src/fem/polynomial.py +231 -0
- warp/_src/fem/quadrature/__init__.py +17 -0
- warp/_src/fem/quadrature/pic_quadrature.py +318 -0
- warp/_src/fem/quadrature/quadrature.py +665 -0
- warp/_src/fem/space/__init__.py +248 -0
- warp/_src/fem/space/basis_function_space.py +499 -0
- warp/_src/fem/space/basis_space.py +681 -0
- warp/_src/fem/space/dof_mapper.py +253 -0
- warp/_src/fem/space/function_space.py +312 -0
- warp/_src/fem/space/grid_2d_function_space.py +179 -0
- warp/_src/fem/space/grid_3d_function_space.py +229 -0
- warp/_src/fem/space/hexmesh_function_space.py +255 -0
- warp/_src/fem/space/nanogrid_function_space.py +199 -0
- warp/_src/fem/space/partition.py +435 -0
- warp/_src/fem/space/quadmesh_function_space.py +222 -0
- warp/_src/fem/space/restriction.py +221 -0
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
- warp/_src/fem/space/shape/shape_function.py +134 -0
- warp/_src/fem/space/shape/square_shape_function.py +928 -0
- warp/_src/fem/space/shape/tet_shape_function.py +829 -0
- warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
- warp/_src/fem/space/tetmesh_function_space.py +270 -0
- warp/_src/fem/space/topology.py +461 -0
- warp/_src/fem/space/trimesh_function_space.py +193 -0
- warp/_src/fem/types.py +114 -0
- warp/_src/fem/utils.py +488 -0
- warp/_src/jax.py +188 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +389 -0
- warp/_src/jax_experimental/ffi.py +1286 -0
- warp/_src/jax_experimental/xla_ffi.py +658 -0
- warp/_src/marching_cubes.py +710 -0
- warp/_src/math.py +416 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +165 -0
- warp/_src/optim/linear.py +1608 -0
- warp/_src/optim/sgd.py +114 -0
- warp/_src/paddle.py +408 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +291 -0
- warp/_src/render/render_opengl.py +3638 -0
- warp/_src/render/render_usd.py +939 -0
- warp/_src/render/utils.py +162 -0
- warp/_src/sparse.py +2718 -0
- warp/_src/tape.py +1208 -0
- warp/_src/thirdparty/__init__.py +0 -0
- warp/_src/thirdparty/appdirs.py +598 -0
- warp/_src/thirdparty/dlpack.py +145 -0
- warp/_src/thirdparty/unittest_parallel.py +676 -0
- warp/_src/torch.py +393 -0
- warp/_src/types.py +5888 -0
- warp/_src/utils.py +1695 -0
- warp/autograd.py +33 -0
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +29 -0
- warp/build_dll.py +24 -0
- warp/codegen.py +24 -0
- warp/constants.py +24 -0
- warp/context.py +33 -0
- warp/dlpack.py +24 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +195 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +290 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/distributed/example_jacobi_mpi.py +506 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +469 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +181 -0
- warp/examples/fem/example_convection_diffusion_dg.py +225 -0
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +225 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +242 -0
- warp/examples/fem/example_mixed_elasticity.py +293 -0
- warp/examples/fem/example_navier_stokes.py +263 -0
- warp/examples/fem/example_nonconforming_contact.py +300 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +357 -0
- warp/examples/fem/utils.py +1047 -0
- warp/examples/interop/example_jax_callable.py +146 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +232 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +88 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/examples/tile/example_tile_mlp.py +385 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/fabric.py +24 -0
- warp/fem/__init__.py +173 -0
- warp/fem/adaptivity.py +26 -0
- warp/fem/cache.py +30 -0
- warp/fem/dirichlet.py +24 -0
- warp/fem/field/__init__.py +24 -0
- warp/fem/field/field.py +26 -0
- warp/fem/geometry/__init__.py +21 -0
- warp/fem/geometry/closest_point.py +31 -0
- warp/fem/linalg.py +38 -0
- warp/fem/operator.py +32 -0
- warp/fem/polynomial.py +29 -0
- warp/fem/space/__init__.py +22 -0
- warp/fem/space/basis_space.py +24 -0
- warp/fem/space/shape/__init__.py +68 -0
- warp/fem/space/topology.py +24 -0
- warp/fem/types.py +24 -0
- warp/fem/utils.py +32 -0
- warp/jax.py +29 -0
- warp/jax_experimental/__init__.py +29 -0
- warp/jax_experimental/custom_call.py +29 -0
- warp/jax_experimental/ffi.py +39 -0
- warp/jax_experimental/xla_ffi.py +24 -0
- warp/marching_cubes.py +24 -0
- warp/math.py +37 -0
- warp/native/array.h +1687 -0
- warp/native/builtin.h +2327 -0
- warp/native/bvh.cpp +562 -0
- warp/native/bvh.cu +826 -0
- warp/native/bvh.h +555 -0
- warp/native/clang/clang.cpp +541 -0
- warp/native/coloring.cpp +622 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +568 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +677 -0
- warp/native/cuda_util.h +313 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +2023 -0
- warp/native/fabric.h +246 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +89 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1253 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +348 -0
- warp/native/mat.h +5189 -0
- warp/native/mathdx.cpp +93 -0
- warp/native/matnn.h +221 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +406 -0
- warp/native/mesh.h +2097 -0
- warp/native/nanovdb/GridHandle.h +533 -0
- warp/native/nanovdb/HostBuffer.h +591 -0
- warp/native/nanovdb/NanoVDB.h +6246 -0
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1664 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +145 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +363 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +55 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +286 -0
- warp/native/sort.h +35 -0
- warp/native/sparse.cpp +241 -0
- warp/native/sparse.cu +435 -0
- warp/native/spatial.h +1306 -0
- warp/native/svd.h +727 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +4124 -0
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +838 -0
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +2199 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +68 -0
- warp/native/volume.h +970 -0
- warp/native/volume_builder.cu +483 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1143 -0
- warp/native/warp.cu +4604 -0
- warp/native/warp.h +358 -0
- warp/optim/__init__.py +20 -0
- warp/optim/adam.py +24 -0
- warp/optim/linear.py +35 -0
- warp/optim/sgd.py +24 -0
- warp/paddle.py +24 -0
- warp/py.typed +0 -0
- warp/render/__init__.py +22 -0
- warp/render/imgui_manager.py +29 -0
- warp/render/render_opengl.py +24 -0
- warp/render/render_usd.py +24 -0
- warp/render/utils.py +24 -0
- warp/sparse.py +51 -0
- warp/tape.py +24 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_conditional_captures.py +1147 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +691 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +335 -0
- warp/tests/geometry/test_hash_grid.py +259 -0
- warp/tests/geometry/test_marching_cubes.py +294 -0
- warp/tests/geometry/test_mesh.py +318 -0
- warp/tests/geometry/test_mesh_query_aabb.py +392 -0
- warp/tests/geometry/test_mesh_query_point.py +935 -0
- warp/tests/geometry/test_mesh_query_ray.py +323 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +730 -0
- warp/tests/interop/test_jax.py +1673 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/test_adam.py +162 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +3756 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +303 -0
- warp/tests/test_atomic.py +336 -0
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +732 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +974 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +298 -0
- warp/tests/test_context.py +35 -0
- warp/tests/test_copy.py +319 -0
- warp/tests/test_ctypes.py +618 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +127 -0
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +424 -0
- warp/tests/test_fabricarray.py +998 -0
- warp/tests/test_fast_math.py +72 -0
- warp/tests/test_fem.py +2204 -0
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +501 -0
- warp/tests/test_future_annotations.py +100 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +103 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +223 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_map.py +526 -0
- warp/tests/test_mat.py +3515 -0
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +573 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +212 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +70 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +408 -0
- warp/tests/test_quat.py +2653 -0
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +303 -0
- warp/tests/test_rounding.py +157 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +133 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +845 -0
- warp/tests/test_spatial.py +2859 -0
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +640 -0
- warp/tests/test_struct.py +901 -0
- warp/tests/test_tape.py +242 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +192 -0
- warp/tests/test_tuple.py +361 -0
- warp/tests/test_types.py +615 -0
- warp/tests/test_utils.py +594 -0
- warp/tests/test_vec.py +1408 -0
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/test_version.py +75 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +1519 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +608 -0
- warp/tests/tile/test_tile_load.py +724 -0
- warp/tests/tile/test_tile_mathdx.py +156 -0
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +400 -0
- warp/tests/tile/test_tile_reduce.py +950 -0
- warp/tests/tile/test_tile_shared_memory.py +376 -0
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +430 -0
- warp/tests/unittest_utils.py +469 -0
- warp/tests/walkthrough_debug.py +95 -0
- warp/torch.py +24 -0
- warp/types.py +51 -0
- warp/utils.py +31 -0
- warp_lang-1.10.0.dist-info/METADATA +459 -0
- warp_lang-1.10.0.dist-info/RECORD +468 -0
- warp_lang-1.10.0.dist-info/WHEEL +5 -0
- warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/fabric.h
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
#include "builtin.h"
|
|
21
|
+
|
|
22
|
+
namespace wp
|
|
23
|
+
{
|
|
24
|
+
|
|
25
|
+
// Descriptor for one bucket of a fabric array: a chunk of storage that holds
// the elements whose global indices fall in [index_start, index_end).
struct fabricbucket_t
|
|
26
|
+
{
|
|
27
|
+
size_t index_start;  // global index of the first element in this bucket (inclusive)
|
|
28
|
+
size_t index_end;  // global index one past the last element in this bucket (exclusive)
|
|
29
|
+
void* ptr;  // element storage; for arrays of arrays this holds one inner-array pointer per element
|
|
30
|
+
size_t* lengths;  // per-element inner array lengths; only read (and asserted non-null) by the array-of-arrays accessors
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
template <typename T>
|
|
35
|
+
struct fabricarray_t
|
|
36
|
+
{
|
|
37
|
+
CUDA_CALLABLE inline fabricarray_t()
|
|
38
|
+
: buckets(nullptr),
|
|
39
|
+
nbuckets(0),
|
|
40
|
+
size(0)
|
|
41
|
+
{}
|
|
42
|
+
|
|
43
|
+
CUDA_CALLABLE inline bool empty() const { return !size; }
|
|
44
|
+
|
|
45
|
+
fabricbucket_t* buckets; // array of fabricbucket_t on the correct device
|
|
46
|
+
|
|
47
|
+
size_t nbuckets;
|
|
48
|
+
size_t size;
|
|
49
|
+
};
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
template <typename T>
|
|
53
|
+
struct indexedfabricarray_t
|
|
54
|
+
{
|
|
55
|
+
CUDA_CALLABLE inline indexedfabricarray_t()
|
|
56
|
+
: indices(),
|
|
57
|
+
size(0)
|
|
58
|
+
{}
|
|
59
|
+
|
|
60
|
+
CUDA_CALLABLE inline bool empty() const { return !size; }
|
|
61
|
+
|
|
62
|
+
fabricarray_t<T> fa;
|
|
63
|
+
|
|
64
|
+
// TODO: we use 32-bit indices for consistency with other Warp indexed arrays,
|
|
65
|
+
// but Fabric uses 64-bit indexing.
|
|
66
|
+
int* indices;
|
|
67
|
+
size_t size;
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
#ifndef FABRICARRAY_USE_BINARY_SEARCH
|
|
72
|
+
#define FABRICARRAY_USE_BINARY_SEARCH 1
|
|
73
|
+
#endif
|
|
74
|
+
|
|
75
|
+
template <typename T>
|
|
76
|
+
CUDA_CALLABLE inline const fabricbucket_t* fabricarray_find_bucket(const fabricarray_t<T>& fa, size_t i)
|
|
77
|
+
{
|
|
78
|
+
#if FABRICARRAY_USE_BINARY_SEARCH
|
|
79
|
+
// use binary search to find the right bucket
|
|
80
|
+
const fabricbucket_t* bucket = nullptr;
|
|
81
|
+
size_t lo = 0;
|
|
82
|
+
size_t hi = fa.nbuckets - 1;
|
|
83
|
+
while (hi >= lo)
|
|
84
|
+
{
|
|
85
|
+
size_t mid = (lo + hi) >> 1;
|
|
86
|
+
bucket = fa.buckets + mid;
|
|
87
|
+
if (i >= bucket->index_end)
|
|
88
|
+
lo = mid + 1;
|
|
89
|
+
else if (i < bucket->index_start)
|
|
90
|
+
hi = mid - 1;
|
|
91
|
+
else
|
|
92
|
+
return bucket;
|
|
93
|
+
}
|
|
94
|
+
return nullptr;
|
|
95
|
+
#else
|
|
96
|
+
// use linear search to find the right bucket
|
|
97
|
+
const fabricbucket_t* bucket = fa.buckets;
|
|
98
|
+
const fabricbucket_t* bucket_end = bucket + fa.nbuckets;
|
|
99
|
+
for (; bucket < bucket_end; ++bucket)
|
|
100
|
+
{
|
|
101
|
+
if (i < bucket->index_end)
|
|
102
|
+
return bucket;
|
|
103
|
+
}
|
|
104
|
+
return nullptr;
|
|
105
|
+
#endif
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
// Compute the pointer to a fabricarray element at index i.
|
|
110
|
+
// This function is similar to wp::index(), but the array data type doesn't need to be known at compile time.
|
|
111
|
+
CUDA_CALLABLE inline void* fabricarray_element_ptr(const fabricarray_t<void>& fa, size_t i, size_t elem_size)
|
|
112
|
+
{
|
|
113
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
114
|
+
|
|
115
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
116
|
+
|
|
117
|
+
return (char*)bucket->ptr + index_in_bucket * elem_size;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
template <typename T>
|
|
122
|
+
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i)
|
|
123
|
+
{
|
|
124
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
125
|
+
assert(bucket && "Fabric array index out of range");
|
|
126
|
+
|
|
127
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
128
|
+
|
|
129
|
+
T& result = *((T*)bucket->ptr + index_in_bucket);
|
|
130
|
+
|
|
131
|
+
FP_VERIFY_FWD_1(result)
|
|
132
|
+
|
|
133
|
+
return result;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
// indexing for fabric array of arrays
|
|
138
|
+
template <typename T>
|
|
139
|
+
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i, size_t j)
|
|
140
|
+
{
|
|
141
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
142
|
+
assert(bucket && "Fabric array index out of range");
|
|
143
|
+
|
|
144
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
145
|
+
|
|
146
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
147
|
+
|
|
148
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
149
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
150
|
+
|
|
151
|
+
assert(j < length && "Fabric array inner index out of range");
|
|
152
|
+
|
|
153
|
+
T& result = *((T*)ptr + j);
|
|
154
|
+
|
|
155
|
+
FP_VERIFY_FWD_1(result)
|
|
156
|
+
|
|
157
|
+
return result;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
template <typename T>
|
|
162
|
+
CUDA_CALLABLE inline array_t<T> view(fabricarray_t<T>& fa, size_t i)
|
|
163
|
+
{
|
|
164
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
165
|
+
assert(bucket && "Fabric array index out of range");
|
|
166
|
+
|
|
167
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
168
|
+
|
|
169
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
170
|
+
|
|
171
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
172
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
173
|
+
|
|
174
|
+
return array_t<T>((T*)ptr, int(length));
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
template <typename T>
|
|
179
|
+
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i)
|
|
180
|
+
{
|
|
181
|
+
// index lookup
|
|
182
|
+
assert(i < ifa.size);
|
|
183
|
+
i = size_t(ifa.indices[i]);
|
|
184
|
+
|
|
185
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
186
|
+
assert(bucket && "Fabric array index out of range");
|
|
187
|
+
|
|
188
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
189
|
+
|
|
190
|
+
T& result = *((T*)bucket->ptr + index_in_bucket);
|
|
191
|
+
|
|
192
|
+
FP_VERIFY_FWD_1(result)
|
|
193
|
+
|
|
194
|
+
return result;
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
// indexing for fabric array of arrays
|
|
199
|
+
template <typename T>
|
|
200
|
+
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i, size_t j)
|
|
201
|
+
{
|
|
202
|
+
// index lookup
|
|
203
|
+
assert(i < ifa.size);
|
|
204
|
+
i = size_t(ifa.indices[i]);
|
|
205
|
+
|
|
206
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
207
|
+
assert(bucket && "Fabric array index out of range");
|
|
208
|
+
|
|
209
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
210
|
+
|
|
211
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
212
|
+
|
|
213
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
214
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
215
|
+
|
|
216
|
+
assert(j < length && "Fabric array inner index out of range");
|
|
217
|
+
|
|
218
|
+
T& result = *((T*)ptr + j);
|
|
219
|
+
|
|
220
|
+
FP_VERIFY_FWD_1(result)
|
|
221
|
+
|
|
222
|
+
return result;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
template <typename T>
|
|
227
|
+
CUDA_CALLABLE inline array_t<T> view(indexedfabricarray_t<T>& ifa, size_t i)
|
|
228
|
+
{
|
|
229
|
+
// index lookup
|
|
230
|
+
assert(i < ifa.size);
|
|
231
|
+
i = size_t(ifa.indices[i]);
|
|
232
|
+
|
|
233
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
234
|
+
assert(bucket && "Fabric array index out of range");
|
|
235
|
+
|
|
236
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
237
|
+
|
|
238
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
239
|
+
|
|
240
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
241
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
242
|
+
|
|
243
|
+
return array_t<T>((T*)ptr, int(length));
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
} // namespace wp
|
warp/native/hashgrid.cpp
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "warp.h"
|
|
19
|
+
#include "cuda_util.h"
|
|
20
|
+
#include "hashgrid.h"
|
|
21
|
+
#include "sort.h"
|
|
22
|
+
#include "string.h"
|
|
23
|
+
|
|
24
|
+
using namespace wp;
|
|
25
|
+
|
|
26
|
+
#include <map>
|
|
27
|
+
|
|
28
|
+
namespace
|
|
29
|
+
{
|
|
30
|
+
// host-side copy of hash grid descriptors, maps hash grid id to a CPU-side HashGrid desc
|
|
31
|
+
std::map<uint64_t, HashGrid> g_hash_grid_descriptors;
|
|
32
|
+
|
|
33
|
+
} // anonymous namespace
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
namespace wp
|
|
37
|
+
{
|
|
38
|
+
|
|
39
|
+
bool hash_grid_get_descriptor(uint64_t id, HashGrid& grid)
|
|
40
|
+
{
|
|
41
|
+
const auto& iter = g_hash_grid_descriptors.find(id);
|
|
42
|
+
if (iter == g_hash_grid_descriptors.end())
|
|
43
|
+
return false;
|
|
44
|
+
else
|
|
45
|
+
grid = iter->second;
|
|
46
|
+
return true;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
void hash_grid_add_descriptor(uint64_t id, const HashGrid& grid)
|
|
50
|
+
{
|
|
51
|
+
g_hash_grid_descriptors[id] = grid;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
void hash_grid_rem_descriptor(uint64_t id)
|
|
55
|
+
{
|
|
56
|
+
g_hash_grid_descriptors.erase(id);
|
|
57
|
+
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// implemented in hashgrid.cu
|
|
61
|
+
void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points);
|
|
62
|
+
|
|
63
|
+
} // namespace wp
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
// host methods
|
|
67
|
+
uint64_t wp_hash_grid_create_host(int dim_x, int dim_y, int dim_z)
|
|
68
|
+
{
|
|
69
|
+
HashGrid* grid = new HashGrid();
|
|
70
|
+
memset(grid, 0, sizeof(HashGrid));
|
|
71
|
+
|
|
72
|
+
grid->dim_x = dim_x;
|
|
73
|
+
grid->dim_y = dim_y;
|
|
74
|
+
grid->dim_z = dim_z;
|
|
75
|
+
|
|
76
|
+
const int num_cells = dim_x*dim_y*dim_z;
|
|
77
|
+
grid->cell_starts = (int*)wp_alloc_host(num_cells*sizeof(int));
|
|
78
|
+
grid->cell_ends = (int*)wp_alloc_host(num_cells*sizeof(int));
|
|
79
|
+
|
|
80
|
+
return (uint64_t)(grid);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
void wp_hash_grid_destroy_host(uint64_t id)
|
|
84
|
+
{
|
|
85
|
+
HashGrid* grid = (HashGrid*)(id);
|
|
86
|
+
|
|
87
|
+
wp_free_host(grid->point_ids);
|
|
88
|
+
wp_free_host(grid->point_cells);
|
|
89
|
+
wp_free_host(grid->cell_starts);
|
|
90
|
+
wp_free_host(grid->cell_ends);
|
|
91
|
+
|
|
92
|
+
delete grid;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
void wp_hash_grid_reserve_host(uint64_t id, int num_points)
|
|
96
|
+
{
|
|
97
|
+
HashGrid* grid = (HashGrid*)(id);
|
|
98
|
+
|
|
99
|
+
if (num_points > grid->max_points)
|
|
100
|
+
{
|
|
101
|
+
wp_free_host(grid->point_cells);
|
|
102
|
+
wp_free_host(grid->point_ids);
|
|
103
|
+
|
|
104
|
+
const int num_to_alloc = num_points*3/2;
|
|
105
|
+
grid->point_cells = (int*)wp_alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
106
|
+
grid->point_ids = (int*)wp_alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
107
|
+
|
|
108
|
+
grid->max_points = num_to_alloc;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
grid->num_points = num_points;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// Rebuild the host grid `id` from `points` using the given cell width.
// Steps: validate the arguments, reserve per-point storage, hash every point
// to a cell, sort the (cell, point id) pairs by cell, then record for each
// occupied cell the [start, end) range it covers in the sorted arrays.
void wp_hash_grid_update_host(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
{
    // Python enforces this, but let's be defensive anyways
    const bool points_ok = (points != nullptr) && (points->ndim == 1);
    if (!points_ok)
    {
        fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
        return;
    }

    if (id == 0)
    {
        fprintf(stderr, "Warp error: Invalid grid passed to %s\n", __FUNCTION__);
        return;
    }

    HashGrid* grid = (HashGrid*)(id);
    const int point_count = points->shape[0];

    wp_hash_grid_reserve_host(id, point_count);

    grid->cell_width = cell_width;
    grid->cell_width_inv = 1.0f / cell_width;

    // read the buffers only after the reserve call, which may reallocate them
    int* cells = grid->point_cells;
    int* ids = grid->point_ids;

    // calculate cell for each position
    for (int n = 0; n < point_count; ++n)
    {
        cells[n] = hash_grid_index(*grid, wp::index(*points, n));
        ids[n] = n;
    }

    // sort indices
    radix_sort_pairs_host(cells, ids, point_count);

    const int cell_count = grid->dim_x * grid->dim_y * grid->dim_z;
    int* starts = grid->cell_starts;
    int* ends = grid->cell_ends;

    memset(starts, 0, sizeof(int) * cell_count);
    memset(ends, 0, sizeof(int) * cell_count);

    // compute cell start / end
    if (point_count > 0)
    {
        // the first sorted point opens its cell
        starts[cells[0]] = 0;

        // every change of cell closes the previous run and opens the next one
        for (int n = 1; n < point_count; ++n)
        {
            const int prev = cells[n - 1];
            const int curr = cells[n];

            if (curr != prev)
            {
                starts[curr] = n;
                ends[prev] = n;
            }
        }

        // the last sorted point closes its cell
        ends[cells[point_count - 1]] = point_count;
    }
}
|
|
177
|
+
|
|
178
|
+
// device methods
|
|
179
|
+
uint64_t wp_hash_grid_create_device(void* context, int dim_x, int dim_y, int dim_z)
|
|
180
|
+
{
|
|
181
|
+
ContextGuard guard(context);
|
|
182
|
+
|
|
183
|
+
HashGrid grid;
|
|
184
|
+
memset(&grid, 0, sizeof(HashGrid));
|
|
185
|
+
|
|
186
|
+
grid.context = context ? context : wp_cuda_context_get_current();
|
|
187
|
+
|
|
188
|
+
grid.dim_x = dim_x;
|
|
189
|
+
grid.dim_y = dim_y;
|
|
190
|
+
grid.dim_z = dim_z;
|
|
191
|
+
|
|
192
|
+
const int num_cells = dim_x*dim_y*dim_z;
|
|
193
|
+
grid.cell_starts = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
|
|
194
|
+
grid.cell_ends = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, num_cells*sizeof(int));
|
|
195
|
+
|
|
196
|
+
// upload to device
|
|
197
|
+
HashGrid* grid_device = (HashGrid*)(wp_alloc_device(WP_CURRENT_CONTEXT, sizeof(HashGrid)));
|
|
198
|
+
wp_memcpy_h2d(WP_CURRENT_CONTEXT, grid_device, &grid, sizeof(HashGrid));
|
|
199
|
+
|
|
200
|
+
uint64_t grid_id = (uint64_t)(grid_device);
|
|
201
|
+
hash_grid_add_descriptor(grid_id, grid);
|
|
202
|
+
|
|
203
|
+
return grid_id;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
void wp_hash_grid_destroy_device(uint64_t id)
|
|
207
|
+
{
|
|
208
|
+
HashGrid grid;
|
|
209
|
+
if (hash_grid_get_descriptor(id, grid))
|
|
210
|
+
{
|
|
211
|
+
ContextGuard guard(grid.context);
|
|
212
|
+
|
|
213
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.point_ids);
|
|
214
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.point_cells);
|
|
215
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.cell_starts);
|
|
216
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.cell_ends);
|
|
217
|
+
|
|
218
|
+
wp_free_device(WP_CURRENT_CONTEXT, (HashGrid*)id);
|
|
219
|
+
|
|
220
|
+
hash_grid_rem_descriptor(id);
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
void wp_hash_grid_reserve_device(uint64_t id, int num_points)
|
|
226
|
+
{
|
|
227
|
+
HashGrid grid;
|
|
228
|
+
|
|
229
|
+
if (hash_grid_get_descriptor(id, grid))
|
|
230
|
+
{
|
|
231
|
+
if (num_points > grid.max_points)
|
|
232
|
+
{
|
|
233
|
+
ContextGuard guard(grid.context);
|
|
234
|
+
|
|
235
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.point_cells);
|
|
236
|
+
wp_free_device(WP_CURRENT_CONTEXT, grid.point_ids);
|
|
237
|
+
|
|
238
|
+
const int num_to_alloc = num_points*3/2;
|
|
239
|
+
grid.point_cells = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
240
|
+
grid.point_ids = (int*)wp_alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
241
|
+
grid.max_points = num_to_alloc;
|
|
242
|
+
|
|
243
|
+
// ensure we pre-size our sort routine to avoid
|
|
244
|
+
// allocations during graph capture
|
|
245
|
+
radix_sort_reserve(WP_CURRENT_CONTEXT, num_to_alloc);
|
|
246
|
+
|
|
247
|
+
// update device side grid descriptor, todo: this is
|
|
248
|
+
// slightly redundant since it is performed again
|
|
249
|
+
// inside wp_hash_grid_update_device(), but since
|
|
250
|
+
// reserve can be called from Python we need to make
|
|
251
|
+
// sure it is consistent
|
|
252
|
+
wp_memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &grid, sizeof(HashGrid));
|
|
253
|
+
|
|
254
|
+
// update host side grid descriptor
|
|
255
|
+
hash_grid_add_descriptor(id, grid);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Rebuild the device grid `id` from `points` using the given cell width.
// Validates the points array, reserves per-point storage, rebuilds the grid on
// the device, then writes the refreshed descriptor back to both the device
// copy and the host-side table. Ids with no registered descriptor are ignored.
void wp_hash_grid_update_device(uint64_t id, float cell_width, const wp::array_t<wp::vec3>* points)
{
    // Python enforces this, but let's be defensive anyways
    const bool points_ok = (points != nullptr) && (points->ndim == 1);
    if (!points_ok)
    {
        fprintf(stderr, "Warp error: Invalid points array passed to %s\n", __FUNCTION__);
        return;
    }

    const int point_count = points->shape[0];

    // ensure we have enough memory reserved for update
    // this must be done before retrieving the descriptor
    // below since it may update it
    wp_hash_grid_reserve_device(id, point_count);

    // host grid must be static so that we can
    // perform host->device memcpy from this variable
    // and have it safely recorded inside CUDA graphs
    // NOTE(review): being static, this one object is shared by every grid and
    // every caller of this function - confirm that updates are serialised
    static HashGrid s_grid;

    if (!hash_grid_get_descriptor(id, s_grid))
        return;

    ContextGuard guard(s_grid.context);

    s_grid.num_points = point_count;
    s_grid.cell_width = cell_width;
    s_grid.cell_width_inv = 1.0f / cell_width;

    hash_grid_rebuild_device(s_grid, *points);

    // update device side grid descriptor
    wp_memcpy_h2d(WP_CURRENT_CONTEXT, (HashGrid*)id, &s_grid, sizeof(HashGrid));

    // update host side grid descriptor
    hash_grid_add_descriptor(id, s_grid);
}
|
|
298
|
+
|
|
299
|
+
#if !WP_ENABLE_CUDA
|
|
300
|
+
|
|
301
|
+
namespace wp
|
|
302
|
+
{
|
|
303
|
+
|
|
304
|
+
void hash_grid_rebuild_device(const HashGrid& grid, const wp::array_t<wp::vec3>& points)
|
|
305
|
+
{
|
|
306
|
+
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
} // namespace wp
|
|
310
|
+
|
|
311
|
+
#endif // !WP_ENABLE_CUDA
|
warp/native/hashgrid.cu
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "warp.h"
|
|
19
|
+
#include "cuda_util.h"
|
|
20
|
+
#include "hashgrid.h"
|
|
21
|
+
#include "sort.h"
|
|
22
|
+
|
|
23
|
+
extern CUcontext get_current_context();
|
|
24
|
+
|
|
25
|
+
namespace wp
|
|
26
|
+
{
|
|
27
|
+
|
|
28
|
+
// One thread per point: writes the point's hashed cell index into
// grid.point_cells and its own index into grid.point_ids, giving the
// (cell, id) pairs that are sorted afterwards.
__global__ void compute_cell_indices(HashGrid grid, wp::array_t<wp::vec3> points)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;

    // threads past the end of the points array have nothing to do
    if (tid >= points.shape[0])
        return;

    grid.point_cells[tid] = hash_grid_index(grid, wp::index(points, tid));
    grid.point_ids[tid] = tid;
}
|
|
39
|
+
|
|
40
|
+
// One thread per sorted point: compares the point's cell with its
// predecessor's and, at every boundary, records where the new cell's run
// starts and where the previous cell's run ends. The first thread opens its
// cell at 0 and the last thread closes its cell at num_points.
__global__ void compute_cell_offsets(int* cell_starts, int* cell_ends, const int* point_cells, int num_points)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;

    if (tid >= num_points)
        return;

    // scan the particle-cell array to find the start and end
    const int curr = point_cells[tid];

    if (tid == 0)
    {
        cell_starts[curr] = 0;
    }
    else
    {
        const int prev = point_cells[tid - 1];

        if (prev != curr)
        {
            cell_starts[curr] = tid;
            cell_ends[prev] = tid;
        }
    }

    // the last point closes the run of its own cell
    if (tid + 1 == num_points)
        cell_ends[curr] = num_points;
}
|
|
69
|
+
|
|
70
|
+
// Rebuild the device-side cell tables of `grid` from `points`.
// Issues, in order: per-point cell hashing, a sort of the (cell, point id)
// pairs, zeroing of the per-cell start/end tables, and the per-cell range
// computation. All work is issued under the grid's own context.
void hash_grid_rebuild_device(const wp::HashGrid& grid, const wp::array_t<wp::vec3>& points)
{
    ContextGuard guard(grid.context);

    int point_count = points.shape[0];

    // hash every point to a cell and tag it with its index
    wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_cell_indices, point_count, (grid, points));

    // group points by cell
    radix_sort_pairs_device(WP_CURRENT_CONTEXT, grid.point_cells, grid.point_ids, point_count);

    const int cell_count = grid.dim_x * grid.dim_y * grid.dim_z;
    const size_t table_bytes = sizeof(int) * cell_count;

    // cells that receive no points keep a zero start and end
    wp_memset_device(WP_CURRENT_CONTEXT, grid.cell_starts, 0, table_bytes);
    wp_memset_device(WP_CURRENT_CONTEXT, grid.cell_ends, 0, table_bytes);

    wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_cell_offsets, point_count, (grid.cell_starts, grid.cell_ends, grid.point_cells, point_count));
}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
} // namespace wp
|