warp-lang 1.7.0__py3-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +139 -0
- warp/__init__.pyi +1 -0
- warp/autograd.py +1142 -0
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +557 -0
- warp/build_dll.py +405 -0
- warp/builtins.py +6855 -0
- warp/codegen.py +3969 -0
- warp/config.py +158 -0
- warp/constants.py +57 -0
- warp/context.py +6812 -0
- warp/dlpack.py +462 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -0
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nv_ant.xml +92 -0
- warp/examples/assets/nv_humanoid.xml +183 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/quadruped.urdf +268 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +194 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +193 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +423 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +178 -0
- warp/examples/fem/example_convection_diffusion_dg.py +204 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +220 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_magnetostatics.py +240 -0
- warp/examples/fem/example_mixed_elasticity.py +291 -0
- warp/examples/fem/example_navier_stokes.py +261 -0
- warp/examples/fem/example_nonconforming_contact.py +298 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +352 -0
- warp/examples/fem/utils.py +1000 -0
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_bounce.py +266 -0
- warp/examples/optim/example_cloth_throw.py +228 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_drone.py +870 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/optim/example_inverse_kinematics.py +182 -0
- warp/examples/optim/example_inverse_kinematics_torch.py +191 -0
- warp/examples/optim/example_softbody_properties.py +400 -0
- warp/examples/optim/example_spring_cage.py +245 -0
- warp/examples/optim/example_trajectory.py +227 -0
- warp/examples/sim/example_cartpole.py +143 -0
- warp/examples/sim/example_cloth.py +225 -0
- warp/examples/sim/example_cloth_self_contact.py +322 -0
- warp/examples/sim/example_granular.py +130 -0
- warp/examples/sim/example_granular_collision_sdf.py +202 -0
- warp/examples/sim/example_jacobian_ik.py +244 -0
- warp/examples/sim/example_particle_chain.py +124 -0
- warp/examples/sim/example_quadruped.py +203 -0
- warp/examples/sim/example_rigid_chain.py +203 -0
- warp/examples/sim/example_rigid_contact.py +195 -0
- warp/examples/sim/example_rigid_force.py +133 -0
- warp/examples/sim/example_rigid_gyroscopic.py +115 -0
- warp/examples/sim/example_rigid_soft_contact.py +140 -0
- warp/examples/sim/example_soft_body.py +196 -0
- warp/examples/tile/example_tile_cholesky.py +87 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mlp.py +383 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/examples/tile/example_tile_walker.py +327 -0
- warp/fabric.py +355 -0
- warp/fem/__init__.py +106 -0
- warp/fem/adaptivity.py +508 -0
- warp/fem/cache.py +572 -0
- warp/fem/dirichlet.py +202 -0
- warp/fem/domain.py +411 -0
- warp/fem/field/__init__.py +125 -0
- warp/fem/field/field.py +619 -0
- warp/fem/field/nodal_field.py +326 -0
- warp/fem/field/restriction.py +37 -0
- warp/fem/field/virtual.py +848 -0
- warp/fem/geometry/__init__.py +32 -0
- warp/fem/geometry/adaptive_nanogrid.py +857 -0
- warp/fem/geometry/closest_point.py +84 -0
- warp/fem/geometry/deformed_geometry.py +221 -0
- warp/fem/geometry/element.py +776 -0
- warp/fem/geometry/geometry.py +362 -0
- warp/fem/geometry/grid_2d.py +392 -0
- warp/fem/geometry/grid_3d.py +452 -0
- warp/fem/geometry/hexmesh.py +911 -0
- warp/fem/geometry/nanogrid.py +571 -0
- warp/fem/geometry/partition.py +389 -0
- warp/fem/geometry/quadmesh.py +663 -0
- warp/fem/geometry/tetmesh.py +855 -0
- warp/fem/geometry/trimesh.py +806 -0
- warp/fem/integrate.py +2335 -0
- warp/fem/linalg.py +419 -0
- warp/fem/operator.py +293 -0
- warp/fem/polynomial.py +229 -0
- warp/fem/quadrature/__init__.py +17 -0
- warp/fem/quadrature/pic_quadrature.py +299 -0
- warp/fem/quadrature/quadrature.py +591 -0
- warp/fem/space/__init__.py +228 -0
- warp/fem/space/basis_function_space.py +468 -0
- warp/fem/space/basis_space.py +667 -0
- warp/fem/space/dof_mapper.py +251 -0
- warp/fem/space/function_space.py +309 -0
- warp/fem/space/grid_2d_function_space.py +177 -0
- warp/fem/space/grid_3d_function_space.py +227 -0
- warp/fem/space/hexmesh_function_space.py +257 -0
- warp/fem/space/nanogrid_function_space.py +201 -0
- warp/fem/space/partition.py +367 -0
- warp/fem/space/quadmesh_function_space.py +223 -0
- warp/fem/space/restriction.py +179 -0
- warp/fem/space/shape/__init__.py +143 -0
- warp/fem/space/shape/cube_shape_function.py +1105 -0
- warp/fem/space/shape/shape_function.py +133 -0
- warp/fem/space/shape/square_shape_function.py +926 -0
- warp/fem/space/shape/tet_shape_function.py +834 -0
- warp/fem/space/shape/triangle_shape_function.py +672 -0
- warp/fem/space/tetmesh_function_space.py +271 -0
- warp/fem/space/topology.py +424 -0
- warp/fem/space/trimesh_function_space.py +194 -0
- warp/fem/types.py +99 -0
- warp/fem/utils.py +420 -0
- warp/jax.py +187 -0
- warp/jax_experimental/__init__.py +16 -0
- warp/jax_experimental/custom_call.py +351 -0
- warp/jax_experimental/ffi.py +698 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +244 -0
- warp/native/array.h +1145 -0
- warp/native/builtin.h +1800 -0
- warp/native/bvh.cpp +492 -0
- warp/native/bvh.cu +791 -0
- warp/native/bvh.h +554 -0
- warp/native/clang/clang.cpp +536 -0
- warp/native/coloring.cpp +613 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +362 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +646 -0
- warp/native/cuda_util.h +307 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +1878 -0
- warp/native/fabric.h +245 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +87 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1230 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +339 -0
- warp/native/marching.cpp +19 -0
- warp/native/marching.cu +514 -0
- warp/native/marching.h +19 -0
- warp/native/mat.h +2220 -0
- warp/native/mathdx.cpp +87 -0
- warp/native/matnn.h +343 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +404 -0
- warp/native/mesh.h +1980 -0
- warp/native/nanovdb/GridHandle.h +366 -0
- warp/native/nanovdb/HostBuffer.h +590 -0
- warp/native/nanovdb/NanoVDB.h +6624 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1371 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +139 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +364 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +53 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +277 -0
- warp/native/sort.h +33 -0
- warp/native/sparse.cpp +378 -0
- warp/native/sparse.cu +524 -0
- warp/native/spatial.h +657 -0
- warp/native/svd.h +702 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +2584 -0
- warp/native/tile_reduce.h +264 -0
- warp/native/vec.h +1426 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +67 -0
- warp/native/volume.h +969 -0
- warp/native/volume_builder.cu +477 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1082 -0
- warp/native/warp.cu +3636 -0
- warp/native/warp.h +381 -0
- warp/optim/__init__.py +17 -0
- warp/optim/adam.py +163 -0
- warp/optim/linear.py +1137 -0
- warp/optim/sgd.py +112 -0
- warp/paddle.py +407 -0
- warp/render/__init__.py +18 -0
- warp/render/render_opengl.py +3518 -0
- warp/render/render_usd.py +784 -0
- warp/render/utils.py +160 -0
- warp/sim/__init__.py +65 -0
- warp/sim/articulation.py +793 -0
- warp/sim/collide.py +2395 -0
- warp/sim/graph_coloring.py +300 -0
- warp/sim/import_mjcf.py +790 -0
- warp/sim/import_snu.py +227 -0
- warp/sim/import_urdf.py +579 -0
- warp/sim/import_usd.py +894 -0
- warp/sim/inertia.py +324 -0
- warp/sim/integrator.py +242 -0
- warp/sim/integrator_euler.py +1997 -0
- warp/sim/integrator_featherstone.py +2101 -0
- warp/sim/integrator_vbd.py +2048 -0
- warp/sim/integrator_xpbd.py +3292 -0
- warp/sim/model.py +4791 -0
- warp/sim/particles.py +121 -0
- warp/sim/render.py +427 -0
- warp/sim/utils.py +428 -0
- warp/sparse.py +2057 -0
- warp/stubs.py +3333 -0
- warp/tape.py +1203 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +634 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +200 -0
- warp/tests/geometry/test_hash_grid.py +221 -0
- warp/tests/geometry/test_marching_cubes.py +74 -0
- warp/tests/geometry/test_mesh.py +316 -0
- warp/tests/geometry/test_mesh_query_aabb.py +399 -0
- warp/tests/geometry/test_mesh_query_point.py +932 -0
- warp/tests/geometry/test_mesh_query_ray.py +311 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +729 -0
- warp/tests/interop/test_jax.py +371 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/sim/__init__.py +0 -0
- warp/tests/sim/disabled_kinematics.py +244 -0
- warp/tests/sim/flaky_test_sim_grad.py +290 -0
- warp/tests/sim/test_collision.py +604 -0
- warp/tests/sim/test_coloring.py +258 -0
- warp/tests/sim/test_model.py +224 -0
- warp/tests/sim/test_sim_grad_bounce_linear.py +212 -0
- warp/tests/sim/test_sim_kinematics.py +98 -0
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/test_adam.py +163 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +2972 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +250 -0
- warp/tests/test_atomic.py +153 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +1298 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +810 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +252 -0
- warp/tests/test_context.py +42 -0
- warp/tests/test_copy.py +238 -0
- warp/tests/test_ctypes.py +638 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +97 -0
- warp/tests/test_examples.py +482 -0
- warp/tests/test_fabricarray.py +996 -0
- warp/tests/test_fast_math.py +74 -0
- warp/tests/test_fem.py +2003 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +454 -0
- warp/tests/test_future_annotations.py +98 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +73 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +193 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_mat.py +2089 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +178 -0
- warp/tests/test_mlp.py +282 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +44 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +339 -0
- warp/tests/test_quat.py +2315 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +302 -0
- warp/tests/test_rounding.py +185 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +105 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +582 -0
- warp/tests/test_spatial.py +2229 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +592 -0
- warp/tests/test_struct.py +734 -0
- warp/tests/test_tape.py +204 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +145 -0
- warp/tests/test_types.py +562 -0
- warp/tests/test_utils.py +588 -0
- warp/tests/test_vec.py +1487 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +780 -0
- warp/tests/tile/test_tile_load.py +407 -0
- warp/tests/tile/test_tile_mathdx.py +208 -0
- warp/tests/tile/test_tile_mlp.py +402 -0
- warp/tests/tile/test_tile_reduce.py +447 -0
- warp/tests/tile/test_tile_shared_memory.py +247 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +427 -0
- warp/tests/unittest_utils.py +468 -0
- warp/tests/walkthrough_debug.py +93 -0
- warp/thirdparty/__init__.py +0 -0
- warp/thirdparty/appdirs.py +598 -0
- warp/thirdparty/dlpack.py +145 -0
- warp/thirdparty/unittest_parallel.py +570 -0
- warp/torch.py +391 -0
- warp/types.py +5230 -0
- warp/utils.py +1137 -0
- warp_lang-1.7.0.dist-info/METADATA +516 -0
- warp_lang-1.7.0.dist-info/RECORD +429 -0
- warp_lang-1.7.0.dist-info/WHEEL +5 -0
- warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
- warp_lang-1.7.0.dist-info/top_level.txt +1 -0
warp/native/mathdx.cpp
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "builtin.h"
|
|
19
|
+
|
|
20
|
+
// stubs for builds where CUDA is disabled or Warp was compiled without MathDx support
|
|
21
|
+
#if !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
|
|
22
|
+
|
|
23
|
+
extern "C"
|
|
24
|
+
{
|
|
25
|
+
|
|
26
|
+
// Fallback used when the real MathDx-backed FFT compiler is not built
// (see the enclosing `#if !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX` guard).
// Prints a diagnostic to stdout and reports failure; none of the arguments
// are read and `shared_memory_size` is left untouched.
WP_API bool cuda_compile_fft(
    const char* ltoir_output_path, const char* symbol_name,
    int num_include_dirs, const char** include_dirs, const char* mathdx_include_dir,
    int arch, int size, int elements_per_thread, int direction, int precision,
    int* shared_memory_size)
{
    const bool compiled = false;  // nothing can be compiled in this configuration
    printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
    return compiled;
}
|
|
42
|
+
|
|
43
|
+
// Fallback used when the real MathDx-backed matrix-product compiler is not
// built (see the enclosing `#if !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX` guard).
// Prints a diagnostic to stdout and reports failure; no argument is read.
WP_API bool cuda_compile_dot(
    const char* fatbin_output_path, const char* ltoir_output_path, const char* symbol_name,
    int num_include_dirs, const char** include_dirs, const char* mathdx_include_dir,
    int arch,
    int M, int N, int K,
    int precision_A, int precision_B, int precision_C,
    int type,
    int a_arrangement, int b_arrangement, int c_arrangement,
    int num_threads)
{
    const bool compiled = false;  // nothing can be compiled in this configuration
    printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
    return compiled;
}
|
|
66
|
+
|
|
67
|
+
// Fallback used when the real MathDx-backed solver compiler is not built
// (see the enclosing `#if !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX` guard).
// Prints a diagnostic to stdout and reports failure; no argument is read.
WP_API bool cuda_compile_solver(
    const char* ltoir_output_path, const char* symbol_name,
    int num_include_dirs, const char** include_dirs, const char* mathdx_include_dir,
    int arch,
    int M, int N,
    int function, int precision, int fill_mode,
    int num_threads)
{
    const bool compiled = false;  // nothing can be compiled in this configuration
    printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
    return compiled;
}
|
|
84
|
+
|
|
85
|
+
} // extern "C"
|
|
86
|
+
|
|
87
|
+
#endif // !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
|
warp/native/matnn.h
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
namespace wp
|
|
21
|
+
{
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
// Flat offset of element (i, j) in a dense buffer addressed as i*stride + j,
// i.e. row-major addressing when `stride` is the number of elements per row.
CUDA_CALLABLE inline int dense_index(int stride, int i, int j)
{
    const int row_offset = stride*i;
    return row_offset + j;
}
|
|
28
|
+
|
|
29
|
+
template <bool transpose>
|
|
30
|
+
CUDA_CALLABLE inline int dense_index(int rows, int cols, int i, int j)
|
|
31
|
+
{
|
|
32
|
+
if (transpose)
|
|
33
|
+
return j*rows + i;
|
|
34
|
+
else
|
|
35
|
+
return i*cols + j;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
template <bool t1, bool t2, bool add>
|
|
41
|
+
CUDA_CALLABLE inline void dense_gemm_impl(int m, int n, int p, const float* __restrict__ A, const float* __restrict__ B, float* __restrict__ C)
|
|
42
|
+
{
|
|
43
|
+
for (int i=0; i < m; i++)
|
|
44
|
+
{
|
|
45
|
+
for (int j=0; j < n; ++j)
|
|
46
|
+
{
|
|
47
|
+
float sum = 0.0f;
|
|
48
|
+
|
|
49
|
+
for (int k=0; k < p; ++k)
|
|
50
|
+
{
|
|
51
|
+
sum += A[dense_index<t1>(m, p, i, k)]*B[dense_index<t2>(p, n, k, j)];
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
if (add)
|
|
55
|
+
C[i*n + j] += sum;
|
|
56
|
+
else
|
|
57
|
+
C[i*n + j] = sum;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
template <bool add=false>
|
|
64
|
+
CUDA_CALLABLE inline void dense_gemm(int m, int n, int p, int t1, int t2, const array_t<float>& A, const array_t<float>& B, array_t<float>& C)
|
|
65
|
+
{
|
|
66
|
+
if (t1 == 0 && t2 == 0)
|
|
67
|
+
dense_gemm_impl<false, false, add>(m, n, p, A.data, B.data, C.data);
|
|
68
|
+
else if (t1 == 1 && t2 == 0)
|
|
69
|
+
dense_gemm_impl<true, false, add>(m, n, p, A.data, B.data, C.data);
|
|
70
|
+
else if (t1 == 0 && t2 == 1)
|
|
71
|
+
dense_gemm_impl<false, true, add>(m, n, p, A.data, B.data, C.data);
|
|
72
|
+
else if (t1 == 1 && t2 == 1)
|
|
73
|
+
dense_gemm_impl<true, true, add>(m, n, p, A.data, B.data, C.data);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
// Column-by-column Cholesky factorisation of the n x n matrix A with
// `regularization` added to every diagonal entry:
//     L * L^T = A + regularization * I
// Only the diagonal and the strictly lower triangle of L are written, and
// only the diagonal and lower triangle of A are read. Both buffers are
// addressed through dense_index(n, row, col).
// There is no check for a non-positive pivot: if the value under the square
// root is <= 0 the resulting NaN/Inf propagates into L.
void CUDA_CALLABLE inline dense_chol(int n, const array_t<float>& A, float regularization, array_t<float>& L)
{
    for (int col = 0; col < n; ++col)
    {
        // diagonal entry: sqrt(A[col,col] + reg - sum_k L[col,k]^2)
        float pivot = A.data[dense_index(n, col, col)] + regularization;

        for (int k = 0; k < col; ++k)
        {
            const float l_ck = L.data[dense_index(n, col, k)];
            pivot -= l_ck*l_ck;
        }

        pivot = sqrt(pivot);
        L.data[dense_index(n, col, col)] = pivot;

        const float inv_pivot = 1.0f/pivot;

        // entries below the diagonal in this column
        for (int row = col + 1; row < n; ++row)
        {
            float acc = A.data[dense_index(n, row, col)];

            for (int k = 0; k < col; ++k)
                acc -= L.data[dense_index(n, row, k)]*L.data[dense_index(n, col, k)];

            L.data[dense_index(n, row, col)] = acc*inv_pivot;
        }
    }
}
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
// Solves (L*L^T)x = b given the Cholesky factor L
|
|
114
|
+
CUDA_CALLABLE inline void dense_subs(int n, const array_t<float>& L, const array_t<float>& b, array_t<float>& x)
|
|
115
|
+
{
|
|
116
|
+
// forward substitution
|
|
117
|
+
for (int i=0; i < n; ++i)
|
|
118
|
+
{
|
|
119
|
+
float s = b.data[i];
|
|
120
|
+
|
|
121
|
+
for (int j=0; j < i; ++j)
|
|
122
|
+
{
|
|
123
|
+
s -= L.data[dense_index(n, i, j)]*x.data[j];
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
x.data[i] = s/L.data[dense_index(n, i, i)];
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
// backward substitution
|
|
130
|
+
for (int i=n-1; i >= 0; --i)
|
|
131
|
+
{
|
|
132
|
+
float s = x.data[i];
|
|
133
|
+
|
|
134
|
+
for (int j=i+1; j < n; ++j)
|
|
135
|
+
{
|
|
136
|
+
s -= L.data[dense_index(n, j, i)]*x.data[j];
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
x.data[i] = s/L.data[dense_index(n, i, i)];
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// Solves (L*L^T) x = b using an already computed Cholesky factor L.
// This is a thin wrapper around dense_subs(); the matrix A is accepted as a
// parameter but is not read here.
CUDA_CALLABLE inline void dense_solve(int n, const array_t<float>& A, const array_t<float>& L, const array_t<float>& b, array_t<float>& x)
{
    (void)A;  // unused by the solve itself

    dense_subs(n, L, b, x);
}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
// CUDA_CALLABLE inline void print_matrix(const char* name, int m, int n, const float* data)
|
|
150
|
+
// {
|
|
151
|
+
// printf("%s = [", name);
|
|
152
|
+
|
|
153
|
+
// for (int i=0; i < m; ++i)
|
|
154
|
+
// {
|
|
155
|
+
// for (int j=0; j < n; ++j)
|
|
156
|
+
// {
|
|
157
|
+
// printf("%f ", data[dense_index(n, i, j)]);
|
|
158
|
+
// }
|
|
159
|
+
|
|
160
|
+
// printf(";\n");
|
|
161
|
+
// }
|
|
162
|
+
|
|
163
|
+
// printf("]\n");
|
|
164
|
+
// }
|
|
165
|
+
|
|
166
|
+
// adjoint methods
|
|
167
|
+
// Adjoint (reverse-mode) of dense_gemm: accumulates the gradients of
// C = op(A) * op(B) into adj_A and adj_B given adj_C.
//
// Notation: a(i,k) / b(k,j) are the logical elements of op(A) (m x p) and
// op(B) (p x n); adj_C is m x n, addressed as adj_C[i*n + j].
//     d/da(i,k) = sum_j adj_C(i,j) * b(k,j)
//     d/db(k,j) = sum_i a(i,k) * adj_C(i,j)
// adj_A / adj_B use the same storage layout as A / B, so the shape and the
// transpose flags of each product depend on t1 / t2:
//     t1 == 0: adj_A (m x p) += adj_C * op(B)^T
//     t1 == 1: adj_A (p x m) += op(B) * adj_C^T
//     t2 == 0: adj_B (p x n) += op(A)^T * adj_C
//     t2 == 1: adj_B (n x p) += adj_C^T * op(A)
// The previous implementation hard-coded the t2 == 0 form in the t1 branch
// and for adj_B, which produced wrong gradients whenever B was transposed.
// Results for t2 == 0 are unchanged.
//
// t1 / t2 are expected to be exactly 0 or 1, as required by dense_gemm.
// C and the integer adjoint arguments are unused.
CUDA_CALLABLE inline void adj_dense_gemm(
    int m, int n, int p, int t1, int t2, const array_t<float>& A, const array_t<float>& B, array_t<float>& C,
    int adj_m, int adj_n, int adj_p, int adj_t1, int adj_t2, array_t<float>& adj_A, array_t<float>& adj_B, const array_t<float>& adj_C)
{
    // gradient w.r.t. A
    if (t1)
    {
        // A is stored p x m: adj_A[k,i] += sum_j b(k,j) * adj_C[i,j]
        dense_gemm<true>(p, m, n, t2, 1, B, adj_C, adj_A);
    }
    else
    {
        // A is stored m x p: adj_A[i,k] += sum_j adj_C[i,j] * b(k,j)
        dense_gemm<true>(m, p, n, 0, int(!t2), adj_C, B, adj_A);
    }

    // gradient w.r.t. B
    if (t2)
    {
        // B is stored n x p: adj_B[j,k] += sum_i adj_C[i,j] * a(i,k)
        dense_gemm<true>(n, p, m, 1, t1, adj_C, A, adj_B);
    }
    else
    {
        // B is stored p x n: adj_B[k,j] += sum_i a(i,k) * adj_C[i,j]
        dense_gemm<true>(p, n, m, int(!t1), 0, A, adj_C, adj_B);
    }
}
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
// Adjoint of dense_chol. Intentionally empty: no gradient is propagated
// through the factorisation itself. Differentiate through dense_solve
// (x = A^-1 b) instead, whose adjoint handles the dependence on A.
CUDA_CALLABLE inline void adj_dense_chol(
    int n, const array_t<float>& A, float regularization, array_t<float>& L,
    int adj_n, const array_t<float>& adj_A, float adj_regularization, array_t<float>& adj_L)
{
    // deliberately a no-op
}
|
|
195
|
+
|
|
196
|
+
// Adjoint of dense_subs. Intentionally empty: no gradient is propagated
// through the raw substitution. Differentiate through dense_solve
// (x = A^-1 b) instead.
CUDA_CALLABLE inline void adj_dense_subs(
    int n, const array_t<float>& L, const array_t<float>& b, array_t<float>& x,
    int adj_n, const array_t<float>& adj_L, const array_t<float>& adj_b, array_t<float>& adj_x)
{
    // deliberately a no-op
}
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
// Adjoint of dense_solve (x = A^-1 b with A = L*L^T).
// See Giles, report NA-08-01, section 2.3.1
// (https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf):
//     adj_b = A^-1 adj_x       (computed with dense_subs; adj_b is overwritten)
//     adj_A += -adj_b * x^T    (accumulated into adj_A)
// adj_L is not written.
CUDA_CALLABLE inline void adj_dense_solve(int n,
    const array_t<float>& A, const array_t<float>& L, const array_t<float>& b, const array_t<float>& x,
    int adj_n, array_t<float>& adj_A, array_t<float>& adj_L, array_t<float>& adj_b, const array_t<float>& adj_x)
{
    // reuse the existing Cholesky factor to solve for the gradient of b
    dense_subs(n, L, adj_x, adj_b);

    // outer-product update of the gradient of A
    for (int row = 0; row < n; ++row)
    {
        for (int col = 0; col < n; ++col)
        {
            float& grad = adj_A.data[dense_index(n, row, col)];
            grad += -adj_b.data[row]*x.data[col];
        }
    }
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
template <typename F>
|
|
223
|
+
CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out)
|
|
224
|
+
{
|
|
225
|
+
const int m = weights.shape[0];
|
|
226
|
+
const int n = weights.shape[1];
|
|
227
|
+
const int b = x.shape[1];
|
|
228
|
+
|
|
229
|
+
for (int i=0; i < m; ++i)
|
|
230
|
+
{
|
|
231
|
+
float tmp = bias.data[i];
|
|
232
|
+
|
|
233
|
+
for(int j=0; j < n; ++j)
|
|
234
|
+
{
|
|
235
|
+
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
out.data[index + b*i] = activation(tmp);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
template <typename F, typename AdjF>
|
|
243
|
+
CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out,
|
|
244
|
+
array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
245
|
+
{
|
|
246
|
+
const int m = weights.shape[0];
|
|
247
|
+
const int n = weights.shape[1];
|
|
248
|
+
const int b = x.shape[1];
|
|
249
|
+
|
|
250
|
+
for (int i=0; i < m; ++i)
|
|
251
|
+
{
|
|
252
|
+
// recompute forward pass so we don't have to store pre-activation outputs
|
|
253
|
+
float tmp = bias.data[i];
|
|
254
|
+
|
|
255
|
+
for(int j=0; j < n; ++j)
|
|
256
|
+
{
|
|
257
|
+
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// adjoint w.r.t to activation
|
|
261
|
+
float adj_f = 0.0f;
|
|
262
|
+
|
|
263
|
+
if (adj_out.data)
|
|
264
|
+
adj_activation(tmp, adj_f, adj_out.data[index + b*i]);
|
|
265
|
+
|
|
266
|
+
for (int j=0; j < n; ++j)
|
|
267
|
+
{
|
|
268
|
+
// adjoint w.r.t M_i
|
|
269
|
+
if (adj_weights.data)
|
|
270
|
+
atomic_add(&adj_weights.data[i*n + j], x.data[index + b*j]*adj_f); // todo: reduce these atomic stores using warp/block level reductions
|
|
271
|
+
|
|
272
|
+
// adjoint w.r.t x
|
|
273
|
+
if (adj_x.data)
|
|
274
|
+
atomic_add(&adj_x.data[index + b*j], weights.data[i*n + j]*adj_f);
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// adjoint w.r.t b
|
|
278
|
+
if (adj_bias.data)
|
|
279
|
+
atomic_add(&adj_bias.data[i], adj_f);
|
|
280
|
+
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
// template <typename F>
|
|
286
|
+
// CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, array_t<float>& out)
|
|
287
|
+
// {
|
|
288
|
+
// x += index*n;
|
|
289
|
+
// out += index*m;
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
// for (int i=0; i < m; ++i)
|
|
293
|
+
// {
|
|
294
|
+
// float tmp = bias[i];
|
|
295
|
+
|
|
296
|
+
// for(int j=0; j < n; ++j)
|
|
297
|
+
// {
|
|
298
|
+
// tmp += weights[i*n + j]*x[j];
|
|
299
|
+
// }
|
|
300
|
+
|
|
301
|
+
// out[i] = activation(tmp);
|
|
302
|
+
// }
|
|
303
|
+
// }
|
|
304
|
+
|
|
305
|
+
// template <typename F, typename AdjF>
|
|
306
|
+
// CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, const array_t<float>& out,
|
|
307
|
+
// array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_m, int adj_n, int adj_b, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
308
|
+
// {
|
|
309
|
+
// x += index*n;
|
|
310
|
+
// out += index*m;
|
|
311
|
+
|
|
312
|
+
// adj_x += index*n;
|
|
313
|
+
// adj_out += index*m;
|
|
314
|
+
|
|
315
|
+
// for (int i=0; i < m; ++i)
|
|
316
|
+
// {
|
|
317
|
+
// // recompute forward pass so we don't have to store pre-activation outputs
|
|
318
|
+
// float tmp = bias[i];
|
|
319
|
+
|
|
320
|
+
// for(int j=0; j < n; ++j)
|
|
321
|
+
// {
|
|
322
|
+
// tmp += weights[i*n + j]*x[index + b*j];
|
|
323
|
+
// }
|
|
324
|
+
|
|
325
|
+
// // adjoint w.r.t. activation
|
|
326
|
+
// float adj_f = 0.0f;
|
|
327
|
+
// adj_activation(tmp, adj_f, adj_out[index + b*i]);
|
|
328
|
+
|
|
329
|
+
// for (int j=0; j < n; ++j)
|
|
330
|
+
// {
|
|
331
|
+
// // adjoint w.r.t M_i
|
|
332
|
+
// adj_weights[i*n + j] += x[j]*adj_f;
|
|
333
|
+
|
|
334
|
+
// // adjoint w.r.t x
|
|
335
|
+
// adj_x[index + b*j] += weights[i*n + j]*adj_f;
|
|
336
|
+
// }
|
|
337
|
+
|
|
338
|
+
// // adjoint w.r.t b
|
|
339
|
+
// adj_bias[i] += adj_f;
|
|
340
|
+
// }
|
|
341
|
+
// }
|
|
342
|
+
|
|
343
|
+
} // namespace wp
|
warp/native/mesh.cpp
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#include "mesh.h"
|
|
19
|
+
#include "bvh.h"
|
|
20
|
+
#include "warp.h"
|
|
21
|
+
#include "cuda_util.h"
|
|
22
|
+
|
|
23
|
+
using namespace wp;
|
|
24
|
+
|
|
25
|
+
#include <map>
|
|
26
|
+
|
|
27
|
+
namespace
|
|
28
|
+
{
|
|
29
|
+
// host-side copy of mesh descriptors, maps GPU mesh address (id) to a CPU desc
|
|
30
|
+
std::map<uint64_t, Mesh> g_mesh_descriptors;
|
|
31
|
+
|
|
32
|
+
} // anonymous namespace
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
namespace wp
|
|
36
|
+
{
|
|
37
|
+
|
|
38
|
+
bool mesh_get_descriptor(uint64_t id, Mesh& mesh)
|
|
39
|
+
{
|
|
40
|
+
const auto& iter = g_mesh_descriptors.find(id);
|
|
41
|
+
if (iter == g_mesh_descriptors.end())
|
|
42
|
+
return false;
|
|
43
|
+
else
|
|
44
|
+
mesh = iter->second;
|
|
45
|
+
return true;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
bool mesh_set_descriptor(uint64_t id, const Mesh& mesh)
|
|
49
|
+
{
|
|
50
|
+
const auto& iter = g_mesh_descriptors.find(id);
|
|
51
|
+
if (iter == g_mesh_descriptors.end())
|
|
52
|
+
return false;
|
|
53
|
+
else
|
|
54
|
+
iter->second = mesh;
|
|
55
|
+
return true;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
void mesh_add_descriptor(uint64_t id, const Mesh& mesh)
|
|
59
|
+
{
|
|
60
|
+
g_mesh_descriptors[id] = mesh;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
void mesh_rem_descriptor(uint64_t id)
|
|
64
|
+
{
|
|
65
|
+
g_mesh_descriptors.erase(id);
|
|
66
|
+
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
} // namespace wp
|
|
70
|
+
|
|
71
|
+
// Recursively refits the BVH subtree rooted at node `index`.
//
// For every visited node this rebuilds mesh.solid_angle_props[index] and
// writes the resulting bounds into bvh.node_lowers[index] /
// bvh.node_uppers[index]:
//   - leaf nodes (lower.b set) accumulate the solid-angle properties of the
//     triangles in the primitive range [lower.i, upper.i);
//   - internal nodes recurse into the children lower.i and upper.i and then
//     combine the children's properties and bounds.
void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
{
    BVHPackedNodeHalf& lower = bvh.node_lowers[index];
    BVHPackedNodeHalf& upper = bvh.node_uppers[index];
    SolidAngleProps& node_props = mesh.solid_angle_props[index];

    if (!lower.b)
    {
        // internal node: refit both children first
        const int left_index = lower.i;
        const int right_index = upper.i;

        bvh_refit_with_solid_angle_recursive_host(bvh, left_index, mesh);
        bvh_refit_with_solid_angle_recursive_host(bvh, right_index, mesh);

        // combine the children's solid-angle data; when both child indices
        // are the same node, only the left child is passed on
        SolidAngleProps* left_child_data = &mesh.solid_angle_props[left_index];
        SolidAngleProps* right_child_data = NULL;
        if (left_index != right_index)
        {
            right_child_data = &mesh.solid_angle_props[right_index];
        }

        combine_precomputed_solid_angle_props(node_props, left_child_data, right_child_data);

        // bounds of this node are the union of the child bounds
        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
        const vec3& left_upper = (vec3&)bvh.node_uppers[left_index];
        const vec3& right_lower = (vec3&)bvh.node_lowers[right_index];
        const vec3& right_upper = (vec3&)bvh.node_uppers[right_index];

        vec3 new_lower = min(left_lower, right_lower);
        vec3 new_upper = max(left_upper, right_upper);

        // write new BVH nodes
        (vec3&)lower = new_lower;
        (vec3&)upper = new_upper;
        return;
    }

    // leaf node: fold the triangles of this leaf into node_props
    const int first = lower.i;
    const int last = upper.i;
    for (int slot = first; slot < last; ++slot)
    {
        const int primitive_index = mesh.bvh.primitive_indices[slot];
        const int base = primitive_index * 3;

        if (slot != first)
        {
            // subsequent triangles are computed separately and merged in
            SolidAngleProps triangle_solid_angle_props;
            precompute_triangle_solid_angle_props(mesh.points[mesh.indices[base + 0]], mesh.points[mesh.indices[base + 1]],
                                                  mesh.points[mesh.indices[base + 2]], triangle_solid_angle_props);
            node_props = combine_precomputed_solid_angle_props(&node_props, &triangle_solid_angle_props);
        }
        else
        {
            // the first triangle seeds the node's properties directly
            precompute_triangle_solid_angle_props(mesh.points[mesh.indices[base + 0]], mesh.points[mesh.indices[base + 1]],
                                                  mesh.points[mesh.indices[base + 2]], node_props);
        }
    }

    // leaf bounds come from the accumulated solid-angle box
    (vec3&)lower = node_props.box.lower;
    (vec3&)upper = node_props.box.upper;
}
|
|
132
|
+
|
|
133
|
+
// Refits `bvh` and the solid-angle properties stored on `mesh` by running the
// recursive refit starting at node 0 (presumably the BVH root — confirm
// against the BVH builder).
void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
{
    const int start_node = 0;
    bvh_refit_with_solid_angle_recursive_host(bvh, start_node, mesh);
}
|
|
137
|
+
|
|
138
|
+
// Creates a host-side mesh and returns its id.
//
// Allocates a Mesh from the given arrays, computes per-triangle bounds and the
// average edge length, builds the BVH over the triangle bounds and, when
// `support_winding_number` is non-zero, allocates and fills the per-node
// solid-angle properties.
//
// Parameters:
//   points, velocities, indices - arrays handed to the Mesh constructor;
//                                 `indices` is read as 3 vertex indices per triangle
//   num_points, num_tris        - element counts handed to the Mesh constructor
//   support_winding_number      - non-zero to precompute solid-angle data
//   constructor_type            - forwarded to wp::bvh_create_host
//
// Returns the address of the heap-allocated Mesh cast to uint64_t; release it
// with mesh_destroy_host.
uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
{
    Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);

    m->lowers = new vec3[num_tris];
    m->uppers = new vec3[num_tris];

    float sum = 0.0;
    for (int i=0; i < num_tris; ++i)
    {
        wp::vec3& p0 = points[indices[i*3+0]];
        wp::vec3& p1 = points[indices[i*3+1]];
        wp::vec3& p2 = points[indices[i*3+2]];

        // compute triangle bounds
        bounds3 b;
        b.add_point(p0);
        b.add_point(p1);
        b.add_point(p2);

        m->lowers[i] = b.lower;
        m->uppers[i] = b.upper;

        // compute edge lengths
        sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
    }

    // an empty mesh has no edges; avoid 0/0 producing NaN
    m->average_edge_length = (num_tris > 0) ? sum / (num_tris*3) : 0.0f;

    wp::bvh_create_host(m->lowers, m->uppers, num_tris, constructor_type, m->bvh);

    // With zero triangles 2*num_tris-1 would be -1, an invalid array size, and
    // there would be no node to refit, so the solid-angle data is skipped.
    if (support_winding_number && num_tris > 0)
    {
        // first compute the solid-angle properties for every BVH node
        int num_bvh_nodes = 2*num_tris-1;
        m->solid_angle_props = new SolidAngleProps[num_bvh_nodes];
        bvh_refit_with_solid_angle_host(m->bvh, *m);
    }

    return (uint64_t)m;
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
void mesh_destroy_host(uint64_t id)
|
|
181
|
+
{
|
|
182
|
+
Mesh* m = (Mesh*)(id);
|
|
183
|
+
|
|
184
|
+
delete[] m->lowers;
|
|
185
|
+
delete[] m->uppers;
|
|
186
|
+
|
|
187
|
+
if (m->solid_angle_props) {
|
|
188
|
+
delete [] m->solid_angle_props;
|
|
189
|
+
}
|
|
190
|
+
bvh_destroy_host(m->bvh);
|
|
191
|
+
|
|
192
|
+
delete m;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
void mesh_refit_host(uint64_t id)
|
|
196
|
+
{
|
|
197
|
+
Mesh* m = (Mesh*)(id);
|
|
198
|
+
|
|
199
|
+
float sum = 0.0;
|
|
200
|
+
for (int i=0; i < m->num_tris; ++i)
|
|
201
|
+
{
|
|
202
|
+
wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
|
|
203
|
+
wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
|
|
204
|
+
wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
|
|
205
|
+
|
|
206
|
+
// compute triangle bounds
|
|
207
|
+
bounds3 b;
|
|
208
|
+
b.add_point(p0);
|
|
209
|
+
b.add_point(p1);
|
|
210
|
+
b.add_point(p2);
|
|
211
|
+
|
|
212
|
+
m->lowers[i] = b.lower;
|
|
213
|
+
m->uppers[i] = b.upper;
|
|
214
|
+
|
|
215
|
+
sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
|
|
216
|
+
}
|
|
217
|
+
m->average_edge_length = sum / (m->num_tris*3);
|
|
218
|
+
|
|
219
|
+
if (m->solid_angle_props)
|
|
220
|
+
{
|
|
221
|
+
// If solid angle were used, use refit solid angle
|
|
222
|
+
bvh_refit_with_solid_angle_host(m->bvh, *m);
|
|
223
|
+
}
|
|
224
|
+
else
|
|
225
|
+
{
|
|
226
|
+
bvh_refit_host(m->bvh);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
void mesh_set_points_host(uint64_t id, wp::array_t<wp::vec3> points)
|
|
231
|
+
{
|
|
232
|
+
Mesh* m = (Mesh*)(id);
|
|
233
|
+
if (points.ndim != 1 || points.shape[0] != m->points.shape[0])
|
|
234
|
+
{
|
|
235
|
+
fprintf(stderr, "The new points input for mesh_set_points_host does not match the shape of the original points!\n");
|
|
236
|
+
return;
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
m->points = points;
|
|
240
|
+
|
|
241
|
+
mesh_refit_host(id);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
|
|
245
|
+
{
|
|
246
|
+
Mesh* m = (Mesh*)(id);
|
|
247
|
+
if (velocities.ndim != 1 || velocities.shape[0] != m->velocities.shape[0])
|
|
248
|
+
{
|
|
249
|
+
fprintf(stderr, "The new velocities input for mesh_set_velocities_host does not match the shape of the original velocities!\n");
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
252
|
+
m->velocities = velocities;
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// stubs for non-CUDA platforms
|
|
256
|
+
#if !WP_ENABLE_CUDA
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
// No-op replacements for the device mesh API, compiled only when CUDA support
// is disabled. mesh_create_device always returns 0; the others do nothing.
WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number, int constructor_type) { return 0; }
WP_API void mesh_destroy_device(uint64_t id) {}
WP_API void mesh_refit_device(uint64_t id) {}
WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {}
WP_API void mesh_set_velocities_device(uint64_t id, wp::array_t<wp::vec3> velocities) {}
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
#endif // !WP_ENABLE_CUDA
|