warp-lang 1.6.1__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +21 -7
- warp/autograd.py +14 -6
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +424 -6
- warp/build_dll.py +20 -20
- warp/builtins.py +467 -368
- warp/codegen.py +193 -125
- warp/config.py +56 -12
- warp/constants.py +14 -6
- warp/context.py +524 -277
- warp/dlpack.py +22 -12
- warp/examples/__init__.py +14 -6
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/benchmarks/benchmark_api.py +14 -6
- warp/examples/benchmarks/benchmark_cloth.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_cupy.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_jax.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_numba.py +15 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_paddle.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_taichi.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_warp.py +14 -6
- warp/examples/benchmarks/benchmark_gemm.py +82 -48
- warp/examples/benchmarks/benchmark_interop_paddle.py +14 -6
- warp/examples/benchmarks/benchmark_interop_torch.py +14 -6
- warp/examples/benchmarks/benchmark_launches.py +14 -6
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/browse.py +14 -6
- warp/examples/core/example_cupy.py +14 -6
- warp/examples/core/example_dem.py +14 -6
- warp/examples/core/example_fluid.py +14 -6
- warp/examples/core/example_graph_capture.py +14 -6
- warp/examples/core/example_marching_cubes.py +14 -6
- warp/examples/core/example_mesh.py +14 -6
- warp/examples/core/example_mesh_intersect.py +14 -6
- warp/examples/core/example_nvdb.py +14 -6
- warp/examples/core/example_raycast.py +14 -6
- warp/examples/core/example_raymarch.py +14 -6
- warp/examples/core/example_render_opengl.py +14 -6
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +14 -6
- warp/examples/core/example_torch.py +14 -6
- warp/examples/core/example_wave.py +14 -6
- warp/examples/fem/example_adaptive_grid.py +14 -6
- warp/examples/fem/example_apic_fluid.py +15 -7
- warp/examples/fem/example_burgers.py +16 -8
- warp/examples/fem/example_convection_diffusion.py +14 -6
- warp/examples/fem/example_convection_diffusion_dg.py +14 -6
- warp/examples/fem/example_deformed_geometry.py +15 -7
- warp/examples/fem/example_diffusion.py +14 -6
- warp/examples/fem/example_diffusion_3d.py +14 -6
- warp/examples/fem/example_diffusion_mgpu.py +14 -6
- warp/examples/fem/example_distortion_energy.py +15 -7
- warp/examples/fem/example_magnetostatics.py +20 -12
- warp/examples/fem/example_mixed_elasticity.py +14 -6
- warp/examples/fem/example_navier_stokes.py +14 -6
- warp/examples/fem/example_nonconforming_contact.py +14 -6
- warp/examples/fem/example_stokes.py +14 -6
- warp/examples/fem/example_stokes_transfer.py +14 -6
- warp/examples/fem/example_streamlines.py +14 -6
- warp/examples/fem/utils.py +24 -3
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_bounce.py +14 -6
- warp/examples/optim/example_cloth_throw.py +14 -6
- warp/examples/optim/example_diffray.py +14 -6
- warp/examples/optim/example_drone.py +14 -6
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/optim/example_inverse_kinematics.py +14 -6
- warp/examples/optim/example_inverse_kinematics_torch.py +14 -6
- warp/examples/optim/example_softbody_properties.py +14 -6
- warp/examples/optim/example_spring_cage.py +14 -6
- warp/examples/optim/example_trajectory.py +14 -6
- warp/examples/sim/example_cartpole.py +14 -6
- warp/examples/sim/example_cloth.py +14 -6
- warp/examples/sim/example_cloth_self_contact.py +14 -6
- warp/examples/sim/example_granular.py +14 -6
- warp/examples/sim/example_granular_collision_sdf.py +14 -6
- warp/examples/sim/example_jacobian_ik.py +14 -6
- warp/examples/sim/example_particle_chain.py +14 -6
- warp/examples/sim/example_quadruped.py +14 -6
- warp/examples/sim/example_rigid_chain.py +14 -6
- warp/examples/sim/example_rigid_contact.py +14 -6
- warp/examples/sim/example_rigid_force.py +14 -6
- warp/examples/sim/example_rigid_gyroscopic.py +14 -6
- warp/examples/sim/example_rigid_soft_contact.py +14 -6
- warp/examples/sim/example_soft_body.py +14 -6
- warp/examples/tile/example_tile_cholesky.py +14 -6
- warp/examples/tile/example_tile_convolution.py +14 -6
- warp/examples/tile/example_tile_fft.py +14 -6
- warp/examples/tile/example_tile_filtering.py +14 -6
- warp/examples/tile/example_tile_matmul.py +16 -10
- warp/examples/tile/example_tile_mlp.py +14 -6
- warp/examples/tile/example_tile_nbody.py +14 -6
- warp/examples/tile/example_tile_walker.py +14 -6
- warp/fabric.py +15 -0
- warp/fem/__init__.py +26 -1
- warp/fem/adaptivity.py +19 -4
- warp/fem/cache.py +15 -0
- warp/fem/dirichlet.py +15 -0
- warp/fem/domain.py +15 -0
- warp/fem/field/__init__.py +15 -0
- warp/fem/field/field.py +15 -0
- warp/fem/field/nodal_field.py +37 -68
- warp/fem/field/restriction.py +15 -0
- warp/fem/field/virtual.py +77 -23
- warp/fem/geometry/__init__.py +15 -0
- warp/fem/geometry/adaptive_nanogrid.py +24 -10
- warp/fem/geometry/closest_point.py +16 -1
- warp/fem/geometry/deformed_geometry.py +20 -2
- warp/fem/geometry/element.py +15 -0
- warp/fem/geometry/geometry.py +20 -0
- warp/fem/geometry/grid_2d.py +27 -12
- warp/fem/geometry/grid_3d.py +27 -15
- warp/fem/geometry/hexmesh.py +20 -7
- warp/fem/geometry/nanogrid.py +24 -11
- warp/fem/geometry/partition.py +15 -0
- warp/fem/geometry/quadmesh.py +28 -13
- warp/fem/geometry/tetmesh.py +18 -4
- warp/fem/geometry/trimesh.py +18 -8
- warp/fem/integrate.py +277 -93
- warp/fem/linalg.py +20 -5
- warp/fem/operator.py +15 -0
- warp/fem/polynomial.py +15 -0
- warp/fem/quadrature/__init__.py +15 -0
- warp/fem/quadrature/pic_quadrature.py +52 -22
- warp/fem/quadrature/quadrature.py +209 -25
- warp/fem/space/__init__.py +16 -1
- warp/fem/space/basis_function_space.py +19 -2
- warp/fem/space/basis_space.py +40 -18
- warp/fem/space/dof_mapper.py +15 -0
- warp/fem/space/function_space.py +15 -0
- warp/fem/space/grid_2d_function_space.py +15 -0
- warp/fem/space/grid_3d_function_space.py +15 -0
- warp/fem/space/hexmesh_function_space.py +17 -2
- warp/fem/space/nanogrid_function_space.py +15 -0
- warp/fem/space/partition.py +21 -2
- warp/fem/space/quadmesh_function_space.py +23 -8
- warp/fem/space/restriction.py +15 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +38 -23
- warp/fem/space/shape/shape_function.py +15 -0
- warp/fem/space/shape/square_shape_function.py +27 -12
- warp/fem/space/shape/tet_shape_function.py +15 -0
- warp/fem/space/shape/triangle_shape_function.py +16 -1
- warp/fem/space/tetmesh_function_space.py +18 -3
- warp/fem/space/topology.py +15 -0
- warp/fem/space/trimesh_function_space.py +17 -2
- warp/fem/types.py +15 -0
- warp/fem/utils.py +27 -6
- warp/jax.py +28 -7
- warp/jax_experimental/__init__.py +16 -0
- warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -33
- warp/jax_experimental/ffi.py +698 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +103 -6
- warp/native/array.h +28 -6
- warp/native/builtin.h +44 -9
- warp/native/bvh.cpp +18 -7
- warp/native/bvh.cu +57 -20
- warp/native/bvh.h +17 -7
- warp/native/clang/clang.cpp +45 -9
- warp/native/coloring.cpp +15 -6
- warp/native/crt.cpp +15 -6
- warp/native/crt.h +15 -6
- warp/native/cuda_crt.h +15 -6
- warp/native/cuda_util.cpp +29 -6
- warp/native/cuda_util.h +17 -6
- warp/native/error.cpp +15 -6
- warp/native/error.h +15 -6
- warp/native/exports.h +85 -63
- warp/native/fabric.h +15 -6
- warp/native/hashgrid.cpp +15 -6
- warp/native/hashgrid.cu +15 -6
- warp/native/hashgrid.h +15 -6
- warp/native/initializer_array.h +15 -6
- warp/native/intersect.h +41 -32
- warp/native/intersect_adj.h +48 -39
- warp/native/intersect_tri.h +17 -0
- warp/native/marching.cpp +16 -0
- warp/native/marching.cu +16 -7
- warp/native/marching.h +17 -0
- warp/native/mat.h +528 -15
- warp/native/mathdx.cpp +15 -6
- warp/native/matnn.h +15 -6
- warp/native/mesh.cpp +15 -6
- warp/native/mesh.cu +15 -6
- warp/native/mesh.h +25 -16
- warp/native/noise.h +15 -6
- warp/native/quat.h +114 -17
- warp/native/rand.h +21 -6
- warp/native/range.h +15 -6
- warp/native/reduce.cpp +15 -6
- warp/native/reduce.cu +15 -6
- warp/native/runlength_encode.cpp +15 -6
- warp/native/runlength_encode.cu +15 -6
- warp/native/scan.cpp +15 -6
- warp/native/scan.cu +15 -6
- warp/native/scan.h +15 -6
- warp/native/solid_angle.h +17 -0
- warp/native/sort.cpp +137 -65
- warp/native/sort.cu +167 -21
- warp/native/sort.h +23 -7
- warp/native/sparse.cpp +58 -28
- warp/native/sparse.cu +67 -23
- warp/native/spatial.h +15 -6
- warp/native/svd.h +131 -6
- warp/native/temp_buffer.h +15 -6
- warp/native/tile.h +316 -111
- warp/native/tile_reduce.h +61 -9
- warp/native/vec.h +83 -13
- warp/native/volume.cpp +100 -119
- warp/native/volume.cu +15 -6
- warp/native/volume.h +15 -6
- warp/native/volume_builder.cu +40 -16
- warp/native/volume_builder.h +21 -6
- warp/native/volume_impl.h +15 -6
- warp/native/warp.cpp +20 -12
- warp/native/warp.cu +114 -16
- warp/native/warp.h +34 -16
- warp/optim/__init__.py +14 -6
- warp/optim/adam.py +14 -6
- warp/optim/linear.py +25 -10
- warp/optim/sgd.py +14 -6
- warp/paddle.py +14 -6
- warp/render/__init__.py +14 -6
- warp/render/render_opengl.py +14 -6
- warp/render/render_usd.py +14 -6
- warp/render/utils.py +14 -6
- warp/sim/__init__.py +14 -7
- warp/sim/articulation.py +18 -10
- warp/sim/collide.py +35 -16
- warp/sim/graph_coloring.py +14 -6
- warp/sim/import_mjcf.py +463 -162
- warp/sim/import_snu.py +14 -7
- warp/sim/import_urdf.py +46 -18
- warp/sim/import_usd.py +14 -7
- warp/sim/inertia.py +14 -6
- warp/sim/integrator.py +14 -6
- warp/sim/integrator_euler.py +19 -11
- warp/sim/integrator_featherstone.py +17 -16
- warp/sim/integrator_vbd.py +222 -8
- warp/sim/integrator_xpbd.py +19 -11
- warp/sim/model.py +56 -19
- warp/sim/particles.py +14 -6
- warp/sim/render.py +14 -6
- warp/sim/utils.py +17 -2
- warp/sparse.py +657 -555
- warp/stubs.py +231 -19
- warp/tape.py +14 -6
- warp/tests/aux_test_class_kernel.py +14 -6
- warp/tests/aux_test_compile_consts_dummy.py +14 -6
- warp/tests/aux_test_conditional_unequal_types_kernels.py +14 -6
- warp/tests/aux_test_dependent.py +14 -6
- warp/tests/aux_test_grad_customs.py +14 -6
- warp/tests/aux_test_instancing_gc.py +14 -6
- warp/tests/aux_test_module_unload.py +14 -6
- warp/tests/aux_test_name_clash1.py +14 -6
- warp/tests/aux_test_name_clash2.py +14 -6
- warp/tests/aux_test_unresolved_func.py +14 -6
- warp/tests/aux_test_unresolved_symbol.py +14 -6
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/{test_async.py → cuda/test_async.py} +14 -6
- warp/tests/{test_ipc.py → cuda/test_ipc.py} +14 -6
- warp/tests/{test_mempool.py → cuda/test_mempool.py} +53 -6
- warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +14 -6
- warp/tests/{test_peer.py → cuda/test_peer.py} +14 -6
- warp/tests/{test_pinned.py → cuda/test_pinned.py} +14 -6
- warp/tests/{test_streams.py → cuda/test_streams.py} +85 -6
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/{test_bvh.py → geometry/test_bvh.py} +14 -6
- warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +14 -6
- warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +14 -6
- warp/tests/{test_mesh.py → geometry/test_mesh.py} +14 -6
- warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +14 -6
- warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +80 -69
- warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +15 -7
- warp/tests/{test_volume.py → geometry/test_volume.py} +55 -12
- warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +14 -6
- warp/tests/interop/__init__.py +0 -0
- warp/tests/{test_dlpack.py → interop/test_dlpack.py} +42 -11
- warp/tests/{test_jax.py → interop/test_jax.py} +14 -6
- warp/tests/{test_paddle.py → interop/test_paddle.py} +14 -6
- warp/tests/{test_torch.py → interop/test_torch.py} +14 -6
- warp/tests/run_coverage_serial.py +14 -6
- warp/tests/sim/__init__.py +0 -0
- warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +23 -16
- warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +14 -6
- warp/tests/{test_collision.py → sim/test_collision.py} +16 -8
- warp/tests/{test_coloring.py → sim/test_coloring.py} +14 -7
- warp/tests/{test_model.py → sim/test_model.py} +55 -7
- warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +14 -6
- warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +16 -7
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/test_adam.py +14 -6
- warp/tests/test_arithmetic.py +14 -6
- warp/tests/test_array.py +14 -6
- warp/tests/test_array_reduce.py +14 -6
- warp/tests/test_assert.py +14 -6
- warp/tests/test_atomic.py +14 -6
- warp/tests/test_bool.py +15 -7
- warp/tests/test_builtins_resolution.py +14 -6
- warp/tests/test_closest_point_edge_edge.py +14 -6
- warp/tests/test_codegen.py +14 -6
- warp/tests/test_codegen_instancing.py +14 -6
- warp/tests/test_compile_consts.py +14 -6
- warp/tests/test_conditional.py +14 -6
- warp/tests/test_context.py +14 -6
- warp/tests/test_copy.py +14 -6
- warp/tests/test_ctypes.py +14 -6
- warp/tests/test_dense.py +14 -6
- warp/tests/test_devices.py +14 -6
- warp/tests/test_examples.py +42 -42
- warp/tests/test_fabricarray.py +14 -6
- warp/tests/test_fast_math.py +14 -6
- warp/tests/test_fem.py +37 -10
- warp/tests/test_fp16.py +14 -6
- warp/tests/test_func.py +14 -6
- warp/tests/test_future_annotations.py +14 -6
- warp/tests/test_generics.py +14 -6
- warp/tests/test_grad.py +14 -6
- warp/tests/test_grad_customs.py +14 -6
- warp/tests/test_grad_debug.py +14 -6
- warp/tests/test_implicit_init.py +14 -6
- warp/tests/test_import.py +14 -6
- warp/tests/test_indexedarray.py +14 -6
- warp/tests/test_intersect.py +14 -6
- warp/tests/test_iter.py +14 -6
- warp/tests/test_large.py +14 -6
- warp/tests/test_launch.py +14 -6
- warp/tests/test_lerp.py +14 -6
- warp/tests/test_linear_solvers.py +15 -11
- warp/tests/test_lvalue.py +14 -6
- warp/tests/test_mat.py +247 -85
- warp/tests/test_mat_lite.py +14 -6
- warp/tests/test_mat_scalar_ops.py +18 -10
- warp/tests/test_math.py +14 -6
- warp/tests/test_mlp.py +14 -6
- warp/tests/test_module_hashing.py +14 -6
- warp/tests/test_modules_lite.py +14 -6
- warp/tests/test_noise.py +14 -6
- warp/tests/test_operators.py +14 -6
- warp/tests/test_options.py +14 -6
- warp/tests/test_overwrite.py +15 -60
- warp/tests/test_print.py +14 -6
- warp/tests/test_quat.py +81 -52
- warp/tests/test_rand.py +58 -43
- warp/tests/test_reload.py +14 -6
- warp/tests/test_rounding.py +14 -6
- warp/tests/test_runlength_encode.py +14 -6
- warp/tests/test_scalar_ops.py +14 -6
- warp/tests/test_smoothstep.py +14 -6
- warp/tests/test_snippet.py +15 -0
- warp/tests/test_sparse.py +61 -12
- warp/tests/test_spatial.py +89 -6
- warp/tests/test_special_values.py +14 -6
- warp/tests/test_static.py +15 -7
- warp/tests/test_struct.py +14 -6
- warp/tests/test_tape.py +14 -6
- warp/tests/test_transient_module.py +14 -6
- warp/tests/test_triangle_closest_point.py +14 -6
- warp/tests/test_types.py +14 -6
- warp/tests/test_utils.py +98 -10
- warp/tests/test_vec.py +60 -40
- warp/tests/test_vec_lite.py +14 -6
- warp/tests/test_vec_scalar_ops.py +14 -6
- warp/tests/test_verify_fp.py +14 -6
- warp/tests/tile/__init__.py +0 -0
- warp/tests/{test_tile.py → tile/test_tile.py} +150 -57
- warp/tests/{test_tile_load.py → tile/test_tile_load.py} +15 -7
- warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +23 -12
- warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +39 -20
- warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +74 -7
- warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +14 -6
- warp/tests/{test_tile_view.py → tile/test_tile_view.py} +15 -7
- warp/tests/unittest_serial.py +15 -6
- warp/tests/unittest_suites.py +59 -65
- warp/tests/unittest_utils.py +16 -7
- warp/tests/walkthrough_debug.py +14 -6
- warp/thirdparty/unittest_parallel.py +15 -8
- warp/torch.py +14 -6
- warp/types.py +124 -664
- warp/utils.py +151 -78
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/METADATA +39 -12
- warp_lang-1.7.0.dist-info/RECORD +429 -0
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
- warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
- warp/examples/optim/example_walker.py +0 -309
- warp/native/cutlass_gemm.cpp +0 -34
- warp/native/cutlass_gemm.cu +0 -373
- warp/tests/test_matmul.py +0 -503
- warp/tests/test_matmul_lite.py +0 -403
- warp/tests/test_vbd.py +0 -378
- warp/tests/unused_test_misc.py +0 -69
- warp_lang-1.6.1.dist-info/LICENSE.md +0 -126
- warp_lang-1.6.1.dist-info/RECORD +0 -419
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/math.py
CHANGED
|
@@ -1,9 +1,17 @@
|
|
|
1
|
-
# Copyright (c) 2024 NVIDIA CORPORATION.
|
|
2
|
-
#
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
#
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
7
15
|
|
|
8
16
|
from typing import Any
|
|
9
17
|
|
|
@@ -19,6 +27,8 @@ __all__ = [
|
|
|
19
27
|
"norm_huber",
|
|
20
28
|
"norm_pseudo_huber",
|
|
21
29
|
"smooth_normalize",
|
|
30
|
+
"transform_from_matrix",
|
|
31
|
+
"transform_to_matrix",
|
|
22
32
|
]
|
|
23
33
|
|
|
24
34
|
|
|
@@ -123,6 +133,85 @@ def smooth_normalize(v: Any, delta: float = 1.0):
|
|
|
123
133
|
return v / norm_pseudo_huber(v, delta)
|
|
124
134
|
|
|
125
135
|
|
|
136
|
+
def create_transform_from_matrix_func(dtype):
|
|
137
|
+
mat44 = wp.types.matrix((4, 4), dtype)
|
|
138
|
+
vec3 = wp.types.vector(3, dtype)
|
|
139
|
+
transform = wp.types.transformation(dtype)
|
|
140
|
+
|
|
141
|
+
def transform_from_matrix(mat: mat44) -> transform:
|
|
142
|
+
"""
|
|
143
|
+
Construct a transformation from a 4x4 matrix.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
mat (Matrix[4, 4, Float]): Matrix to convert.
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
Transformation[Float]: The transformation.
|
|
150
|
+
"""
|
|
151
|
+
p = vec3(mat[0][3], mat[1][3], mat[2][3])
|
|
152
|
+
q = wp.quat_from_matrix(mat)
|
|
153
|
+
return transform(p, q)
|
|
154
|
+
|
|
155
|
+
return transform_from_matrix
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
transform_from_matrix = wp.func(
|
|
159
|
+
create_transform_from_matrix_func(wp.float32),
|
|
160
|
+
name="transform_from_matrix",
|
|
161
|
+
)
|
|
162
|
+
wp.func(
|
|
163
|
+
create_transform_from_matrix_func(wp.float16),
|
|
164
|
+
name="transform_from_matrix",
|
|
165
|
+
)
|
|
166
|
+
wp.func(
|
|
167
|
+
create_transform_from_matrix_func(wp.float64),
|
|
168
|
+
name="transform_from_matrix",
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def create_transform_to_matrix_func(dtype):
|
|
173
|
+
mat44 = wp.types.matrix((4, 4), dtype)
|
|
174
|
+
transform = wp.types.transformation(dtype)
|
|
175
|
+
|
|
176
|
+
def transform_to_matrix(xform: transform) -> mat44:
|
|
177
|
+
"""
|
|
178
|
+
Convert a transformation to a 4x4 matrix.
|
|
179
|
+
|
|
180
|
+
Args:
|
|
181
|
+
xform (Transformation[Float]): Transformation to convert.
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
Matrix[4, 4, Float]: The matrix.
|
|
185
|
+
"""
|
|
186
|
+
p = wp.transform_get_translation(xform)
|
|
187
|
+
q = wp.transform_get_rotation(xform)
|
|
188
|
+
rot = wp.quat_to_matrix(q)
|
|
189
|
+
# fmt: off
|
|
190
|
+
return mat44(
|
|
191
|
+
rot[0][0], rot[0][1], rot[0][2], p[0],
|
|
192
|
+
rot[1][0], rot[1][1], rot[1][2], p[1],
|
|
193
|
+
rot[2][0], rot[2][1], rot[2][2], p[2],
|
|
194
|
+
dtype(0.0), dtype(0.0), dtype(0.0), dtype(1.0),
|
|
195
|
+
)
|
|
196
|
+
# fmt: on
|
|
197
|
+
|
|
198
|
+
return transform_to_matrix
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
transform_to_matrix = wp.func(
|
|
202
|
+
create_transform_to_matrix_func(wp.float32),
|
|
203
|
+
name="transform_to_matrix",
|
|
204
|
+
)
|
|
205
|
+
wp.func(
|
|
206
|
+
create_transform_to_matrix_func(wp.float16),
|
|
207
|
+
name="transform_to_matrix",
|
|
208
|
+
)
|
|
209
|
+
wp.func(
|
|
210
|
+
create_transform_to_matrix_func(wp.float64),
|
|
211
|
+
name="transform_to_matrix",
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
|
|
126
215
|
# register API functions so they appear in the documentation
|
|
127
216
|
|
|
128
217
|
wp.context.register_api_function(
|
|
@@ -145,3 +234,11 @@ wp.context.register_api_function(
|
|
|
145
234
|
smooth_normalize,
|
|
146
235
|
group="Vector Math",
|
|
147
236
|
)
|
|
237
|
+
wp.context.register_api_function(
|
|
238
|
+
transform_from_matrix,
|
|
239
|
+
group="Transformations",
|
|
240
|
+
)
|
|
241
|
+
wp.context.register_api_function(
|
|
242
|
+
transform_to_matrix,
|
|
243
|
+
group="Transformations",
|
|
244
|
+
)
|
warp/native/array.h
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#pragma once
|
|
@@ -802,6 +811,19 @@ CUDA_CALLABLE inline void adj_select(const array_t<T1>& arr, const T2& a, const
|
|
|
802
811
|
adj_a += adj_ret;
|
|
803
812
|
}
|
|
804
813
|
|
|
814
|
+
// where operator to check for array being null, opposite convention compared to select
|
|
815
|
+
template <typename T1, typename T2>
|
|
816
|
+
CUDA_CALLABLE inline T2 where(const array_t<T1>& arr, const T2& a, const T2& b) { return arr.data?a:b; }
|
|
817
|
+
|
|
818
|
+
template <typename T1, typename T2>
|
|
819
|
+
CUDA_CALLABLE inline void adj_where(const array_t<T1>& arr, const T2& a, const T2& b, const array_t<T1>& adj_cond, T2& adj_a, T2& adj_b, const T2& adj_ret)
|
|
820
|
+
{
|
|
821
|
+
if (arr.data)
|
|
822
|
+
adj_a += adj_ret;
|
|
823
|
+
else
|
|
824
|
+
adj_b += adj_ret;
|
|
825
|
+
}
|
|
826
|
+
|
|
805
827
|
// stub for the case where we have a nested array inside a struct and
|
|
806
828
|
// atomic add the whole struct onto an array (e.g.: during backwards pass)
|
|
807
829
|
template <typename T>
|
warp/native/builtin.h
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#pragma once
|
|
@@ -224,6 +233,16 @@ inline CUDA_CALLABLE half operator * (half a,half b)
|
|
|
224
233
|
return float_to_half( half_to_float(a) * half_to_float(b) );
|
|
225
234
|
}
|
|
226
235
|
|
|
236
|
+
inline CUDA_CALLABLE half operator * (half a,float b)
|
|
237
|
+
{
|
|
238
|
+
return float_to_half( half_to_float(a) * b );
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
inline CUDA_CALLABLE half operator * (float a,half b)
|
|
242
|
+
{
|
|
243
|
+
return float_to_half( a * half_to_float(b) );
|
|
244
|
+
}
|
|
245
|
+
|
|
227
246
|
inline CUDA_CALLABLE half operator * (half a,double b)
|
|
228
247
|
{
|
|
229
248
|
return float_to_half( half_to_float(a) * b );
|
|
@@ -1075,6 +1094,23 @@ CUDA_CALLABLE inline void adj_select(const C& cond, const T& a, const T& b, C& a
|
|
|
1075
1094
|
adj_a += adj_ret;
|
|
1076
1095
|
}
|
|
1077
1096
|
|
|
1097
|
+
template <typename C, typename T>
|
|
1098
|
+
CUDA_CALLABLE inline T where(const C& cond, const T& a, const T& b)
|
|
1099
|
+
{
|
|
1100
|
+
// The double NOT operator !! casts to bool without compiler warnings.
|
|
1101
|
+
return (!!cond) ? a : b;
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
template <typename C, typename T>
|
|
1105
|
+
CUDA_CALLABLE inline void adj_where(const C& cond, const T& a, const T& b, C& adj_cond, T& adj_a, T& adj_b, const T& adj_ret)
|
|
1106
|
+
{
|
|
1107
|
+
// The double NOT operator !! casts to bool without compiler warnings.
|
|
1108
|
+
if (!!cond)
|
|
1109
|
+
adj_a += adj_ret;
|
|
1110
|
+
else
|
|
1111
|
+
adj_b += adj_ret;
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1078
1114
|
template <typename T>
|
|
1079
1115
|
CUDA_CALLABLE inline T copy(const T& src)
|
|
1080
1116
|
{
|
|
@@ -1758,8 +1794,7 @@ inline CUDA_CALLABLE void adj_expect_near(const vec3& actual, const vec3& expect
|
|
|
1758
1794
|
#include "noise.h"
|
|
1759
1795
|
#include "matnn.h"
|
|
1760
1796
|
|
|
1761
|
-
// only include in kernels for now
|
|
1762
|
-
#if defined(__CUDACC_RTC__)
|
|
1797
|
+
#if !defined(WP_ENABLE_CUDA) // only include in kernels for now
|
|
1763
1798
|
#include "tile.h"
|
|
1764
1799
|
#include "tile_reduce.h"
|
|
1765
|
-
#endif
|
|
1800
|
+
#endif //!defined(WP_ENABLE_CUDA)
|
warp/native/bvh.cpp
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#include <vector>
|
|
@@ -276,7 +285,9 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
|
|
|
276
285
|
|
|
277
286
|
bounds3 b = calc_bounds(lowers, uppers, bvh.primitive_indices, start, end);
|
|
278
287
|
|
|
279
|
-
|
|
288
|
+
// If the depth exceeds BVH_QUERY_STACK_SIZE, an out-of-bounds access bug may occur during querying.
|
|
289
|
+
// In that case, we merge the following nodes into a single large leaf node.
|
|
290
|
+
if (n <= BVH_LEAF_SIZE || depth >= BVH_QUERY_STACK_SIZE - 1)
|
|
280
291
|
{
|
|
281
292
|
bvh.node_lowers[node_index] = make_node(b.lower, start, true);
|
|
282
293
|
bvh.node_uppers[node_index] = make_node(b.upper, end, false);
|
warp/native/bvh.cu
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#include "warp.h"
|
|
@@ -31,7 +40,7 @@ namespace wp
|
|
|
31
40
|
// for LBVH: this will start with some muted leaf nodes, but that is okay, we can still trace up because their parent information is still valid
|
|
32
41
|
// the only thing worth mentioning is that when the parent leaf node is also a leaf node, we need to recompute its bounds, since their child information are lost
|
|
33
42
|
// for a compact tree such as those from the SAH or Median constructors, there are no muted leaf nodes
|
|
34
|
-
__global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, int* __restrict__ primitive_indices, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers, const vec3* item_lowers, const vec3* item_uppers)
|
|
43
|
+
__global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, const int* __restrict__ primitive_indices, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers, const vec3* __restrict__ item_lowers, const vec3* __restrict__ item_uppers)
|
|
35
44
|
{
|
|
36
45
|
int index = blockDim.x*blockIdx.x + threadIdx.x;
|
|
37
46
|
|
|
@@ -248,7 +257,7 @@ __global__ void build_leaves(const vec3* __restrict__ item_lowers, const vec3* _
|
|
|
248
257
|
// there is one thread launched per-leaf node, each thread calculates its parent node and assigns
|
|
249
258
|
// itself to either the left or right parent slot, the last child to complete the parent and moves
|
|
250
259
|
// up the hierarchy
|
|
251
|
-
__global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas, int* __restrict__ num_children, volatile int* __restrict__ range_lefts, volatile int* __restrict__ range_rights, volatile int* __restrict__ parents, volatile BVHPackedNodeHalf* __restrict__ lowers, volatile BVHPackedNodeHalf* __restrict__ uppers)
|
|
260
|
+
__global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas, int* __restrict__ num_children, const int* __restrict__ primitive_indices, volatile int* __restrict__ range_lefts, volatile int* __restrict__ range_rights, volatile int* __restrict__ parents, volatile BVHPackedNodeHalf* __restrict__ lowers, volatile BVHPackedNodeHalf* __restrict__ uppers)
|
|
252
261
|
{
|
|
253
262
|
int index = blockDim.x*blockIdx.x + threadIdx.x;
|
|
254
263
|
|
|
@@ -274,13 +283,34 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
|
|
|
274
283
|
|
|
275
284
|
int parent;
|
|
276
285
|
|
|
277
|
-
|
|
286
|
+
bool parent_right = false;
|
|
287
|
+
if (left == 0)
|
|
288
|
+
{
|
|
289
|
+
parent_right = true;
|
|
290
|
+
}
|
|
291
|
+
else if ((right != n - 1 && deltas[right] <= deltas[left - 1]))
|
|
292
|
+
{
|
|
293
|
+
// tie breaking, this avoids always choosing the right node which can result in a very deep tree
|
|
294
|
+
// generate a pseudo-random binary value to randomly choose left or right groupings
|
|
295
|
+
// since the primitives with same Morton code are not sorted at all, determining order based on primitive_indices may also be unreliable.
|
|
296
|
+
// Here, the decision is made using the XOR of the parities (value % 2) of primitive_indices[left - 1] and primitive_indices[right].
|
|
297
|
+
if (deltas[right] == deltas[left - 1])
|
|
298
|
+
{
|
|
299
|
+
parent_right = (primitive_indices[left - 1] % 2) ^ (primitive_indices[right] % 2);
|
|
300
|
+
}
|
|
301
|
+
else
|
|
302
|
+
{
|
|
303
|
+
parent_right = true;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
if (parent_right)
|
|
278
308
|
{
|
|
279
309
|
parent = right + internal_offset;
|
|
280
310
|
|
|
281
311
|
// set parent left child
|
|
282
312
|
parents[index] = parent;
|
|
283
|
-
lowers[parent].i = index;
|
|
313
|
+
lowers[parent].i = index;
|
|
284
314
|
range_lefts[parent] = left;
|
|
285
315
|
|
|
286
316
|
// ensure above writes are visible to all threads
|
|
@@ -354,27 +384,34 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
|
|
|
354
384
|
* <= BVH_LEAF_SIZE into a new leaf node. This process is done using the new kernel function called
|
|
355
385
|
* mark_packed_leaf_nodes .
|
|
356
386
|
*/
|
|
357
|
-
__global__ void mark_packed_leaf_nodes(int n,
|
|
358
|
-
|
|
387
|
+
__global__ void mark_packed_leaf_nodes(int n, const int* __restrict__ range_lefts, const int* __restrict__ range_rights, const int* __restrict__ parents,
|
|
388
|
+
BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers)
|
|
359
389
|
{
|
|
360
390
|
int node_index = blockDim.x * blockIdx.x + threadIdx.x;
|
|
361
391
|
if (node_index < n)
|
|
362
392
|
{
|
|
363
|
-
// mark the node as leaf if its range is less than LEAF_SIZE_LBVH
|
|
393
|
+
// mark the node as a leaf if its range holds at most BVH_LEAF_SIZE items or its depth reaches BVH_QUERY_STACK_SIZE
|
|
364
394
|
// this will forever mute its child nodes so that they will never be accessed
|
|
365
395
|
|
|
396
|
+
// calculate depth
|
|
397
|
+
int depth = 1;
|
|
398
|
+
int parent = parents[node_index];
|
|
399
|
+
while (parent != -1)
|
|
400
|
+
{
|
|
401
|
+
int old_parent = parent;
|
|
402
|
+
parent = parents[parent];
|
|
403
|
+
depth++;
|
|
404
|
+
}
|
|
405
|
+
|
|
366
406
|
int left = range_lefts[node_index];
|
|
367
407
|
// the LBVH constructor's range is defined as left <= i <= right
|
|
368
408
|
// we need to convert it to our convention: left <= i < right
|
|
369
409
|
int right = range_rights[node_index] + 1;
|
|
370
|
-
|
|
371
|
-
if (right - left <= BVH_LEAF_SIZE)
|
|
410
|
+
if (right - left <= BVH_LEAF_SIZE || depth >= BVH_QUERY_STACK_SIZE)
|
|
372
411
|
{
|
|
373
412
|
lowers[node_index].b = 1;
|
|
374
413
|
lowers[node_index].i = left;
|
|
375
414
|
uppers[node_index].i = right;
|
|
376
|
-
|
|
377
|
-
// printf("node %d (left %d right %d) is set to child\n", node_index, left, right);
|
|
378
415
|
}
|
|
379
416
|
}
|
|
380
417
|
}
|
|
@@ -507,8 +544,8 @@ void LinearBVHBuilderGPU::build(BVH& bvh, const vec3* item_lowers, const vec3* i
|
|
|
507
544
|
memset_device(WP_CURRENT_CONTEXT, num_children, 0, sizeof(int)*bvh.max_nodes);
|
|
508
545
|
|
|
509
546
|
// build the tree and internal node bounds
|
|
510
|
-
wp_launch_device(WP_CURRENT_CONTEXT, build_hierarchy, num_items, (num_items, bvh.root, deltas, num_children, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
|
|
511
|
-
wp_launch_device(WP_CURRENT_CONTEXT, mark_packed_leaf_nodes, bvh.max_nodes, (bvh.max_nodes, range_lefts, range_rights, bvh.node_lowers, bvh.node_uppers));
|
|
547
|
+
wp_launch_device(WP_CURRENT_CONTEXT, build_hierarchy, num_items, (num_items, bvh.root, deltas, num_children, bvh.primitive_indices, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
|
|
548
|
+
wp_launch_device(WP_CURRENT_CONTEXT, mark_packed_leaf_nodes, bvh.max_nodes, (bvh.max_nodes, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
|
|
512
549
|
|
|
513
550
|
// free temporary memory
|
|
514
551
|
free_device(WP_CURRENT_CONTEXT, indices);
|
|
@@ -664,7 +701,7 @@ void bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_items,
|
|
|
664
701
|
else if (constructor_type == BVH_CONSTRUCTOR_LBVH)
|
|
665
702
|
{
|
|
666
703
|
bvh_device_on_host.num_items = num_items;
|
|
667
|
-
bvh_device_on_host.max_nodes = 2 * num_items;
|
|
704
|
+
bvh_device_on_host.max_nodes = 2 * num_items - 1;
|
|
668
705
|
bvh_device_on_host.num_leaf_nodes = num_items;
|
|
669
706
|
bvh_device_on_host.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf) * bvh_device_on_host.max_nodes);
|
|
670
707
|
memset_device(WP_CURRENT_CONTEXT, bvh_device_on_host.node_lowers, 0, sizeof(BVHPackedNodeHalf) * bvh_device_on_host.max_nodes);
|
warp/native/bvh.h
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#pragma once
|
|
@@ -14,6 +23,7 @@
|
|
|
14
23
|
#define BVH_LEAF_SIZE (4)
|
|
15
24
|
#define SAH_NUM_BUCKETS (16)
|
|
16
25
|
#define USE_LOAD4
|
|
26
|
+
#define BVH_QUERY_STACK_SIZE (32)
|
|
17
27
|
|
|
18
28
|
#define BVH_CONSTRUCTOR_SAH (0)
|
|
19
29
|
#define BVH_CONSTRUCTOR_MEDIAN (1)
|
|
@@ -291,7 +301,7 @@ struct bvh_query_t
|
|
|
291
301
|
BVH bvh;
|
|
292
302
|
|
|
293
303
|
// BVH traversal stack:
|
|
294
|
-
int stack[
|
|
304
|
+
int stack[BVH_QUERY_STACK_SIZE];
|
|
295
305
|
int count;
|
|
296
306
|
|
|
297
307
|
// >= 0 if currently in a packed leaf node
|
warp/native/clang/clang.cpp
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#include "../native/crt.h"
|
|
@@ -49,10 +58,30 @@
|
|
|
49
58
|
#if defined(_WIN64)
|
|
50
59
|
extern "C" void __chkstk();
|
|
51
60
|
#elif defined(__APPLE__)
|
|
52
|
-
|
|
61
|
+
|
|
62
|
+
#if defined(__MACH__) && defined(__aarch64__)
|
|
63
|
+
extern "C" void _bzero(void *s, size_t n) {
|
|
64
|
+
memset(s, 0, n);
|
|
65
|
+
}
|
|
66
|
+
extern "C" void __bzero(void *s, size_t n) {
|
|
67
|
+
memset(s, 0, n);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
extern "C" void _memset_pattern16(void *s, const void *pattern, size_t n);
|
|
71
|
+
extern "C" void __memset_pattern16(void *s, const void *pattern, size_t n);
|
|
72
|
+
|
|
73
|
+
#else
|
|
74
|
+
// Intel Macs define bzero in libSystem.dylib
|
|
75
|
+
extern "C" void __bzero(void *s, size_t n);
|
|
76
|
+
|
|
77
|
+
extern "C" void _memset_pattern16(void *s, const void *pattern, size_t n);
|
|
78
|
+
extern "C" void __memset_pattern16(void *s, const void *pattern, size_t n);
|
|
79
|
+
|
|
80
|
+
#endif
|
|
81
|
+
|
|
53
82
|
extern "C" __double2 __sincos_stret(double);
|
|
54
83
|
extern "C" __float2 __sincosf_stret(float);
|
|
55
|
-
#endif
|
|
84
|
+
#endif // defined(__APPLE__)
|
|
56
85
|
|
|
57
86
|
extern "C" {
|
|
58
87
|
|
|
@@ -425,7 +454,14 @@ WP_API int load_obj(const char* object_file, const char* module_name)
|
|
|
425
454
|
// triggering the stack overflow guards.
|
|
426
455
|
SYMBOL(__chkstk),
|
|
427
456
|
#elif defined(__APPLE__)
|
|
428
|
-
|
|
457
|
+
#if defined(__MACH__) && defined(__aarch64__)
|
|
458
|
+
SYMBOL(bzero),
|
|
459
|
+
SYMBOL(_bzero),
|
|
460
|
+
#else
|
|
461
|
+
// Intel Mac
|
|
462
|
+
SYMBOL(__bzero),
|
|
463
|
+
#endif
|
|
464
|
+
SYMBOL(memset_pattern16),
|
|
429
465
|
SYMBOL(__sincos_stret), SYMBOL(__sincosf_stret),
|
|
430
466
|
#else
|
|
431
467
|
SYMBOL(sincosf), SYMBOL_T(sincos, void(*)(double,double*,double*)),
|
warp/native/coloring.cpp
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
// The Apache 2 License
|
warp/native/crt.cpp
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#include "crt.h"
|
warp/native/crt.h
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#pragma once
|
warp/native/cuda_crt.h
CHANGED
|
@@ -1,9 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-
* NVIDIA CORPORATION
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
7
16
|
*/
|
|
8
17
|
|
|
9
18
|
#pragma once
|