warp-lang 1.9.1__py3-none-win_amd64.whl → 1.10.0rc2__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +301 -287
- warp/__init__.pyi +794 -305
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1075 -0
- warp/_src/build.py +618 -0
- warp/_src/build_dll.py +640 -0
- warp/{builtins.py → _src/builtins.py} +1382 -377
- warp/_src/codegen.py +4359 -0
- warp/{config.py → _src/config.py} +178 -169
- warp/_src/constants.py +57 -0
- warp/_src/context.py +8294 -0
- warp/_src/dlpack.py +462 -0
- warp/_src/fabric.py +355 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +508 -0
- warp/_src/fem/cache.py +687 -0
- warp/_src/fem/dirichlet.py +188 -0
- warp/{fem → _src/fem}/domain.py +40 -30
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +701 -0
- warp/{fem → _src/fem}/field/nodal_field.py +30 -15
- warp/{fem → _src/fem}/field/restriction.py +1 -1
- warp/{fem → _src/fem}/field/virtual.py +53 -27
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
- warp/_src/fem/geometry/closest_point.py +97 -0
- warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
- warp/{fem → _src/fem}/geometry/element.py +32 -10
- warp/{fem → _src/fem}/geometry/geometry.py +48 -20
- warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
- warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
- warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
- warp/{fem → _src/fem}/geometry/partition.py +121 -63
- warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
- warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
- warp/{fem → _src/fem}/integrate.py +164 -158
- warp/_src/fem/linalg.py +383 -0
- warp/_src/fem/operator.py +396 -0
- warp/_src/fem/polynomial.py +229 -0
- warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
- warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
- warp/_src/fem/space/__init__.py +248 -0
- warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
- warp/_src/fem/space/basis_space.py +679 -0
- warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
- warp/{fem → _src/fem}/space/function_space.py +14 -13
- warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
- warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
- warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
- warp/{fem → _src/fem}/space/partition.py +117 -60
- warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/restriction.py +66 -33
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
- warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
- warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
- warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
- warp/_src/fem/space/topology.py +459 -0
- warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
- warp/_src/fem/types.py +112 -0
- warp/_src/fem/utils.py +486 -0
- warp/_src/jax.py +186 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +387 -0
- warp/_src/jax_experimental/ffi.py +1284 -0
- warp/_src/jax_experimental/xla_ffi.py +656 -0
- warp/_src/marching_cubes.py +708 -0
- warp/_src/math.py +414 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +163 -0
- warp/_src/optim/linear.py +1606 -0
- warp/_src/optim/sgd.py +112 -0
- warp/_src/paddle.py +406 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +289 -0
- warp/_src/render/render_opengl.py +3636 -0
- warp/_src/render/render_usd.py +937 -0
- warp/_src/render/utils.py +160 -0
- warp/_src/sparse.py +2716 -0
- warp/_src/tape.py +1206 -0
- warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
- warp/_src/torch.py +391 -0
- warp/_src/types.py +5870 -0
- warp/_src/utils.py +1693 -0
- warp/autograd.py +12 -1054
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +8 -588
- warp/build_dll.py +6 -721
- warp/codegen.py +6 -4251
- warp/constants.py +6 -39
- warp/context.py +12 -8062
- warp/dlpack.py +6 -444
- warp/examples/distributed/example_jacobi_mpi.py +4 -5
- warp/examples/fem/example_adaptive_grid.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +8 -8
- warp/examples/fem/example_diffusion.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_mixed_elasticity.py +2 -2
- warp/examples/fem/example_navier_stokes.py +1 -1
- warp/examples/fem/example_nonconforming_contact.py +7 -7
- warp/examples/fem/example_stokes.py +1 -1
- warp/examples/fem/example_stokes_transfer.py +1 -1
- warp/examples/fem/utils.py +2 -2
- warp/examples/interop/example_jax_callable.py +1 -1
- warp/examples/interop/example_jax_ffi_callback.py +1 -1
- warp/examples/interop/example_jax_kernel.py +1 -1
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/fabric.py +6 -337
- warp/fem/__init__.py +159 -97
- warp/fem/adaptivity.py +7 -489
- warp/fem/cache.py +9 -648
- warp/fem/dirichlet.py +6 -184
- warp/fem/field/__init__.py +8 -109
- warp/fem/field/field.py +7 -652
- warp/fem/geometry/__init__.py +7 -18
- warp/fem/geometry/closest_point.py +11 -77
- warp/fem/linalg.py +18 -366
- warp/fem/operator.py +11 -369
- warp/fem/polynomial.py +9 -209
- warp/fem/space/__init__.py +5 -211
- warp/fem/space/basis_space.py +6 -662
- warp/fem/space/shape/__init__.py +41 -118
- warp/fem/space/topology.py +6 -437
- warp/fem/types.py +6 -81
- warp/fem/utils.py +11 -444
- warp/jax.py +8 -165
- warp/jax_experimental/__init__.py +14 -1
- warp/jax_experimental/custom_call.py +8 -365
- warp/jax_experimental/ffi.py +17 -873
- warp/jax_experimental/xla_ffi.py +5 -605
- warp/marching_cubes.py +5 -689
- warp/math.py +16 -393
- warp/native/array.h +385 -37
- warp/native/builtin.h +314 -37
- warp/native/bvh.cpp +43 -9
- warp/native/bvh.cu +62 -27
- warp/native/bvh.h +310 -309
- warp/native/clang/clang.cpp +102 -97
- warp/native/coloring.cpp +0 -1
- warp/native/crt.h +208 -0
- warp/native/exports.h +156 -0
- warp/native/hashgrid.cu +2 -0
- warp/native/intersect.h +24 -1
- warp/native/intersect_tri.h +44 -35
- warp/native/mat.h +1456 -276
- warp/native/mesh.cpp +4 -4
- warp/native/mesh.cu +4 -2
- warp/native/mesh.h +176 -61
- warp/native/quat.h +0 -52
- warp/native/scan.cu +2 -0
- warp/native/sparse.cu +7 -3
- warp/native/spatial.h +12 -0
- warp/native/tile.h +681 -89
- warp/native/tile_radix_sort.h +1 -1
- warp/native/tile_reduce.h +394 -46
- warp/native/tile_scan.h +4 -4
- warp/native/vec.h +469 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +1 -1
- warp/native/volume.cu +1 -0
- warp/native/volume.h +1 -1
- warp/native/volume_builder.cu +2 -0
- warp/native/warp.cpp +57 -29
- warp/native/warp.cu +253 -171
- warp/native/warp.h +11 -8
- warp/optim/__init__.py +6 -3
- warp/optim/adam.py +6 -145
- warp/optim/linear.py +14 -1585
- warp/optim/sgd.py +6 -94
- warp/paddle.py +6 -388
- warp/render/__init__.py +8 -4
- warp/render/imgui_manager.py +7 -267
- warp/render/render_opengl.py +6 -3618
- warp/render/render_usd.py +6 -919
- warp/render/utils.py +6 -142
- warp/sparse.py +37 -2563
- warp/tape.py +6 -1188
- warp/tests/__main__.py +1 -1
- warp/tests/cuda/test_async.py +4 -4
- warp/tests/cuda/test_conditional_captures.py +1 -1
- warp/tests/cuda/test_multigpu.py +1 -1
- warp/tests/cuda/test_streams.py +58 -1
- warp/tests/geometry/test_bvh.py +157 -22
- warp/tests/geometry/test_marching_cubes.py +0 -1
- warp/tests/geometry/test_mesh.py +5 -3
- warp/tests/geometry/test_mesh_query_aabb.py +5 -12
- warp/tests/geometry/test_mesh_query_point.py +5 -2
- warp/tests/geometry/test_mesh_query_ray.py +15 -3
- warp/tests/geometry/test_volume_write.py +5 -5
- warp/tests/interop/test_dlpack.py +14 -14
- warp/tests/interop/test_jax.py +772 -49
- warp/tests/interop/test_paddle.py +1 -1
- warp/tests/test_adam.py +0 -1
- warp/tests/test_arithmetic.py +9 -9
- warp/tests/test_array.py +527 -100
- warp/tests/test_array_reduce.py +3 -3
- warp/tests/test_atomic.py +12 -8
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +4 -4
- warp/tests/test_bool.py +2 -2
- warp/tests/test_builtins_resolution.py +5 -571
- warp/tests/test_codegen.py +33 -14
- warp/tests/test_conditional.py +1 -1
- warp/tests/test_context.py +6 -6
- warp/tests/test_copy.py +242 -161
- warp/tests/test_ctypes.py +3 -3
- warp/tests/test_devices.py +24 -2
- warp/tests/test_examples.py +16 -84
- warp/tests/test_fabricarray.py +35 -35
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +56 -10
- warp/tests/test_fixedarray.py +3 -3
- warp/tests/test_func.py +8 -5
- warp/tests/test_generics.py +1 -1
- warp/tests/test_indexedarray.py +24 -24
- warp/tests/test_intersect.py +39 -9
- warp/tests/test_large.py +1 -1
- warp/tests/test_lerp.py +3 -1
- warp/tests/test_linear_solvers.py +1 -1
- warp/tests/test_map.py +35 -4
- warp/tests/test_mat.py +52 -62
- warp/tests/test_mat_constructors.py +4 -5
- warp/tests/test_mat_lite.py +1 -1
- warp/tests/test_mat_scalar_ops.py +121 -121
- warp/tests/test_math.py +34 -0
- warp/tests/test_module_aot.py +4 -4
- warp/tests/test_modules_lite.py +28 -2
- warp/tests/test_print.py +11 -11
- warp/tests/test_quat.py +93 -58
- warp/tests/test_runlength_encode.py +1 -1
- warp/tests/test_scalar_ops.py +38 -10
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +126 -15
- warp/tests/test_spatial.py +105 -87
- warp/tests/test_special_values.py +6 -6
- warp/tests/test_static.py +7 -7
- warp/tests/test_struct.py +13 -2
- warp/tests/test_triangle_closest_point.py +48 -1
- warp/tests/test_types.py +27 -15
- warp/tests/test_utils.py +52 -52
- warp/tests/test_vec.py +29 -29
- warp/tests/test_vec_constructors.py +5 -5
- warp/tests/test_vec_scalar_ops.py +97 -97
- warp/tests/test_version.py +75 -0
- warp/tests/tile/test_tile.py +178 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +7 -4
- warp/tests/tile/test_tile_load.py +26 -2
- warp/tests/tile/test_tile_mathdx.py +3 -3
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +2 -4
- warp/tests/tile/test_tile_reduce.py +214 -13
- warp/tests/unittest_suites.py +6 -14
- warp/tests/unittest_utils.py +10 -9
- warp/tests/walkthrough_debug.py +3 -1
- warp/torch.py +6 -373
- warp/types.py +29 -5764
- warp/utils.py +10 -1659
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +46 -99
- warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp/examples/assets/cartpole.urdf +0 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/nv_ant.xml +0 -92
- warp/examples/assets/nv_humanoid.xml +0 -183
- warp/examples/assets/quadruped.urdf +0 -268
- warp/examples/optim/example_bounce.py +0 -266
- warp/examples/optim/example_cloth_throw.py +0 -228
- warp/examples/optim/example_drone.py +0 -870
- warp/examples/optim/example_inverse_kinematics.py +0 -182
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
- warp/examples/optim/example_softbody_properties.py +0 -400
- warp/examples/optim/example_spring_cage.py +0 -245
- warp/examples/optim/example_trajectory.py +0 -227
- warp/examples/sim/example_cartpole.py +0 -143
- warp/examples/sim/example_cloth.py +0 -225
- warp/examples/sim/example_cloth_self_contact.py +0 -316
- warp/examples/sim/example_granular.py +0 -130
- warp/examples/sim/example_granular_collision_sdf.py +0 -202
- warp/examples/sim/example_jacobian_ik.py +0 -244
- warp/examples/sim/example_particle_chain.py +0 -124
- warp/examples/sim/example_quadruped.py +0 -203
- warp/examples/sim/example_rigid_chain.py +0 -203
- warp/examples/sim/example_rigid_contact.py +0 -195
- warp/examples/sim/example_rigid_force.py +0 -133
- warp/examples/sim/example_rigid_gyroscopic.py +0 -115
- warp/examples/sim/example_rigid_soft_contact.py +0 -140
- warp/examples/sim/example_soft_body.py +0 -196
- warp/examples/tile/example_tile_walker.py +0 -327
- warp/sim/__init__.py +0 -74
- warp/sim/articulation.py +0 -793
- warp/sim/collide.py +0 -2570
- warp/sim/graph_coloring.py +0 -307
- warp/sim/import_mjcf.py +0 -791
- warp/sim/import_snu.py +0 -227
- warp/sim/import_urdf.py +0 -579
- warp/sim/import_usd.py +0 -898
- warp/sim/inertia.py +0 -357
- warp/sim/integrator.py +0 -245
- warp/sim/integrator_euler.py +0 -2000
- warp/sim/integrator_featherstone.py +0 -2101
- warp/sim/integrator_vbd.py +0 -2487
- warp/sim/integrator_xpbd.py +0 -3295
- warp/sim/model.py +0 -4821
- warp/sim/particles.py +0 -121
- warp/sim/render.py +0 -431
- warp/sim/utils.py +0 -431
- warp/tests/sim/disabled_kinematics.py +0 -244
- warp/tests/sim/test_cloth.py +0 -863
- warp/tests/sim/test_collision.py +0 -743
- warp/tests/sim/test_coloring.py +0 -347
- warp/tests/sim/test_inertia.py +0 -161
- warp/tests/sim/test_model.py +0 -226
- warp/tests/sim/test_sim_grad.py +0 -287
- warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
- warp/tests/sim/test_sim_kinematics.py +0 -98
- warp/thirdparty/__init__.py +0 -0
- warp_lang-1.9.1.dist-info/RECORD +0 -456
- /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
- /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/native/builtin.h
CHANGED
|
@@ -35,6 +35,11 @@
|
|
|
35
35
|
#define CUDA_CALLABLE_DEVICE __device__
|
|
36
36
|
#endif
|
|
37
37
|
|
|
38
|
+
// Tile block dimension used while building the warp core library
|
|
39
|
+
#ifndef WP_TILE_BLOCK_DIM
|
|
40
|
+
#define WP_TILE_BLOCK_DIM 256
|
|
41
|
+
#endif
|
|
42
|
+
|
|
38
43
|
#ifdef WP_VERIFY_FP
|
|
39
44
|
#define FP_CHECK 1
|
|
40
45
|
#define DO_IF_FPCHECK(X) {X}
|
|
@@ -48,8 +53,28 @@
|
|
|
48
53
|
#define RAD_TO_DEG 57.29577951308232087679
|
|
49
54
|
#define DEG_TO_RAD 0.01745329251994329577
|
|
50
55
|
|
|
56
|
+
#ifndef M_PI_F
|
|
57
|
+
#define M_PI_F 3.14159265358979323846f
|
|
58
|
+
#endif
|
|
59
|
+
|
|
60
|
+
#ifndef M_2_SQRT_PI_F
|
|
61
|
+
#define M_2_SQRT_PI_F 1.1283791670955125739f // 2/sqrt(pi)
|
|
62
|
+
#endif
|
|
63
|
+
|
|
64
|
+
#ifndef M_2_SQRT_PI
|
|
65
|
+
#define M_2_SQRT_PI 1.1283791670955125738961589031215 // 2/sqrt(pi)
|
|
66
|
+
#endif
|
|
67
|
+
|
|
68
|
+
#ifndef M_SQRT_PI_F_2
|
|
69
|
+
#define M_SQRT_PI_F_2 0.88622692545275801364f // sqrt(pi)/2
|
|
70
|
+
#endif
|
|
71
|
+
|
|
72
|
+
#ifndef M_SQRT_PI_2
|
|
73
|
+
#define M_SQRT_PI_2 0.88622692545275801364908374167057 // sqrt(pi)/2
|
|
74
|
+
#endif
|
|
75
|
+
|
|
51
76
|
#if defined(__CUDACC__) && !defined(_MSC_VER)
|
|
52
|
-
__device__ void __debugbreak() { __brkpt(); }
|
|
77
|
+
__device__ inline void __debugbreak() { __brkpt(); }
|
|
53
78
|
#endif
|
|
54
79
|
|
|
55
80
|
#if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
|
|
@@ -267,6 +292,13 @@ inline CUDA_CALLABLE half operator / (half a,half b)
|
|
|
267
292
|
|
|
268
293
|
|
|
269
294
|
|
|
295
|
+
template<typename TRet, typename T>
|
|
296
|
+
inline CUDA_CALLABLE TRet cast(T a)
|
|
297
|
+
{
|
|
298
|
+
static_assert(sizeof(TRet) == sizeof(T), "source and destination must have the same size");
|
|
299
|
+
return *reinterpret_cast<TRet*>(&a);
|
|
300
|
+
}
|
|
301
|
+
|
|
270
302
|
template <typename T>
|
|
271
303
|
CUDA_CALLABLE inline float cast_float(T x) { return (float)(x); }
|
|
272
304
|
|
|
@@ -782,6 +814,7 @@ inline CUDA_CALLABLE half floordiv(half a, half b)
|
|
|
782
814
|
#endif
|
|
783
815
|
return floorf(float(a/b));
|
|
784
816
|
}
|
|
817
|
+
|
|
785
818
|
inline CUDA_CALLABLE float floordiv(float a, float b)
|
|
786
819
|
{
|
|
787
820
|
#if FP_CHECK
|
|
@@ -793,6 +826,7 @@ inline CUDA_CALLABLE float floordiv(float a, float b)
|
|
|
793
826
|
#endif
|
|
794
827
|
return floorf(a/b);
|
|
795
828
|
}
|
|
829
|
+
|
|
796
830
|
inline CUDA_CALLABLE double floordiv(double a, double b)
|
|
797
831
|
{
|
|
798
832
|
#if FP_CHECK
|
|
@@ -805,6 +839,210 @@ inline CUDA_CALLABLE double floordiv(double a, double b)
|
|
|
805
839
|
return ::floor(a/b);
|
|
806
840
|
}
|
|
807
841
|
|
|
842
|
+
inline CUDA_CALLABLE half erf(half a)
|
|
843
|
+
{
|
|
844
|
+
return erff(float(a));
|
|
845
|
+
}
|
|
846
|
+
|
|
847
|
+
inline CUDA_CALLABLE float erf(float a)
|
|
848
|
+
{
|
|
849
|
+
return erff(a);
|
|
850
|
+
}
|
|
851
|
+
|
|
852
|
+
inline CUDA_CALLABLE double erf(double a)
|
|
853
|
+
{
|
|
854
|
+
return ::erf(a);
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
inline CUDA_CALLABLE half erfc(half a)
|
|
858
|
+
{
|
|
859
|
+
return erfcf(float(a));
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
inline CUDA_CALLABLE float erfc(float a)
|
|
863
|
+
{
|
|
864
|
+
return erfcf(a);
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
inline CUDA_CALLABLE double erfc(double a)
|
|
868
|
+
{
|
|
869
|
+
return ::erfc(a);
|
|
870
|
+
}
|
|
871
|
+
|
|
872
|
+
inline CUDA_CALLABLE half erfinv(half a)
|
|
873
|
+
{
|
|
874
|
+
#if FP_CHECK
|
|
875
|
+
if (float(a) < -1.0f || float(a) > 1.0f)
|
|
876
|
+
{
|
|
877
|
+
printf("%s:%d erfinv(%f)\n", __FILE__, __LINE__, float(a));
|
|
878
|
+
assert(0);
|
|
879
|
+
}
|
|
880
|
+
#endif
|
|
881
|
+
return ::erfinvf(float(a));
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
inline CUDA_CALLABLE float erfinv(float a)
|
|
885
|
+
{
|
|
886
|
+
#if FP_CHECK
|
|
887
|
+
if (a < -1.0f || a > 1.0f)
|
|
888
|
+
{
|
|
889
|
+
printf("%s:%d erfinv(%f)\n", __FILE__, __LINE__, a);
|
|
890
|
+
assert(0);
|
|
891
|
+
}
|
|
892
|
+
#endif
|
|
893
|
+
return ::erfinvf(a);
|
|
894
|
+
}
|
|
895
|
+
|
|
896
|
+
inline CUDA_CALLABLE double erfinv(double a)
|
|
897
|
+
{
|
|
898
|
+
#if FP_CHECK
|
|
899
|
+
if (a < -1.0 || a > 1.0)
|
|
900
|
+
{
|
|
901
|
+
printf("%s:%d erfinv(%f)\n", __FILE__, __LINE__, a);
|
|
902
|
+
assert(0);
|
|
903
|
+
}
|
|
904
|
+
#endif
|
|
905
|
+
return ::erfinv(a);
|
|
906
|
+
}
|
|
907
|
+
|
|
908
|
+
inline CUDA_CALLABLE half erfcinv(half a)
|
|
909
|
+
{
|
|
910
|
+
#if FP_CHECK
|
|
911
|
+
if (float(a) < 0.0f || float(a) > 2.0f)
|
|
912
|
+
{
|
|
913
|
+
printf("%s:%d erfcinv(%f)\n", __FILE__, __LINE__, float(a));
|
|
914
|
+
assert(0);
|
|
915
|
+
}
|
|
916
|
+
#endif
|
|
917
|
+
return ::erfcinvf(float(a));
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
inline CUDA_CALLABLE float erfcinv(float a)
|
|
921
|
+
{
|
|
922
|
+
#if FP_CHECK
|
|
923
|
+
if (a < 0.0f || a > 2.0f)
|
|
924
|
+
{
|
|
925
|
+
printf("%s:%d erfcinv(%f)\n", __FILE__, __LINE__, a);
|
|
926
|
+
assert(0);
|
|
927
|
+
}
|
|
928
|
+
#endif
|
|
929
|
+
return ::erfcinvf(a);
|
|
930
|
+
}
|
|
931
|
+
|
|
932
|
+
inline CUDA_CALLABLE double erfcinv(double a)
|
|
933
|
+
{
|
|
934
|
+
#if FP_CHECK
|
|
935
|
+
if (a < 0.0 || a > 2.0)
|
|
936
|
+
{
|
|
937
|
+
printf("%s:%d erfcinv(%f)\n", __FILE__, __LINE__, a);
|
|
938
|
+
assert(0);
|
|
939
|
+
}
|
|
940
|
+
#endif
|
|
941
|
+
return ::erfcinv(a);
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
inline CUDA_CALLABLE void adj_erf(half a, half& adj_a, half adj_ret)
|
|
945
|
+
{
|
|
946
|
+
adj_a += half(M_2_SQRT_PI_F * ::expf(-float(a)*float(a))) * adj_ret;
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
inline CUDA_CALLABLE void adj_erf(float a, float& adj_a, float adj_ret)
|
|
950
|
+
{
|
|
951
|
+
adj_a += M_2_SQRT_PI_F * ::expf(-a*a) * adj_ret;
|
|
952
|
+
}
|
|
953
|
+
|
|
954
|
+
inline CUDA_CALLABLE void adj_erf(double a, double& adj_a, double adj_ret)
|
|
955
|
+
{
|
|
956
|
+
adj_a += M_2_SQRT_PI * ::exp(-a*a) * adj_ret;
|
|
957
|
+
}
|
|
958
|
+
|
|
959
|
+
inline CUDA_CALLABLE void adj_erfc(half a, half& adj_a, half adj_ret)
|
|
960
|
+
{
|
|
961
|
+
adj_a -= half(M_2_SQRT_PI_F * ::expf(-float(a)*float(a))) * adj_ret;
|
|
962
|
+
}
|
|
963
|
+
|
|
964
|
+
inline CUDA_CALLABLE void adj_erfc(float a, float& adj_a, float adj_ret)
|
|
965
|
+
{
|
|
966
|
+
adj_a -= M_2_SQRT_PI_F * ::expf(-a*a) * adj_ret;
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
inline CUDA_CALLABLE void adj_erfc(double a, double& adj_a, double adj_ret)
|
|
970
|
+
{
|
|
971
|
+
adj_a -= M_2_SQRT_PI * ::exp(-a*a) * adj_ret;
|
|
972
|
+
}
|
|
973
|
+
|
|
974
|
+
inline CUDA_CALLABLE void adj_erfinv(half a, half ret, half& adj_a, half adj_ret)
|
|
975
|
+
{
|
|
976
|
+
#if FP_CHECK
|
|
977
|
+
if (float(a) < -1.0f || float(a) > 1.0f)
|
|
978
|
+
{
|
|
979
|
+
printf("%s:%d adj_erfinv(%f)\n", __FILE__, __LINE__, float(a));
|
|
980
|
+
assert(0);
|
|
981
|
+
}
|
|
982
|
+
#endif
|
|
983
|
+
adj_a += half(M_SQRT_PI_F_2 * ::expf(float(ret)*float(ret))) * adj_ret;
|
|
984
|
+
}
|
|
985
|
+
|
|
986
|
+
inline CUDA_CALLABLE void adj_erfinv(float a, float ret, float& adj_a, float adj_ret)
|
|
987
|
+
{
|
|
988
|
+
#if FP_CHECK
|
|
989
|
+
if (a < -1.0f || a > 1.0f)
|
|
990
|
+
{
|
|
991
|
+
printf("%s:%d adj_erfinv(%f)\n", __FILE__, __LINE__, a);
|
|
992
|
+
assert(0);
|
|
993
|
+
}
|
|
994
|
+
#endif
|
|
995
|
+
adj_a += M_SQRT_PI_F_2 * ::expf(ret*ret) * adj_ret;
|
|
996
|
+
}
|
|
997
|
+
|
|
998
|
+
inline CUDA_CALLABLE void adj_erfinv(double a, double ret, double& adj_a, double adj_ret)
|
|
999
|
+
{
|
|
1000
|
+
#if FP_CHECK
|
|
1001
|
+
if (a < -1.0 || a > 1.0)
|
|
1002
|
+
{
|
|
1003
|
+
printf("%s:%d adj_erfinv(%f)\n", __FILE__, __LINE__, a);
|
|
1004
|
+
assert(0);
|
|
1005
|
+
}
|
|
1006
|
+
#endif
|
|
1007
|
+
adj_a += M_SQRT_PI_2 * ::exp(ret*ret) * adj_ret;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
inline CUDA_CALLABLE void adj_erfcinv(half a, half ret, half& adj_a, half adj_ret)
|
|
1011
|
+
{
|
|
1012
|
+
#if FP_CHECK
|
|
1013
|
+
if (float(a) < 0.0f || float(a) > 2.0f)
|
|
1014
|
+
{
|
|
1015
|
+
printf("%s:%d adj_erfcinv(%f)\n", __FILE__, __LINE__, float(a));
|
|
1016
|
+
assert(0);
|
|
1017
|
+
}
|
|
1018
|
+
#endif
|
|
1019
|
+
adj_a -= half(M_SQRT_PI_F_2 * ::expf(float(ret)*float(ret))) * adj_ret;
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
inline CUDA_CALLABLE void adj_erfcinv(float a, float ret, float& adj_a, float adj_ret)
|
|
1023
|
+
{
|
|
1024
|
+
#if FP_CHECK
|
|
1025
|
+
if (a < 0.0f || a > 2.0f)
|
|
1026
|
+
{
|
|
1027
|
+
printf("%s:%d adj_erfcinv(%f)\n", __FILE__, __LINE__, a);
|
|
1028
|
+
assert(0);
|
|
1029
|
+
}
|
|
1030
|
+
#endif
|
|
1031
|
+
adj_a -= M_SQRT_PI_F_2 * ::expf(ret*ret) * adj_ret;
|
|
1032
|
+
}
|
|
1033
|
+
|
|
1034
|
+
inline CUDA_CALLABLE void adj_erfcinv(double a, double ret, double& adj_a, double adj_ret)
|
|
1035
|
+
{
|
|
1036
|
+
#if FP_CHECK
|
|
1037
|
+
if (a < 0.0 || a > 2.0)
|
|
1038
|
+
{
|
|
1039
|
+
printf("%s:%d adj_erfcinv(%f)\n", __FILE__, __LINE__, a);
|
|
1040
|
+
assert(0);
|
|
1041
|
+
}
|
|
1042
|
+
#endif
|
|
1043
|
+
adj_a -= M_SQRT_PI_2 * ::exp(ret*ret) * adj_ret;
|
|
1044
|
+
}
|
|
1045
|
+
|
|
808
1046
|
inline CUDA_CALLABLE float leaky_min(float a, float b, float r) { return min(a, b); }
|
|
809
1047
|
inline CUDA_CALLABLE float leaky_max(float a, float b, float r) { return max(a, b); }
|
|
810
1048
|
|
|
@@ -1086,23 +1324,6 @@ DECLARE_ADJOINTS(float16)
|
|
|
1086
1324
|
DECLARE_ADJOINTS(float32)
|
|
1087
1325
|
DECLARE_ADJOINTS(float64)
|
|
1088
1326
|
|
|
1089
|
-
template <typename C, typename T>
|
|
1090
|
-
CUDA_CALLABLE inline T select(const C& cond, const T& a, const T& b)
|
|
1091
|
-
{
|
|
1092
|
-
// The double NOT operator !! casts to bool without compiler warnings.
|
|
1093
|
-
return (!!cond) ? b : a;
|
|
1094
|
-
}
|
|
1095
|
-
|
|
1096
|
-
template <typename C, typename TA, typename TB, typename TRet>
|
|
1097
|
-
CUDA_CALLABLE inline void adj_select(const C& cond, const TA& a, const TB& b, C& adj_cond, TA& adj_a, TB& adj_b, const TRet& adj_ret)
|
|
1098
|
-
{
|
|
1099
|
-
// The double NOT operator !! casts to bool without compiler warnings.
|
|
1100
|
-
if (!!cond)
|
|
1101
|
-
adj_b += adj_ret;
|
|
1102
|
-
else
|
|
1103
|
-
adj_a += adj_ret;
|
|
1104
|
-
}
|
|
1105
|
-
|
|
1106
1327
|
template <typename C, typename T>
|
|
1107
1328
|
CUDA_CALLABLE inline T where(const C& cond, const T& a, const T& b)
|
|
1108
1329
|
{
|
|
@@ -1157,12 +1378,30 @@ CUDA_CALLABLE inline T& operator += (T& a, const T& b) { a = add(a, b); return a
|
|
|
1157
1378
|
template <typename T>
|
|
1158
1379
|
CUDA_CALLABLE inline T& operator -= (T& a, const T& b) { a = sub(a, b); return a; }
|
|
1159
1380
|
|
|
1381
|
+
template <typename T>
|
|
1382
|
+
CUDA_CALLABLE inline T& operator &= (T& a, const T& b) { a = bit_and(a, b); return a; }
|
|
1383
|
+
|
|
1384
|
+
template <typename T>
|
|
1385
|
+
CUDA_CALLABLE inline T& operator |= (T& a, const T& b) { a = bit_or(a, b); return a; }
|
|
1386
|
+
|
|
1387
|
+
template <typename T>
|
|
1388
|
+
CUDA_CALLABLE inline T& operator ^= (T& a, const T& b) { a = bit_xor(a, b); return a; }
|
|
1389
|
+
|
|
1160
1390
|
template <typename T>
|
|
1161
1391
|
CUDA_CALLABLE inline T operator+(const T& a, const T& b) { return add(a, b); }
|
|
1162
1392
|
|
|
1163
1393
|
template <typename T>
|
|
1164
1394
|
CUDA_CALLABLE inline T operator-(const T& a, const T& b) { return sub(a, b); }
|
|
1165
1395
|
|
|
1396
|
+
template <typename T>
|
|
1397
|
+
CUDA_CALLABLE inline T operator&(const T& a, const T& b) { return bit_and(a, b); }
|
|
1398
|
+
|
|
1399
|
+
template <typename T>
|
|
1400
|
+
CUDA_CALLABLE inline T operator|(const T& a, const T& b) { return bit_or(a, b); }
|
|
1401
|
+
|
|
1402
|
+
template <typename T>
|
|
1403
|
+
CUDA_CALLABLE inline T operator^(const T& a, const T& b) { return bit_xor(a, b); }
|
|
1404
|
+
|
|
1166
1405
|
template <typename T>
|
|
1167
1406
|
CUDA_CALLABLE inline T pos(const T& x) { return x; }
|
|
1168
1407
|
template <typename T>
|
|
@@ -1278,8 +1517,8 @@ inline CUDA_CALLABLE_DEVICE void tid(int& i, int& j, int& k, int& l, size_t inde
|
|
|
1278
1517
|
}
|
|
1279
1518
|
|
|
1280
1519
|
// should match types.py
|
|
1281
|
-
|
|
1282
|
-
|
|
1520
|
+
static const int SLICE_BEGIN = (1LL << (sizeof(int) * 8 - 1)) - 1; // std::numeric_limits<int>::max()
|
|
1521
|
+
static const int SLICE_END = -(1LL << (sizeof(int) * 8 - 1)); // std::numeric_limits<int>::min()
|
|
1283
1522
|
|
|
1284
1523
|
struct slice_t
|
|
1285
1524
|
{
|
|
@@ -1366,26 +1605,18 @@ inline CUDA_CALLABLE T atomic_add(T* buf, T value)
|
|
|
1366
1605
|
#endif
|
|
1367
1606
|
}
|
|
1368
1607
|
|
|
1369
|
-
// emulate atomic int64 add with atomicCAS()
|
|
1370
1608
|
template <>
|
|
1371
|
-
inline CUDA_CALLABLE int64 atomic_add(int64*
|
|
1609
|
+
inline CUDA_CALLABLE int64 atomic_add(int64* buf, int64 value)
|
|
1372
1610
|
{
|
|
1373
|
-
#if defined(__CUDA_ARCH__)
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
while (val < (int64)old)
|
|
1378
|
-
{
|
|
1379
|
-
assumed = old;
|
|
1380
|
-
old = atomicCAS(address_as_ull, assumed, (int64)val);
|
|
1381
|
-
}
|
|
1382
|
-
|
|
1383
|
-
return (int64)old;
|
|
1384
|
-
|
|
1385
|
-
#else
|
|
1386
|
-
int64 old = *address;
|
|
1387
|
-
*address = min(old, val);
|
|
1611
|
+
#if !defined(__CUDA_ARCH__)
|
|
1612
|
+
int64 old = buf[0];
|
|
1613
|
+
buf[0] += value;
|
|
1388
1614
|
return old;
|
|
1615
|
+
#else // CUDA compiled by NVRTC
|
|
1616
|
+
unsigned long long int *buf_as_ull = (unsigned long long int*)buf;
|
|
1617
|
+
unsigned long long int unsigned_value = static_cast<unsigned long long int>(value);
|
|
1618
|
+
unsigned long long int result = atomicAdd(buf_as_ull, unsigned_value);
|
|
1619
|
+
return static_cast<int64>(result);
|
|
1389
1620
|
#endif
|
|
1390
1621
|
}
|
|
1391
1622
|
|
|
@@ -1727,6 +1958,52 @@ CUDA_CALLABLE inline void adj_atomic_exch(T* address, T val, T* adj_address, T&
|
|
|
1727
1958
|
}
|
|
1728
1959
|
|
|
1729
1960
|
|
|
1961
|
+
template<typename T>
|
|
1962
|
+
inline CUDA_CALLABLE T atomic_and(T* buf, T value)
|
|
1963
|
+
{
|
|
1964
|
+
#if defined(__CUDA_ARCH__)
|
|
1965
|
+
return atomicAnd(buf, value);
|
|
1966
|
+
#else
|
|
1967
|
+
T old = buf[0];
|
|
1968
|
+
buf[0] &= value;
|
|
1969
|
+
return old;
|
|
1970
|
+
#endif
|
|
1971
|
+
}
|
|
1972
|
+
|
|
1973
|
+
template<typename T>
|
|
1974
|
+
inline CUDA_CALLABLE T atomic_or(T* buf, T value)
|
|
1975
|
+
{
|
|
1976
|
+
#if defined(__CUDA_ARCH__)
|
|
1977
|
+
return atomicOr(buf, value);
|
|
1978
|
+
#else
|
|
1979
|
+
T old = buf[0];
|
|
1980
|
+
buf[0] |= value;
|
|
1981
|
+
return old;
|
|
1982
|
+
#endif
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1985
|
+
template<typename T>
|
|
1986
|
+
inline CUDA_CALLABLE T atomic_xor(T* buf, T value)
|
|
1987
|
+
{
|
|
1988
|
+
#if defined(__CUDA_ARCH__)
|
|
1989
|
+
return atomicXor(buf, value);
|
|
1990
|
+
#else
|
|
1991
|
+
T old = buf[0];
|
|
1992
|
+
buf[0] ^= value;
|
|
1993
|
+
return old;
|
|
1994
|
+
#endif
|
|
1995
|
+
}
|
|
1996
|
+
|
|
1997
|
+
|
|
1998
|
+
// for bitwise operations we do not accumulate gradients
|
|
1999
|
+
template<typename T>
|
|
2000
|
+
CUDA_CALLABLE inline void adj_atomic_and(T* buf, T* adj_buf, T &value, T &adj_value) { }
|
|
2001
|
+
template<typename T>
|
|
2002
|
+
CUDA_CALLABLE inline void adj_atomic_or(T* buf, T* adj_buf, T &value, T &adj_value) { }
|
|
2003
|
+
template<typename T>
|
|
2004
|
+
CUDA_CALLABLE inline void adj_atomic_xor(T* buf, T* adj_buf, T &value, T &adj_value) { }
|
|
2005
|
+
|
|
2006
|
+
|
|
1730
2007
|
} // namespace wp
|
|
1731
2008
|
|
|
1732
2009
|
|
warp/native/bvh.cpp
CHANGED
|
@@ -39,20 +39,19 @@ class TopDownBVHBuilder
|
|
|
39
39
|
public:
|
|
40
40
|
|
|
41
41
|
void build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n, int in_constructor_type);
|
|
42
|
+
void rebuild(BVH& bvh, int in_constructor_type);
|
|
42
43
|
|
|
43
44
|
private:
|
|
44
45
|
|
|
45
46
|
void initialize_empty(BVH& bvh);
|
|
46
47
|
|
|
47
48
|
bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
|
|
48
|
-
|
|
49
|
+
int build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int start, int end, int depth, int parent);
|
|
49
50
|
int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
|
|
50
51
|
int partition_midpoint(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
|
|
51
52
|
float partition_sah(BVH& bvh, const vec3* lowers, const vec3* uppers,
|
|
52
53
|
int start, int end, bounds3 range_bounds, int& split_axis);
|
|
53
54
|
|
|
54
|
-
int build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int start, int end, int depth, int parent);
|
|
55
|
-
|
|
56
55
|
int constructor_type = -1;
|
|
57
56
|
};
|
|
58
57
|
|
|
@@ -69,6 +68,7 @@ void TopDownBVHBuilder::initialize_empty(BVH& bvh)
|
|
|
69
68
|
bvh.root = nullptr;
|
|
70
69
|
bvh.primitive_indices = nullptr;
|
|
71
70
|
bvh.num_leaf_nodes = 0;
|
|
71
|
+
bvh.num_items = 0;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
74
|
void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n, int in_constructor_type)
|
|
@@ -112,6 +112,7 @@ void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers,
|
|
|
112
112
|
bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
|
|
113
113
|
bvh.node_parents = new int[bvh.max_nodes];
|
|
114
114
|
bvh.node_counts = nullptr;
|
|
115
|
+
bvh.num_items = n;
|
|
115
116
|
|
|
116
117
|
// root is always in first slot for top down builders
|
|
117
118
|
bvh.root = new int[1];
|
|
@@ -124,6 +125,27 @@ void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers,
|
|
|
124
125
|
build_recursive(bvh, lowers, uppers, 0, n, 0, -1);
|
|
125
126
|
}
|
|
126
127
|
|
|
128
|
+
// Rebuild the tree of an existing BVH in place, using the item bounds the BVH already
// references (bvh.item_lowers / bvh.item_uppers) and its stored bvh.num_items.
//
// in_constructor_type must be BVH_CONSTRUCTOR_SAH or BVH_CONSTRUCTOR_MEDIAN; any other value
// prints an error to stderr and returns without modifying the BVH. A BVH with zero items is
// also returned untouched.
//
// This function allocates nothing: it overwrites bvh.primitive_indices and resets the node
// counters before recursing.
// NOTE(review): presumably relies on the arrays allocated by an earlier build() on the same
// BVH still being valid and large enough -- confirm against callers.
void TopDownBVHBuilder::rebuild(BVH& bvh, int in_constructor_type)
|
|
129
|
+
{
|
|
130
|
+
// reject constructor types the CPU builder does not handle
if (in_constructor_type != BVH_CONSTRUCTOR_SAH && in_constructor_type != BVH_CONSTRUCTOR_MEDIAN)
|
|
131
|
+
{
|
|
132
|
+
fprintf(stderr, "Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
|
|
133
|
+
in_constructor_type, BVH_CONSTRUCTOR_SAH, BVH_CONSTRUCTOR_MEDIAN);
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
136
|
+
// nothing to rebuild for an empty BVH
if (bvh.num_items == 0)
|
|
137
|
+
return;
|
|
138
|
+
|
|
139
|
+
constructor_type = in_constructor_type;
|
|
140
|
+
// reset the primitive ordering to the identity permutation before partitioning again
for (int i = 0; i < bvh.num_items; ++i)
|
|
141
|
+
bvh.primitive_indices[i] = i;
|
|
142
|
+
|
|
143
|
+
// clear the tree statistics so the recursive build starts from an empty tree
bvh.max_depth = 0;
|
|
144
|
+
bvh.num_nodes = 0;
|
|
145
|
+
bvh.num_leaf_nodes = 0;
|
|
146
|
+
// same call shape as in build(): whole item range [0, num_items), depth 0, parent -1
build_recursive(bvh, bvh.item_lowers, bvh.item_uppers, 0, bvh.num_items, 0, -1);
|
|
147
|
+
}
|
|
148
|
+
|
|
127
149
|
|
|
128
150
|
bounds3 TopDownBVHBuilder::calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end)
|
|
129
151
|
{
|
|
@@ -323,7 +345,7 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
|
|
|
323
345
|
|
|
324
346
|
// If the depth exceeds BVH_QUERY_STACK_SIZE, an out-of-bounds access bug may occur during querying.
|
|
325
347
|
// In that case, we merge the following nodes into a single large leaf node.
|
|
326
|
-
if (n <=
|
|
348
|
+
if (n <= bvh.leaf_size || depth >= BVH_QUERY_STACK_SIZE - 1)
|
|
327
349
|
{
|
|
328
350
|
bvh.node_lowers[node_index] = make_node(b.lower, start, true);
|
|
329
351
|
bvh.node_uppers[node_index] = make_node(b.upper, end, false);
|
|
@@ -421,7 +443,11 @@ void bvh_refit_host(BVH& bvh)
|
|
|
421
443
|
{
|
|
422
444
|
bvh_refit_recursive(bvh, 0);
|
|
423
445
|
}
|
|
424
|
-
|
|
446
|
+
// Rebuild `bvh` on the host by delegating to a temporary TopDownBVHBuilder.
// constructor_type is passed through unchanged; TopDownBVHBuilder::rebuild() validates it.
void bvh_rebuild_host(BVH& bvh, int constructor_type)
|
|
447
|
+
{
|
|
448
|
+
TopDownBVHBuilder builder;
|
|
449
|
+
builder.rebuild(bvh, constructor_type);
|
|
450
|
+
}
|
|
425
451
|
|
|
426
452
|
} // namespace wp
|
|
427
453
|
|
|
@@ -464,13 +490,14 @@ void bvh_rem_descriptor(uint64_t id)
|
|
|
464
490
|
|
|
465
491
|
|
|
466
492
|
// create in-place given existing descriptor
|
|
467
|
-
void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type, BVH& bvh)
|
|
493
|
+
void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type, BVH& bvh, int leaf_size)
|
|
468
494
|
{
|
|
469
495
|
memset(&bvh, 0, sizeof(BVH));
|
|
470
496
|
|
|
471
497
|
bvh.item_lowers = lowers;
|
|
472
498
|
bvh.item_uppers = uppers;
|
|
473
499
|
bvh.num_items = num_items;
|
|
500
|
+
bvh.leaf_size = leaf_size;
|
|
474
501
|
|
|
475
502
|
TopDownBVHBuilder builder;
|
|
476
503
|
builder.build(bvh, lowers, uppers, num_items, constructor_type);
|
|
@@ -496,10 +523,10 @@ void bvh_destroy_host(BVH& bvh)
|
|
|
496
523
|
|
|
497
524
|
} // namespace wp
|
|
498
525
|
|
|
499
|
-
uint64_t wp_bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
|
|
526
|
+
// Allocate a BVH with `new`, build it on the host via wp::bvh_create_host(), and return the
// pointer cast to uint64_t as an opaque handle. leaf_size is forwarded unchanged to
// wp::bvh_create_host(), which stores it in bvh.leaf_size.
uint64_t wp_bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type, int leaf_size)
|
|
500
527
|
{
|
|
501
528
|
BVH* bvh = new BVH();
|
|
502
|
-
wp::bvh_create_host(lowers, uppers, num_items, constructor_type, *bvh);
|
|
529
|
+
wp::bvh_create_host(lowers, uppers, num_items, constructor_type, *bvh, leaf_size);
|
|
503
530
|
|
|
504
531
|
// the handle is simply the heap pointer reinterpreted as an integer
return (uint64_t)bvh;
|
|
505
532
|
}
|
|
@@ -510,6 +537,12 @@ void wp_bvh_refit_host(uint64_t id)
|
|
|
510
537
|
wp::bvh_refit_host(*bvh);
|
|
511
538
|
}
|
|
512
539
|
|
|
540
|
+
// Handle-based entry point for rebuilding a host BVH.
// `id` is reinterpreted as a BVH* (the inverse of the cast wp_bvh_create_host() applies to its
// return value) and is not checked here, so it must be a live handle.
// constructor_type is forwarded unchanged to wp::bvh_rebuild_host().
void wp_bvh_rebuild_host(uint64_t id, int constructor_type)
|
|
541
|
+
{
|
|
542
|
+
BVH* bvh = (BVH*)(id);
|
|
543
|
+
wp::bvh_rebuild_host(*bvh, constructor_type);
|
|
544
|
+
}
|
|
545
|
+
|
|
513
546
|
void wp_bvh_destroy_host(uint64_t id)
|
|
514
547
|
{
|
|
515
548
|
BVH* bvh = (BVH*)(id);
|
|
@@ -521,8 +554,9 @@ void wp_bvh_destroy_host(uint64_t id)
|
|
|
521
554
|
// stubs for non-CUDA platforms
|
|
522
555
|
#if !WP_ENABLE_CUDA
|
|
523
556
|
|
|
524
|
-
uint64_t wp_bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type) { return 0; }
|
|
557
|
+
// Stand-ins for the device BVH entry points, compiled only when WP_ENABLE_CUDA is false.
// The create stub ignores all arguments and returns 0; the other stubs do nothing.
uint64_t wp_bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type, int leaf_size) { return 0; }
|
|
525
558
|
void wp_bvh_refit_device(uint64_t id) {}
|
|
526
559
|
void wp_bvh_destroy_device(uint64_t id) {}
|
|
560
|
+
// NOTE(review): unlike wp_bvh_rebuild_host(id, constructor_type), this stub takes only `id` --
// confirm it matches the declaration used by CUDA-enabled builds.
void wp_bvh_rebuild_device(uint64_t id) {}
|
|
527
561
|
|
|
528
562
|
#endif // !WP_ENABLE_CUDA
|