warp-lang 1.0.2-py3-none-manylinux2014_x86_64.whl → 1.1.0-py3-none-manylinux2014_x86_64.whl
- warp/__init__.py +108 -97
- warp/__init__.pyi +1 -1
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +115 -113
- warp/build_dll.py +383 -375
- warp/builtins.py +3425 -3354
- warp/codegen.py +2878 -2792
- warp/config.py +40 -36
- warp/constants.py +45 -45
- warp/context.py +5194 -5102
- warp/dlpack.py +442 -442
- warp/examples/__init__.py +16 -16
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nv_ant.xml +92 -92
- warp/examples/assets/nv_humanoid.xml +183 -183
- warp/examples/assets/quadruped.urdf +267 -267
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +383 -383
- warp/examples/benchmarks/benchmark_cloth.py +278 -277
- warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -88
- warp/examples/benchmarks/benchmark_cloth_jax.py +97 -100
- warp/examples/benchmarks/benchmark_cloth_numba.py +146 -142
- warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -77
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -86
- warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -112
- warp/examples/benchmarks/benchmark_cloth_warp.py +146 -146
- warp/examples/benchmarks/benchmark_launches.py +295 -295
- warp/examples/browse.py +29 -29
- warp/examples/core/example_dem.py +234 -219
- warp/examples/core/example_fluid.py +293 -267
- warp/examples/core/example_graph_capture.py +144 -126
- warp/examples/core/example_marching_cubes.py +188 -174
- warp/examples/core/example_mesh.py +174 -155
- warp/examples/core/example_mesh_intersect.py +205 -193
- warp/examples/core/example_nvdb.py +176 -170
- warp/examples/core/example_raycast.py +105 -90
- warp/examples/core/example_raymarch.py +199 -178
- warp/examples/core/example_render_opengl.py +185 -141
- warp/examples/core/example_sph.py +405 -387
- warp/examples/core/example_torch.py +222 -181
- warp/examples/core/example_wave.py +263 -248
- warp/examples/fem/bsr_utils.py +378 -380
- warp/examples/fem/example_apic_fluid.py +407 -389
- warp/examples/fem/example_convection_diffusion.py +182 -168
- warp/examples/fem/example_convection_diffusion_dg.py +219 -209
- warp/examples/fem/example_convection_diffusion_dg0.py +204 -194
- warp/examples/fem/example_deformed_geometry.py +177 -159
- warp/examples/fem/example_diffusion.py +201 -173
- warp/examples/fem/example_diffusion_3d.py +177 -152
- warp/examples/fem/example_diffusion_mgpu.py +221 -214
- warp/examples/fem/example_mixed_elasticity.py +244 -222
- warp/examples/fem/example_navier_stokes.py +259 -243
- warp/examples/fem/example_stokes.py +220 -192
- warp/examples/fem/example_stokes_transfer.py +265 -249
- warp/examples/fem/mesh_utils.py +133 -109
- warp/examples/fem/plot_utils.py +292 -287
- warp/examples/optim/example_bounce.py +260 -246
- warp/examples/optim/example_cloth_throw.py +222 -209
- warp/examples/optim/example_diffray.py +566 -536
- warp/examples/optim/example_drone.py +864 -835
- warp/examples/optim/example_inverse_kinematics.py +176 -168
- warp/examples/optim/example_inverse_kinematics_torch.py +185 -169
- warp/examples/optim/example_spring_cage.py +239 -231
- warp/examples/optim/example_trajectory.py +223 -199
- warp/examples/optim/example_walker.py +306 -293
- warp/examples/sim/example_cartpole.py +139 -129
- warp/examples/sim/example_cloth.py +196 -186
- warp/examples/sim/example_granular.py +124 -111
- warp/examples/sim/example_granular_collision_sdf.py +197 -186
- warp/examples/sim/example_jacobian_ik.py +236 -214
- warp/examples/sim/example_particle_chain.py +118 -105
- warp/examples/sim/example_quadruped.py +193 -180
- warp/examples/sim/example_rigid_chain.py +197 -187
- warp/examples/sim/example_rigid_contact.py +189 -177
- warp/examples/sim/example_rigid_force.py +127 -125
- warp/examples/sim/example_rigid_gyroscopic.py +109 -95
- warp/examples/sim/example_rigid_soft_contact.py +134 -122
- warp/examples/sim/example_soft_body.py +190 -177
- warp/fabric.py +337 -335
- warp/fem/__init__.py +60 -27
- warp/fem/cache.py +401 -388
- warp/fem/dirichlet.py +178 -179
- warp/fem/domain.py +262 -263
- warp/fem/field/__init__.py +100 -101
- warp/fem/field/field.py +148 -149
- warp/fem/field/nodal_field.py +298 -299
- warp/fem/field/restriction.py +22 -21
- warp/fem/field/test.py +180 -181
- warp/fem/field/trial.py +183 -183
- warp/fem/geometry/__init__.py +15 -19
- warp/fem/geometry/closest_point.py +69 -70
- warp/fem/geometry/deformed_geometry.py +270 -271
- warp/fem/geometry/element.py +744 -744
- warp/fem/geometry/geometry.py +184 -186
- warp/fem/geometry/grid_2d.py +380 -373
- warp/fem/geometry/grid_3d.py +441 -435
- warp/fem/geometry/hexmesh.py +953 -953
- warp/fem/geometry/partition.py +374 -376
- warp/fem/geometry/quadmesh_2d.py +532 -532
- warp/fem/geometry/tetmesh.py +840 -840
- warp/fem/geometry/trimesh_2d.py +577 -577
- warp/fem/integrate.py +1630 -1615
- warp/fem/operator.py +190 -191
- warp/fem/polynomial.py +214 -213
- warp/fem/quadrature/__init__.py +2 -2
- warp/fem/quadrature/pic_quadrature.py +243 -245
- warp/fem/quadrature/quadrature.py +295 -294
- warp/fem/space/__init__.py +294 -292
- warp/fem/space/basis_space.py +488 -489
- warp/fem/space/collocated_function_space.py +100 -105
- warp/fem/space/dof_mapper.py +236 -236
- warp/fem/space/function_space.py +148 -145
- warp/fem/space/grid_2d_function_space.py +267 -267
- warp/fem/space/grid_3d_function_space.py +305 -306
- warp/fem/space/hexmesh_function_space.py +350 -352
- warp/fem/space/partition.py +350 -350
- warp/fem/space/quadmesh_2d_function_space.py +368 -369
- warp/fem/space/restriction.py +158 -160
- warp/fem/space/shape/__init__.py +13 -15
- warp/fem/space/shape/cube_shape_function.py +738 -738
- warp/fem/space/shape/shape_function.py +102 -103
- warp/fem/space/shape/square_shape_function.py +611 -611
- warp/fem/space/shape/tet_shape_function.py +565 -567
- warp/fem/space/shape/triangle_shape_function.py +429 -429
- warp/fem/space/tetmesh_function_space.py +294 -292
- warp/fem/space/topology.py +297 -295
- warp/fem/space/trimesh_2d_function_space.py +223 -221
- warp/fem/types.py +77 -77
- warp/fem/utils.py +495 -495
- warp/jax.py +166 -141
- warp/jax_experimental.py +341 -339
- warp/native/array.h +1072 -1025
- warp/native/builtin.h +1560 -1560
- warp/native/bvh.cpp +398 -398
- warp/native/bvh.cu +525 -525
- warp/native/bvh.h +429 -429
- warp/native/clang/clang.cpp +495 -464
- warp/native/crt.cpp +31 -31
- warp/native/crt.h +334 -334
- warp/native/cuda_crt.h +1049 -1049
- warp/native/cuda_util.cpp +549 -540
- warp/native/cuda_util.h +288 -203
- warp/native/cutlass_gemm.cpp +34 -34
- warp/native/cutlass_gemm.cu +372 -372
- warp/native/error.cpp +66 -66
- warp/native/error.h +27 -27
- warp/native/fabric.h +228 -228
- warp/native/hashgrid.cpp +301 -278
- warp/native/hashgrid.cu +78 -77
- warp/native/hashgrid.h +227 -227
- warp/native/initializer_array.h +32 -32
- warp/native/intersect.h +1204 -1204
- warp/native/intersect_adj.h +365 -365
- warp/native/intersect_tri.h +322 -322
- warp/native/marching.cpp +2 -2
- warp/native/marching.cu +497 -497
- warp/native/marching.h +2 -2
- warp/native/mat.h +1498 -1498
- warp/native/matnn.h +333 -333
- warp/native/mesh.cpp +203 -203
- warp/native/mesh.cu +293 -293
- warp/native/mesh.h +1887 -1887
- warp/native/nanovdb/NanoVDB.h +4782 -4782
- warp/native/nanovdb/PNanoVDB.h +2553 -2553
- warp/native/nanovdb/PNanoVDBWrite.h +294 -294
- warp/native/noise.h +850 -850
- warp/native/quat.h +1084 -1084
- warp/native/rand.h +299 -299
- warp/native/range.h +108 -108
- warp/native/reduce.cpp +156 -156
- warp/native/reduce.cu +348 -348
- warp/native/runlength_encode.cpp +61 -61
- warp/native/runlength_encode.cu +46 -46
- warp/native/scan.cpp +30 -30
- warp/native/scan.cu +36 -36
- warp/native/scan.h +7 -7
- warp/native/solid_angle.h +442 -442
- warp/native/sort.cpp +94 -94
- warp/native/sort.cu +97 -97
- warp/native/sort.h +14 -14
- warp/native/sparse.cpp +337 -337
- warp/native/sparse.cu +544 -544
- warp/native/spatial.h +630 -630
- warp/native/svd.h +562 -562
- warp/native/temp_buffer.h +30 -30
- warp/native/vec.h +1132 -1132
- warp/native/volume.cpp +297 -297
- warp/native/volume.cu +32 -32
- warp/native/volume.h +538 -538
- warp/native/volume_builder.cu +425 -425
- warp/native/volume_builder.h +19 -19
- warp/native/warp.cpp +1057 -1052
- warp/native/warp.cu +2943 -2828
- warp/native/warp.h +313 -305
- warp/optim/__init__.py +9 -9
- warp/optim/adam.py +120 -120
- warp/optim/linear.py +1104 -939
- warp/optim/sgd.py +104 -92
- warp/render/__init__.py +10 -10
- warp/render/render_opengl.py +3217 -3204
- warp/render/render_usd.py +768 -749
- warp/render/utils.py +152 -150
- warp/sim/__init__.py +52 -59
- warp/sim/articulation.py +685 -685
- warp/sim/collide.py +1594 -1590
- warp/sim/import_mjcf.py +489 -481
- warp/sim/import_snu.py +220 -221
- warp/sim/import_urdf.py +536 -516
- warp/sim/import_usd.py +887 -881
- warp/sim/inertia.py +316 -317
- warp/sim/integrator.py +234 -233
- warp/sim/integrator_euler.py +1956 -1956
- warp/sim/integrator_featherstone.py +1910 -1991
- warp/sim/integrator_xpbd.py +3294 -3312
- warp/sim/model.py +4473 -4314
- warp/sim/particles.py +113 -112
- warp/sim/render.py +417 -403
- warp/sim/utils.py +413 -410
- warp/sparse.py +1227 -1227
- warp/stubs.py +2109 -2469
- warp/tape.py +1162 -225
- warp/tests/__init__.py +1 -1
- warp/tests/__main__.py +4 -4
- warp/tests/assets/torus.usda +105 -105
- warp/tests/aux_test_class_kernel.py +26 -26
- warp/tests/aux_test_compile_consts_dummy.py +10 -10
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -21
- warp/tests/aux_test_dependent.py +22 -22
- warp/tests/aux_test_grad_customs.py +23 -23
- warp/tests/aux_test_reference.py +11 -11
- warp/tests/aux_test_reference_reference.py +10 -10
- warp/tests/aux_test_square.py +17 -17
- warp/tests/aux_test_unresolved_func.py +14 -14
- warp/tests/aux_test_unresolved_symbol.py +14 -14
- warp/tests/disabled_kinematics.py +239 -239
- warp/tests/run_coverage_serial.py +31 -31
- warp/tests/test_adam.py +157 -157
- warp/tests/test_arithmetic.py +1124 -1124
- warp/tests/test_array.py +2417 -2326
- warp/tests/test_array_reduce.py +150 -150
- warp/tests/test_async.py +668 -656
- warp/tests/test_atomic.py +141 -141
- warp/tests/test_bool.py +204 -149
- warp/tests/test_builtins_resolution.py +1292 -1292
- warp/tests/test_bvh.py +164 -171
- warp/tests/test_closest_point_edge_edge.py +228 -228
- warp/tests/test_codegen.py +566 -553
- warp/tests/test_compile_consts.py +97 -101
- warp/tests/test_conditional.py +246 -246
- warp/tests/test_copy.py +232 -215
- warp/tests/test_ctypes.py +632 -632
- warp/tests/test_dense.py +67 -67
- warp/tests/test_devices.py +91 -98
- warp/tests/test_dlpack.py +530 -529
- warp/tests/test_examples.py +400 -378
- warp/tests/test_fabricarray.py +955 -955
- warp/tests/test_fast_math.py +62 -54
- warp/tests/test_fem.py +1277 -1278
- warp/tests/test_fp16.py +130 -130
- warp/tests/test_func.py +338 -337
- warp/tests/test_generics.py +571 -571
- warp/tests/test_grad.py +746 -640
- warp/tests/test_grad_customs.py +333 -336
- warp/tests/test_hash_grid.py +210 -164
- warp/tests/test_import.py +39 -39
- warp/tests/test_indexedarray.py +1134 -1134
- warp/tests/test_intersect.py +67 -67
- warp/tests/test_jax.py +307 -307
- warp/tests/test_large.py +167 -164
- warp/tests/test_launch.py +354 -354
- warp/tests/test_lerp.py +261 -261
- warp/tests/test_linear_solvers.py +191 -171
- warp/tests/test_lvalue.py +421 -493
- warp/tests/test_marching_cubes.py +65 -65
- warp/tests/test_mat.py +1801 -1827
- warp/tests/test_mat_lite.py +115 -115
- warp/tests/test_mat_scalar_ops.py +2907 -2889
- warp/tests/test_math.py +126 -193
- warp/tests/test_matmul.py +500 -499
- warp/tests/test_matmul_lite.py +410 -410
- warp/tests/test_mempool.py +188 -190
- warp/tests/test_mesh.py +284 -324
- warp/tests/test_mesh_query_aabb.py +228 -241
- warp/tests/test_mesh_query_point.py +692 -702
- warp/tests/test_mesh_query_ray.py +292 -303
- warp/tests/test_mlp.py +276 -276
- warp/tests/test_model.py +110 -110
- warp/tests/test_modules_lite.py +39 -39
- warp/tests/test_multigpu.py +163 -163
- warp/tests/test_noise.py +248 -248
- warp/tests/test_operators.py +250 -250
- warp/tests/test_options.py +123 -125
- warp/tests/test_peer.py +133 -137
- warp/tests/test_pinned.py +78 -78
- warp/tests/test_print.py +54 -54
- warp/tests/test_quat.py +2086 -2086
- warp/tests/test_rand.py +288 -288
- warp/tests/test_reload.py +217 -217
- warp/tests/test_rounding.py +179 -179
- warp/tests/test_runlength_encode.py +190 -190
- warp/tests/test_sim_grad.py +243 -0
- warp/tests/test_sim_kinematics.py +91 -97
- warp/tests/test_smoothstep.py +168 -168
- warp/tests/test_snippet.py +305 -266
- warp/tests/test_sparse.py +468 -460
- warp/tests/test_spatial.py +2148 -2148
- warp/tests/test_streams.py +486 -473
- warp/tests/test_struct.py +710 -675
- warp/tests/test_tape.py +173 -148
- warp/tests/test_torch.py +743 -743
- warp/tests/test_transient_module.py +87 -87
- warp/tests/test_types.py +556 -659
- warp/tests/test_utils.py +490 -499
- warp/tests/test_vec.py +1264 -1268
- warp/tests/test_vec_lite.py +73 -73
- warp/tests/test_vec_scalar_ops.py +2099 -2099
- warp/tests/test_verify_fp.py +94 -94
- warp/tests/test_volume.py +737 -736
- warp/tests/test_volume_write.py +255 -265
- warp/tests/unittest_serial.py +37 -37
- warp/tests/unittest_suites.py +363 -359
- warp/tests/unittest_utils.py +603 -578
- warp/tests/unused_test_misc.py +71 -71
- warp/tests/walkthrough_debug.py +85 -85
- warp/thirdparty/appdirs.py +598 -598
- warp/thirdparty/dlpack.py +143 -143
- warp/thirdparty/unittest_parallel.py +566 -561
- warp/torch.py +321 -295
- warp/types.py +4504 -4450
- warp/utils.py +1008 -821
- {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/LICENSE.md +126 -126
- {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/METADATA +338 -400
- warp_lang-1.1.0.dist-info/RECORD +352 -0
- warp/examples/assets/cube.usda +0 -42
- warp/examples/assets/sphere.usda +0 -56
- warp/examples/assets/torus.usda +0 -105
- warp_lang-1.0.2.dist-info/RECORD +0 -352
- {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.0.2.dist-info → warp_lang-1.1.0.dist-info}/top_level.txt +0 -0
warp/native/volume_builder.cu
CHANGED
@@ -1,425 +1,425 @@
#include "volume_builder.h"

#include <cuda.h>
#include <cuda_runtime_api.h>

#include <cub/cub.cuh>
#include <cub/util_allocator.cuh>

// Explanation of key types
// ------------------------
//
// leaf_key:
// .__.__. .... .__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.
// 63 62 .... 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
// XX|<    tile key    >|<  upper offset  >|<  lower offset  >|
//
// tile key (36 bit):
//     (uint32(ijk[2]) >> ChildT::TOTAL) |
//     (uint64_t(uint32(ijk[1]) >> ChildT::TOTAL)) << 12 |
//     (uint64_t(uint32(ijk[0]) >> ChildT::TOTAL)) << 24
//
// lower_key (51 bits) == leaf_key >> 12
//
// upper_key (36 bits) == lower_key >> 15 == leaf_key >> 27 == tile key

CUDA_CALLABLE inline uint64_t coord_to_full_key(const nanovdb::Coord& ijk)
{
    using Tree = nanovdb::FloatTree; // any type is fine at this point
    assert((abs(ijk[0]) >> 24) == 0);
    assert((abs(ijk[1]) >> 24) == 0);
    assert((abs(ijk[2]) >> 24) == 0);
    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
    const uint64_t tile_key36 =
        ((uint32_t(ijk[2]) >> 12) & MASK_12BITS) |                 // z is the lower 12 bits
        (uint64_t((uint32_t(ijk[1]) >> 12) & MASK_12BITS) << 12) | // y is the middle 12 bits
        (uint64_t((uint32_t(ijk[0]) >> 12) & MASK_12BITS) << 24);  // x is the upper 12 bits
    const uint32_t upper_offset = Tree::Node2::CoordToOffset(ijk);
    const uint32_t lower_offset = Tree::Node1::CoordToOffset(ijk);
    return (tile_key36 << 27) | (upper_offset << 12) | lower_offset;
}

__global__
void generate_keys(size_t num_points, const nanovdb::Coord* points, uint64_t* all_leaf_keys)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= num_points) return;

    all_leaf_keys[tid] = coord_to_full_key(points[tid]);
}

__global__
void generate_keys(size_t num_points, const nanovdb::Vec3f* points, uint64_t* all_leaf_keys, float one_over_voxel_size, nanovdb::Vec3f translation)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= num_points) return;

    const nanovdb::Coord ijk = ((points[tid] - translation) * one_over_voxel_size).round();
    all_leaf_keys[tid] = coord_to_full_key(ijk);
}

// Convert a 36 bit tile key to the ijk origin of the addressed tile
CUDA_CALLABLE inline nanovdb::Coord tile_key36_to_coord(uint64_t tile_key36) {
    auto extend_sign = [](uint32_t i) -> int32_t { return i | ((i>>11 & 1) * 0xFFFFF800);};
    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
    const int32_t i = extend_sign(uint32_t(tile_key36 >> 24) & MASK_12BITS);
    const int32_t j = extend_sign(uint32_t(tile_key36 >> 12) & MASK_12BITS);
    const int32_t k = extend_sign(uint32_t(tile_key36) & MASK_12BITS);
    return nanovdb::Coord(i, j, k) << 12;
}


// --- CUB helpers ---
template<uint8_t bits, typename InType, typename OutType>
struct ShiftRight {
    CUDA_CALLABLE inline OutType operator()(const InType& v) const {
        return static_cast<OutType>(v >> bits);
    }
};

template<uint8_t bits, typename InType = uint64_t, typename OutType = uint64_t>
struct ShiftRightIterator : public cub::TransformInputIterator<OutType, ShiftRight<bits, InType, OutType>, InType*> {
    using BASE = cub::TransformInputIterator<OutType, ShiftRight<bits, InType, OutType>, InType*>;
    CUDA_CALLABLE inline ShiftRightIterator(uint64_t* input_itr)
        : BASE(input_itr, ShiftRight<bits, InType, OutType>()) {}
};


// --- Atomic instructions for NanoVDB construction ---
template<typename MaskT>
CUDA_CALLABLE_DEVICE void set_mask_atomic(MaskT& mask, uint32_t n) {
    unsigned long long int* words = reinterpret_cast<unsigned long long int*>(&mask);
    atomicOr(words + (n / 64), 1ull << (n & 63));
}

template<typename Vec3T>
CUDA_CALLABLE_DEVICE void expand_cwise_atomic(nanovdb::BBox<Vec3T>& bbox, const Vec3T& v) {
    atomicMin(&bbox.mCoord[0][0], v[0]);
    atomicMin(&bbox.mCoord[0][1], v[1]);
    atomicMin(&bbox.mCoord[0][2], v[2]);
    atomicMax(&bbox.mCoord[1][0], v[0]);
    atomicMax(&bbox.mCoord[1][1], v[1]);
    atomicMax(&bbox.mCoord[1][2], v[2]);
}

template<typename RootDataType>
__hostdev__ const typename RootDataType::Tile* find_tile(const RootDataType* root_data, const nanovdb::Coord& ijk)
{
    using Tile = typename RootDataType::Tile;
    const Tile *tiles = reinterpret_cast<const Tile *>(root_data + 1);
    const auto key = RootDataType::CoordToKey(ijk);

    for (uint32_t i = 0; i < root_data->mTableSize; ++i)
    {
        if (tiles[i].key == key)
            return &tiles[i];
    }
    return nullptr;
}

// --- Wrapper for launching lambda kernels
template<typename Func, typename... Args>
__global__ void kernel(const size_t num_items, Func f, Args... args)
{
    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
    if (tid >= num_items) return;
    f(tid, args...);
}

template <typename BuildT>
void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
                           size_t &out_grid_size,
                           const void *points,
                           size_t num_points,
                           bool points_in_world_space,
                           const BuildGridParams<BuildT> &params)
{
    using FloatT = typename nanovdb::FloatTraits<BuildT>::FloatType;
    const BuildT ZERO_VAL{0};
    const FloatT ZERO_SCALAR{0};

    // Don't want to access "params" in kernels
    const double dx = params.voxel_size;
    const double Tx = params.translation[0], Ty = params.translation[1], Tz = params.translation[2];
    const BuildT background_value = params.background_value;

    const unsigned int num_threads = 256;
    unsigned int num_blocks;

    out_grid = nullptr;
    out_grid_size = 0;

    cub::CachingDeviceAllocator allocator;

    uint64_t* leaf_keys;
    uint64_t* lower_keys;
    uint64_t* upper_keys;
    uint32_t* node_counts;
    uint32_t leaf_count, lower_node_count, upper_node_count;

    allocator.DeviceAllocate((void**)&leaf_keys, sizeof(uint64_t) * num_points);
    allocator.DeviceAllocate((void**)&node_counts, sizeof(uint32_t) * 3);

    // Phase 1: counting the nodes
    {
        // Generating keys from coords
        uint64_t* all_leaf_keys;
        uint64_t* all_leaf_keys_sorted;
        allocator.DeviceAllocate((void**)&all_leaf_keys, sizeof(uint64_t) * num_points);
        allocator.DeviceAllocate((void**)&all_leaf_keys_sorted, sizeof(uint64_t) * num_points);

        num_blocks = (static_cast<unsigned int>(num_points) + num_threads - 1) / num_threads;
        if (points_in_world_space) {
            generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Vec3f*>(points), all_leaf_keys, static_cast<float>(1.0 / dx), nanovdb::Vec3f(params.translation));
        } else {
            generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Coord*>(points), all_leaf_keys);
        }

        void* d_temp_storage = nullptr;
        size_t temp_storage_bytes;

        // Sort the keys, then get an array of unique keys
        cub::DeviceRadixSort::SortKeys(nullptr, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast<int>(num_points), /* begin_bit = */ 0, /* end_bit = */ 63);
        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
        cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast<int>(num_points), /* begin_bit = */ 0, /* end_bit = */ 63);
        allocator.DeviceFree(d_temp_storage);

        cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast<int>(num_points));
        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
        cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast<int>(num_points));
        allocator.DeviceFree(d_temp_storage);
        check_cuda(cudaMemcpy(&leaf_count, node_counts, sizeof(uint32_t), cudaMemcpyDeviceToHost));

        allocator.DeviceFree(all_leaf_keys);
        all_leaf_keys = nullptr;
        allocator.DeviceFree(all_leaf_keys_sorted);
        all_leaf_keys_sorted = nullptr;


        // Get the keys unique to lower nodes and the number of them
        allocator.DeviceAllocate((void**)&lower_keys, sizeof(uint64_t) * leaf_count);
        cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
        cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
        allocator.DeviceFree(d_temp_storage);
        check_cuda(cudaMemcpy(&lower_node_count, node_counts + 1, sizeof(uint32_t), cudaMemcpyDeviceToHost));

        // Get the keys unique to upper nodes and the number of them
        allocator.DeviceAllocate((void**)&upper_keys, sizeof(uint64_t) * lower_node_count);
        cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
        cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
        allocator.DeviceFree(d_temp_storage);
        check_cuda(cudaMemcpy(&upper_node_count, node_counts + 2, sizeof(uint32_t), cudaMemcpyDeviceToHost));
    }

    using Tree = nanovdb::NanoTree<BuildT>;
    using Grid = nanovdb::Grid<Tree>;

    const size_t total_bytes =
        sizeof(Grid) +
        sizeof(Tree) +
        sizeof(typename Tree::RootType) +
        sizeof(typename Tree::RootType::Tile) * upper_node_count +
        sizeof(typename Tree::Node2) * upper_node_count +
        sizeof(typename Tree::Node1) * lower_node_count +
        sizeof(typename Tree::Node0) * leaf_count;

    const int64_t upper_mem_offset =
        sizeof(nanovdb::GridData) + sizeof(Tree) + sizeof(typename Tree::RootType) +
        sizeof(typename Tree::RootType::Tile) * upper_node_count;
    const int64_t lower_mem_offset = upper_mem_offset + sizeof(typename Tree::Node2) * upper_node_count;
    const int64_t leaf_mem_offset = lower_mem_offset + sizeof(typename Tree::Node1) * lower_node_count;

    typename Grid::DataType* grid;
    check_cuda(cudaMalloc(&grid, total_bytes));

    typename Tree::DataType* const tree = reinterpret_cast<typename Tree::DataType*>(grid + 1); // The tree is immediately after the grid
    typename Tree::RootType::DataType* const root = reinterpret_cast<typename Tree::RootType::DataType*>(tree + 1); // The root is immediately after the tree
    typename Tree::RootType::Tile* const tiles = reinterpret_cast<typename Tree::RootType::Tile*>(root + 1);
    typename Tree::Node2::DataType* const upper_nodes = nanovdb::PtrAdd<typename Tree::Node2::DataType>(grid, upper_mem_offset);
    typename Tree::Node1::DataType* const lower_nodes = nanovdb::PtrAdd<typename Tree::Node1::DataType>(grid, lower_mem_offset);
    typename Tree::Node0::DataType* const leaf_nodes = nanovdb::PtrAdd<typename Tree::Node0::DataType>(grid, leaf_mem_offset);

    // Phase 2: building the tree
    {
        // Setting up the tree and root node
        kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
            tree->mNodeOffset[3] = sizeof(Tree);
            tree->mNodeOffset[2] = tree->mNodeOffset[3] + sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count;
            tree->mNodeOffset[1] = tree->mNodeOffset[2] + sizeof(typename Tree::Node2) * upper_node_count;
            tree->mNodeOffset[0] = tree->mNodeOffset[1] + sizeof(typename Tree::Node1) * lower_node_count;
            tree->mNodeCount[2] = tree->mTileCount[2] = upper_node_count;
            tree->mNodeCount[1] = tree->mTileCount[1] = lower_node_count;
            tree->mNodeCount[0] = tree->mTileCount[0] = leaf_count;
            tree->mVoxelCount = Tree::Node0::SIZE * leaf_count; // assuming full leaves

            root->mBBox = nanovdb::CoordBBox(); // init to empty
            root->mTableSize = upper_node_count;
            root->mBackground = background_value;
            root->mMinimum = ZERO_VAL;
            root->mMaximum = ZERO_VAL;
            root->mAverage = ZERO_SCALAR;
            root->mStdDevi = ZERO_SCALAR;
        });
    }

    // Add tiles and upper nodes
    // i : 0 .. upper_node_count-1
    num_blocks = (upper_node_count + num_threads - 1) / num_threads;
    {
        kernel<<<num_blocks, num_threads>>>(upper_node_count, [=] __device__(size_t i) {
            tiles[i].key = root->CoordToKey(tile_key36_to_coord(upper_keys[i]));
            tiles[i].child = sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count + sizeof(typename Tree::Node2) * i;
            tiles[i].state = 0;
            tiles[i].value = background_value;

            assert(reinterpret_cast<const char*>(root->getChild(tiles + i)) == reinterpret_cast<const char*>(upper_nodes + i));
            auto& node = upper_nodes[i];
            node.mBBox = nanovdb::CoordBBox();
            node.mFlags = 0;
            node.mValueMask.setOff();
            node.mChildMask.setOff();
            node.mMinimum = ZERO_VAL;
            node.mMaximum = ZERO_VAL;
            node.mAverage = ZERO_SCALAR;
            node.mStdDevi = ZERO_SCALAR;
            for (size_t n = 0; n < Tree::Node2::SIZE; ++n) {
                node.mTable[n].value = background_value;
            }
        });
    }

    constexpr uint32_t MASK_15BITS = (1u << 15) - 1u;
    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;

    // Init lower nodes and register to parent
    // i : 0 .. lower_node_count-1
    num_blocks = (lower_node_count + num_threads - 1) / num_threads;
    {
        kernel<<<num_blocks, num_threads>>>(lower_node_count, [=] __device__(size_t i) {
            uint32_t upper_offset = lower_keys[i] & MASK_15BITS;
            auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data();
            set_mask_atomic(upper_node->mChildMask, upper_offset);
            upper_node->setChild(upper_offset, lower_nodes + i);

            auto& node = lower_nodes[i];
            node.mBBox = nanovdb::CoordBBox();
            node.mFlags = 0;
            node.mValueMask.setOff();
            node.mChildMask.setOff();
            node.mMinimum = ZERO_VAL;
            node.mMaximum = ZERO_VAL;
            node.mAverage = ZERO_SCALAR;
            node.mStdDevi = ZERO_SCALAR;
            for (size_t n = 0; n < Tree::Node1::SIZE; ++n) {
                node.mTable[n].value = background_value;
            }
        });
    }

    // Init leaf nodes and register to parent
    // i : 0 .. leaf_count-1
    num_blocks = (leaf_count + num_threads - 1) / num_threads;
    {
        kernel<<<num_blocks, num_threads>>>(leaf_count, [=] __device__(size_t i) {
            uint32_t lower_offset = leaf_keys[i] & MASK_12BITS;
            uint32_t upper_offset = (leaf_keys[i] >> 12) & MASK_15BITS;
            const nanovdb::Coord ijk = tile_key36_to_coord(leaf_keys[i] >> 27);

            auto* upper_node = root->getChild(find_tile(root, ijk))->data();
            auto* lower_node = upper_node->getChild(upper_offset)->data();
            set_mask_atomic(lower_node->mChildMask, lower_offset);
            lower_node->setChild(lower_offset, leaf_nodes + i);

            const nanovdb::Coord localUpperIjk = Tree::Node2::OffsetToLocalCoord(upper_offset) << Tree::Node1::TOTAL;
            const nanovdb::Coord localLowerIjk = Tree::Node1::OffsetToLocalCoord(lower_offset) << Tree::Node0::TOTAL;
            const nanovdb::Coord leafOrigin = ijk + localUpperIjk + localLowerIjk;

            auto& node = leaf_nodes[i];
            node.mBBoxMin = leafOrigin;
            node.mBBoxDif[0] = leaf_nodes[i].mBBoxDif[1] = leaf_nodes[i].mBBoxDif[2] = Tree::Node0::DIM;
            node.mFlags = 0;
            node.mValueMask.setOn();
            node.mMinimum = ZERO_VAL;
            node.mMaximum = ZERO_VAL;
            node.mAverage = ZERO_SCALAR;
            node.mStdDevi = ZERO_SCALAR;
            // mValues is undefined

            // propagating bbox up:
            expand_cwise_atomic(lower_node->mBBox, leafOrigin);
            expand_cwise_atomic(lower_node->mBBox, leafOrigin + nanovdb::Coord(Tree::Node0::DIM));
        });
    }

    // Propagating bounding boxes from lower nodes to upper nodes
    // i : 0 .. lower_node_count-1
    num_blocks = (lower_node_count + num_threads - 1) / num_threads;
    {
        kernel<<<num_blocks, num_threads>>>(lower_node_count, [=] __device__(size_t i) {
            auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data();
            expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.min());
            expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.max());
        });
    }

    // Setting up root bounding box and grid
    {
        kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
            for (int i = 0; i < upper_node_count; ++i) {
                root->mBBox.expand(upper_nodes[i].mBBox.min());
                root->mBBox.expand(upper_nodes[i].mBBox.max());
            }

            nanovdb::Map map;
            {
                const double mat[4][4] = {
                    {dx, 0.0, 0.0, 0.0}, // row 0
                    {0.0, dx, 0.0, 0.0}, // row 1
                    {0.0, 0.0, dx, 0.0}, // row 2
                    {Tx, Ty, Tz, 1.0},   // row 3
                };
                const double invMat[4][4] = {
                    {1 / dx, 0.0, 0.0, 0.0}, // row 0
                    {0.0, 1 / dx, 0.0, 0.0}, // row 1
                    {0.0, 0.0, 1 / dx, 0.0}, // row 2
                    {0.0, 0.0, 0.0, 0.0},    // row 3, ignored by Map::set
                };
                map.set(mat, invMat, 1.0);
            }

            grid->mMagic = NANOVDB_MAGIC_NUMBER;
            grid->mChecksum = 0xFFFFFFFFFFFFFFFFull;
            grid->mVersion = nanovdb::Version();
            grid->mFlags = static_cast<uint32_t>(nanovdb::GridFlags::HasBBox) |
                           static_cast<uint32_t>(nanovdb::GridFlags::IsBreadthFirst);
            grid->mGridIndex = 0;
            grid->mGridCount = 1;
            grid->mGridSize = total_bytes;
            // mGridName is set below
            grid->mWorldBBox.mCoord[0] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[0]));
            grid->mWorldBBox.mCoord[1] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[1]));
            grid->mVoxelSize = nanovdb::Vec3d(dx);
            grid->mMap = map;
            grid->mGridClass = nanovdb::GridClass::Unknown;
            grid->mGridType = nanovdb::mapToGridType<BuildT>();
            grid->mBlindMetadataOffset = total_bytes;
            grid->mBlindMetadataCount = 0;
        });
    }

    check_cuda(cudaMemcpy(grid->mGridName, params.name, 256, cudaMemcpyHostToDevice));

    allocator.DeviceFree(lower_keys);
    allocator.DeviceFree(upper_keys);
    allocator.DeviceFree(leaf_keys);
    allocator.DeviceFree(node_counts);

    out_grid = reinterpret_cast<Grid*>(grid);
    out_grid_size = total_bytes;
}

template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<float>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<float>&);
template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<nanovdb::Vec3f>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<nanovdb::Vec3f>&);
template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<int32_t>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<int32_t>&);
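The 36-bit tile-key packing above is easiest to check with a small round trip. Below is a minimal host-side sketch, re-implemented without the NanoVDB headers so it compiles standalone; the 12-bit two's-complement fields and the 4096-voxel tile span are taken from the MASK_12BITS logic in coord_to_full_key and tile_key36_to_coord, while the function names and sample coordinates are illustrative only, not part of the package.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack the tile index of each axis (coordinate >> 12, kept to 12 bits) into one
// 36-bit key: x in bits 24..35, y in bits 12..23, z in bits 0..11.
uint64_t coord_to_tile_key36(int32_t x, int32_t y, int32_t z)
{
    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
    return ((uint32_t(z) >> 12) & MASK_12BITS) |
           (uint64_t((uint32_t(y) >> 12) & MASK_12BITS) << 12) |
           (uint64_t((uint32_t(x) >> 12) & MASK_12BITS) << 24);
}

// Recover the tile origin: sign-extend each 12-bit field back to int32, then
// scale by the tile span (4096 voxels per axis).
void tile_key36_to_origin(uint64_t key, int32_t origin[3])
{
    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
    auto extend_sign = [](uint32_t i) -> int32_t { return int32_t(i | ((i >> 11 & 1) * 0xFFFFF800u)); };
    origin[0] = extend_sign(uint32_t(key >> 24) & MASK_12BITS) * 4096;
    origin[1] = extend_sign(uint32_t(key >> 12) & MASK_12BITS) * 4096;
    origin[2] = extend_sign(uint32_t(key) & MASK_12BITS) * 4096;
}

int main()
{
    // Voxels in the same 4096^3 tile map to the same key, including across
    // negative coordinates thanks to the sign extension.
    assert(coord_to_tile_key36(5000, -3000, 42) == coord_to_tile_key36(5001, -3000, 50));

    // Decoding returns the tile corner, i.e. each coordinate floored to a
    // multiple of 4096.
    int32_t origin[3];
    tile_key36_to_origin(coord_to_tile_key36(5000, -3000, 42), origin);
    printf("tile origin: %d %d %d\n", origin[0], origin[1], origin[2]); // prints: 4096 -4096 0
    return 0;
}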
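Phase 1's counting strategy (sort the packed leaf keys once, then repeatedly deduplicate progressively coarser key prefixes) can be mimicked on the host. The sketch below assumes plain C++17, with std::sort/std::unique standing in for cub::DeviceRadixSort and cub::DeviceSelect::Unique; the key layout matches the leaf_key packing above, but the helper name and sample keys are made up for illustration.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Drop the low `bits` bits of every key and count the distinct results --
// the host-side stand-in for ShiftRightIterator + cub::DeviceSelect::Unique.
static size_t count_unique_shifted(std::vector<uint64_t> keys, unsigned bits)
{
    for (uint64_t& k : keys) k >>= bits;
    std::sort(keys.begin(), keys.end());
    return static_cast<size_t>(std::unique(keys.begin(), keys.end()) - keys.begin());
}

int main()
{
    // Made-up leaf keys packed as (tile_key36 << 27) | (upper_offset << 12) | lower_offset.
    std::vector<uint64_t> keys = {
        (1ull << 27) | (5ull << 12) | 3,  // tile 1, upper slot 5, lower slot 3
        (1ull << 27) | (5ull << 12) | 3,  // duplicate point -> same leaf
        (1ull << 27) | (5ull << 12) | 9,  // same lower node, different leaf
        (1ull << 27) | (7ull << 12) | 0,  // same upper node, different lower node
        (2ull << 27),                     // different tile -> different upper node
    };

    const size_t leaf_count  = count_unique_shifted(keys, 0);   // distinct full keys
    const size_t lower_count = count_unique_shifted(keys, 12);  // drop the lower offsets
    const size_t upper_count = count_unique_shifted(keys, 27);  // keep only the tile keys

    printf("leaves=%zu lower=%zu upper=%zu\n", leaf_count, lower_count, upper_count);
    // prints: leaves=4 lower=3 upper=2
    return 0;
}

These three counts are exactly what the GPU version writes into node_counts[0..2], from which build_grid_from_tiles sizes the single contiguous allocation for the grid, tree, root tiles, and all three node levels.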