warp-lang 1.1.0-py3-none-macosx_10_13_universal2.whl → 1.2.0-py3-none-macosx_10_13_universal2.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of warp-lang might be problematic.
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +10 -37
- warp/build_dll.py +2 -2
- warp/builtins.py +274 -6
- warp/codegen.py +51 -4
- warp/config.py +2 -2
- warp/constants.py +4 -0
- warp/context.py +418 -203
- warp/examples/benchmarks/benchmark_api.py +0 -2
- warp/examples/benchmarks/benchmark_cloth_warp.py +0 -1
- warp/examples/benchmarks/benchmark_launches.py +0 -2
- warp/examples/core/example_dem.py +0 -2
- warp/examples/core/example_fluid.py +0 -2
- warp/examples/core/example_graph_capture.py +0 -2
- warp/examples/core/example_marching_cubes.py +0 -2
- warp/examples/core/example_mesh.py +0 -2
- warp/examples/core/example_mesh_intersect.py +0 -2
- warp/examples/core/example_nvdb.py +0 -2
- warp/examples/core/example_raycast.py +0 -2
- warp/examples/core/example_raymarch.py +0 -2
- warp/examples/core/example_render_opengl.py +0 -2
- warp/examples/core/example_sph.py +0 -2
- warp/examples/core/example_torch.py +0 -3
- warp/examples/core/example_wave.py +0 -2
- warp/examples/fem/example_apic_fluid.py +140 -115
- warp/examples/fem/example_burgers.py +262 -0
- warp/examples/fem/example_convection_diffusion.py +0 -2
- warp/examples/fem/example_convection_diffusion_dg.py +0 -2
- warp/examples/fem/example_deformed_geometry.py +0 -2
- warp/examples/fem/example_diffusion.py +0 -2
- warp/examples/fem/example_diffusion_3d.py +5 -4
- warp/examples/fem/example_diffusion_mgpu.py +0 -2
- warp/examples/fem/example_mixed_elasticity.py +0 -2
- warp/examples/fem/example_navier_stokes.py +0 -2
- warp/examples/fem/example_stokes.py +0 -2
- warp/examples/fem/example_stokes_transfer.py +0 -2
- warp/examples/optim/example_bounce.py +0 -2
- warp/examples/optim/example_cloth_throw.py +0 -2
- warp/examples/optim/example_diffray.py +0 -2
- warp/examples/optim/example_drone.py +0 -2
- warp/examples/optim/example_inverse_kinematics.py +0 -2
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -2
- warp/examples/optim/example_spring_cage.py +0 -2
- warp/examples/optim/example_trajectory.py +0 -2
- warp/examples/optim/example_walker.py +0 -2
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth.py +0 -2
- warp/examples/sim/example_granular.py +0 -2
- warp/examples/sim/example_granular_collision_sdf.py +0 -2
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_particle_chain.py +0 -2
- warp/examples/sim/example_quadruped.py +0 -2
- warp/examples/sim/example_rigid_chain.py +0 -2
- warp/examples/sim/example_rigid_contact.py +0 -2
- warp/examples/sim/example_rigid_force.py +0 -2
- warp/examples/sim/example_rigid_gyroscopic.py +0 -2
- warp/examples/sim/example_rigid_soft_contact.py +0 -2
- warp/examples/sim/example_soft_body.py +0 -2
- warp/fem/__init__.py +1 -0
- warp/fem/cache.py +3 -1
- warp/fem/geometry/__init__.py +1 -0
- warp/fem/geometry/element.py +4 -0
- warp/fem/geometry/grid_3d.py +0 -4
- warp/fem/geometry/nanogrid.py +455 -0
- warp/fem/integrate.py +63 -9
- warp/fem/space/__init__.py +43 -158
- warp/fem/space/basis_space.py +34 -0
- warp/fem/space/collocated_function_space.py +1 -1
- warp/fem/space/grid_2d_function_space.py +13 -132
- warp/fem/space/grid_3d_function_space.py +16 -154
- warp/fem/space/hexmesh_function_space.py +37 -134
- warp/fem/space/nanogrid_function_space.py +202 -0
- warp/fem/space/quadmesh_2d_function_space.py +12 -119
- warp/fem/space/restriction.py +4 -1
- warp/fem/space/shape/__init__.py +77 -0
- warp/fem/space/shape/cube_shape_function.py +5 -15
- warp/fem/space/tetmesh_function_space.py +6 -76
- warp/fem/space/trimesh_2d_function_space.py +6 -76
- warp/native/array.h +12 -3
- warp/native/builtin.h +48 -5
- warp/native/bvh.cpp +14 -10
- warp/native/bvh.cu +23 -15
- warp/native/bvh.h +1 -0
- warp/native/clang/clang.cpp +2 -1
- warp/native/crt.cpp +11 -1
- warp/native/crt.h +18 -1
- warp/native/exports.h +187 -0
- warp/native/mat.h +47 -0
- warp/native/mesh.cpp +1 -1
- warp/native/mesh.cu +1 -2
- warp/native/nanovdb/GridHandle.h +366 -0
- warp/native/nanovdb/HostBuffer.h +590 -0
- warp/native/nanovdb/NanoVDB.h +3999 -2157
- warp/native/nanovdb/PNanoVDB.h +936 -99
- warp/native/quat.h +28 -1
- warp/native/rand.h +5 -1
- warp/native/vec.h +45 -1
- warp/native/volume.cpp +335 -103
- warp/native/volume.cu +39 -13
- warp/native/volume.h +725 -303
- warp/native/volume_builder.cu +381 -360
- warp/native/volume_builder.h +16 -1
- warp/native/volume_impl.h +61 -0
- warp/native/warp.cu +8 -2
- warp/native/warp.h +15 -7
- warp/render/render_opengl.py +191 -52
- warp/sim/integrator_featherstone.py +10 -3
- warp/sim/integrator_xpbd.py +16 -22
- warp/sparse.py +89 -27
- warp/stubs.py +83 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/aux_test_dependent.py +0 -2
- warp/tests/aux_test_grad_customs.py +0 -2
- warp/tests/aux_test_reference.py +0 -2
- warp/tests/aux_test_reference_reference.py +0 -2
- warp/tests/aux_test_square.py +0 -2
- warp/tests/disabled_kinematics.py +0 -2
- warp/tests/test_adam.py +0 -2
- warp/tests/test_arithmetic.py +0 -36
- warp/tests/test_array.py +9 -11
- warp/tests/test_array_reduce.py +0 -2
- warp/tests/test_async.py +0 -2
- warp/tests/test_atomic.py +0 -2
- warp/tests/test_bool.py +58 -50
- warp/tests/test_builtins_resolution.py +0 -2
- warp/tests/test_bvh.py +0 -2
- warp/tests/test_closest_point_edge_edge.py +0 -1
- warp/tests/test_codegen.py +0 -4
- warp/tests/test_compile_consts.py +130 -10
- warp/tests/test_conditional.py +0 -2
- warp/tests/test_copy.py +0 -2
- warp/tests/test_ctypes.py +6 -8
- warp/tests/test_dense.py +0 -2
- warp/tests/test_devices.py +0 -2
- warp/tests/test_dlpack.py +9 -11
- warp/tests/test_examples.py +42 -39
- warp/tests/test_fabricarray.py +0 -3
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +75 -54
- warp/tests/test_fp16.py +0 -2
- warp/tests/test_func.py +0 -2
- warp/tests/test_generics.py +27 -2
- warp/tests/test_grad.py +147 -8
- warp/tests/test_grad_customs.py +0 -2
- warp/tests/test_hash_grid.py +1 -3
- warp/tests/test_import.py +0 -2
- warp/tests/test_indexedarray.py +0 -2
- warp/tests/test_intersect.py +0 -2
- warp/tests/test_jax.py +0 -2
- warp/tests/test_large.py +11 -9
- warp/tests/test_launch.py +0 -2
- warp/tests/test_lerp.py +10 -54
- warp/tests/test_linear_solvers.py +3 -5
- warp/tests/test_lvalue.py +0 -2
- warp/tests/test_marching_cubes.py +0 -2
- warp/tests/test_mat.py +0 -2
- warp/tests/test_mat_lite.py +0 -2
- warp/tests/test_mat_scalar_ops.py +0 -2
- warp/tests/test_math.py +0 -2
- warp/tests/test_matmul.py +35 -37
- warp/tests/test_matmul_lite.py +29 -31
- warp/tests/test_mempool.py +0 -2
- warp/tests/test_mesh.py +0 -3
- warp/tests/test_mesh_query_aabb.py +0 -2
- warp/tests/test_mesh_query_point.py +0 -2
- warp/tests/test_mesh_query_ray.py +0 -2
- warp/tests/test_mlp.py +0 -2
- warp/tests/test_model.py +0 -2
- warp/tests/test_module_hashing.py +111 -0
- warp/tests/test_modules_lite.py +0 -3
- warp/tests/test_multigpu.py +0 -2
- warp/tests/test_noise.py +0 -4
- warp/tests/test_operators.py +0 -2
- warp/tests/test_options.py +0 -2
- warp/tests/test_peer.py +0 -2
- warp/tests/test_pinned.py +0 -2
- warp/tests/test_print.py +0 -2
- warp/tests/test_quat.py +0 -2
- warp/tests/test_rand.py +41 -5
- warp/tests/test_reload.py +0 -10
- warp/tests/test_rounding.py +0 -2
- warp/tests/test_runlength_encode.py +0 -2
- warp/tests/test_sim_grad.py +0 -2
- warp/tests/test_sim_kinematics.py +0 -2
- warp/tests/test_smoothstep.py +0 -2
- warp/tests/test_snippet.py +0 -2
- warp/tests/test_sparse.py +0 -2
- warp/tests/test_spatial.py +0 -2
- warp/tests/test_special_values.py +362 -0
- warp/tests/test_streams.py +0 -2
- warp/tests/test_struct.py +0 -2
- warp/tests/test_tape.py +0 -2
- warp/tests/test_torch.py +0 -2
- warp/tests/test_transient_module.py +0 -2
- warp/tests/test_types.py +0 -2
- warp/tests/test_utils.py +0 -2
- warp/tests/test_vec.py +0 -2
- warp/tests/test_vec_lite.py +0 -2
- warp/tests/test_vec_scalar_ops.py +0 -2
- warp/tests/test_verify_fp.py +0 -2
- warp/tests/test_volume.py +237 -13
- warp/tests/test_volume_write.py +86 -3
- warp/tests/unittest_serial.py +10 -9
- warp/tests/unittest_suites.py +6 -2
- warp/tests/unittest_utils.py +2 -171
- warp/tests/unused_test_misc.py +0 -2
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +37 -40
- warp/types.py +514 -77
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.0.dist-info}/METADATA +57 -30
- warp_lang-1.2.0.dist-info/RECORD +359 -0
- warp/examples/fem/example_convection_diffusion_dg0.py +0 -204
- warp/native/nanovdb/PNanoVDBWrite.h +0 -295
- warp_lang-1.1.0.dist-info/RECORD +0 -352
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.1.0.dist-info → warp_lang-1.2.0.dist-info}/top_level.txt +0 -0
warp/native/volume_builder.cu
CHANGED
@@ -1,425 +1,446 @@
 #include "volume_builder.h"
 
+#include <nanovdb/tools/cuda/PointsToGrid.cuh>
+
 #include <cuda.h>
 #include <cuda_runtime_api.h>
 
 #include <cub/cub.cuh>
-#include <cub/util_allocator.cuh>
-
-// Explanation of key types
-// ------------------------
-//
-// leaf_key:
-// .__.__. .... .__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.
-// 63 62 .... 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
-// XX|< tile key >|< upper offset >|< lower offset >|
-//
-// tile key (36 bit):
-//     (uint32(ijk[2]) >> ChildT::TOTAL) |
-//     (uint64_t(uint32(ijk[1]) >> ChildT::TOTAL)) << 12 |
-//     (uint64_t(uint32(ijk[0]) >> ChildT::TOTAL)) << 24
-//
-// lower_key (51 bits) == leaf_key >> 12
-//
-// upper_key (36 bits) == lower_key >> 15 == leaf_key >> 27 == tile key
-
-CUDA_CALLABLE inline uint64_t coord_to_full_key(const nanovdb::Coord& ijk)
-{
-    using Tree = nanovdb::FloatTree; // any type is fine at this point
-    assert((abs(ijk[0]) >> 24) == 0);
-    assert((abs(ijk[1]) >> 24) == 0);
-    assert((abs(ijk[2]) >> 24) == 0);
-    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
-    const uint64_t tile_key36 =
-        ((uint32_t(ijk[2]) >> 12) & MASK_12BITS) | // z is the lower 12 bits
-        (uint64_t((uint32_t(ijk[1]) >> 12) & MASK_12BITS) << 12) | // y is the middle 12 bits
-        (uint64_t((uint32_t(ijk[0]) >> 12) & MASK_12BITS) << 24); // x is the upper 12 bits
-    const uint32_t upper_offset = Tree::Node2::CoordToOffset(ijk);
-    const uint32_t lower_offset = Tree::Node1::CoordToOffset(ijk);
-    return (tile_key36 << 27) | (upper_offset << 12) | lower_offset;
-}
 
-
-
+#if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+// dynamic initialization is not supported for a function-scope static __shared__ variable within a
+// __device__/__global__ function
+#pragma nv_diag_suppress 20054
+#elif defined(__NVCC__)
+#pragma diag_suppress 20054
+#endif
+namespace
+{
+/// Allocator class following interface of cub::cachingDeviceAllocator, as expected by naovdb::PointsToGrid
+struct Allocator
 {
-    const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-    if (tid >= num_points) return;
 
-
-
+    cudaError_t DeviceAllocate(void **d_ptr, ///< [out] Reference to pointer to the allocation
+                               size_t bytes, ///< [in] Minimum number of bytes for the allocation
+                               cudaStream_t active_stream) ///< [in] The stream to be associated with this allocation
+    {
+        // in PointsToGrid stream argument always coincide with current stream, ignore
+        *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
+        return cudaSuccess;
+    }
+
+    cudaError_t DeviceFree(void *d_ptr)
+    {
+        free_device(WP_CURRENT_CONTEXT, d_ptr);
+        return cudaSuccess;
+    }
+
+    cudaError_t FreeAllCached()
+    {
+        return cudaSuccess;
+    }
+};
 
-
-
+/// @brief Implementation of NanoVDB's DeviceBuffer that uses warp allocators
+class DeviceBuffer
 {
-
-
+    uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device)
+    void *mCpuData, *mGpuData; // raw pointers to the host and device buffers
+    bool mManaged;
+
+public:
+    /// @brief Static factory method that return an instance of this buffer
+    /// @param size byte size of buffer to be initialized
+    /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param stream optional stream argument (defaults to stream NULL)
+    /// @return An instance of this class using move semantics
+    static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, bool host = true,
+                               void *stream = nullptr)
+    {
+        return DeviceBuffer(size, host, stream);
+    }
 
-
-
-
+    /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory
+    /// @param size byte size of buffer specified by external memory
+    /// @param cpuData pointer to externally managed host memory
+    /// @param gpuData pointer to externally managed device memory
+    /// @return An instance of this class using move semantics
+    static DeviceBuffer create(uint64_t size, void *cpuData, void *gpuData)
+    {
+        return DeviceBuffer(size, cpuData, gpuData);
+    }
 
-
-
-
-
-
-
-
-
-
+    /// @brief Constructor
+    /// @param size byte size of buffer to be initialized
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param stream optional stream argument (defaults to stream NULL)
+    DeviceBuffer(uint64_t size = 0, bool host = true, void *stream = nullptr)
+        : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false)
+    {
+        if (size > 0)
+            this->init(size, host, stream);
+    }
+
+    DeviceBuffer(uint64_t size, void *cpuData, void *gpuData)
+        : mSize(size), mCpuData(cpuData), mGpuData(gpuData), mManaged(false)
+    {
+    }
 
+    /// @brief Disallow copy-construction
+    DeviceBuffer(const DeviceBuffer &) = delete;
 
-
-
-
-
-
+    /// @brief Move copy-constructor
+    DeviceBuffer(DeviceBuffer &&other) noexcept
+        : mSize(other.mSize), mCpuData(other.mCpuData), mGpuData(other.mGpuData), mManaged(other.mManaged)
+    {
+        other.mSize = 0;
+        other.mCpuData = nullptr;
+        other.mGpuData = nullptr;
+        other.mManaged = false;
     }
-};
 
-
-
-    using BASE = cub::TransformInputIterator<OutType, ShiftRight<bits, InType, OutType>, InType*>;
-    CUDA_CALLABLE inline ShiftRightIterator(uint64_t* input_itr)
-        : BASE(input_itr, ShiftRight<bits, InType, OutType>()) {}
-};
+    /// @brief Disallow copy assignment operation
+    DeviceBuffer &operator=(const DeviceBuffer &) = delete;
 
+    /// @brief Move copy assignment operation
+    DeviceBuffer &operator=(DeviceBuffer &&other) noexcept
+    {
+        this->clear();
+        mSize = other.mSize;
+        mCpuData = other.mCpuData;
+        mGpuData = other.mGpuData;
+        mManaged = other.mManaged;
+        other.mSize = 0;
+        other.mCpuData = nullptr;
+        other.mGpuData = nullptr;
+        other.mManaged = false;
+        return *this;
+    }
 
-
-
-
-
-
-
+    /// @brief Destructor frees memory on both the host and device
+    ~DeviceBuffer()
+    {
+        this->clear();
+    };
+
+    /// @brief Initialize buffer
+    /// @param size byte size of buffer to be initialized
+    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @note All existing buffers are first cleared
+    /// @warning size is expected to be non-zero. Use clear() clear buffer!
+    void init(uint64_t size, bool host = true, void *stream = nullptr)
+    {
+        if (mSize > 0)
+            this->clear(stream);
+        NANOVDB_ASSERT(size > 0);
+        if (host)
+        {
+            mCpuData =
+                alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
+        }
+        else
+        {
+            mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
+        }
+        mSize = size;
+        mManaged = true;
+    }
 
-
-
-
-
-
-
-
-
-
+    /// @brief Returns a raw pointer to the host/CPU buffer managed by this allocator.
+    /// @warning Note that the pointer can be NULL!
+    void *data() const
+    {
+        return mCpuData;
+    }
+
+    /// @brief Returns a raw pointer to the device/GPU buffer managed by this allocator.
+    /// @warning Note that the pointer can be NULL!
+    void *deviceData() const
+    {
+        return mGpuData;
+    }
+
+    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
+    uint64_t size() const
+    {
+        return mSize;
+    }
 
-
-
+    //@{
+    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
+    bool empty() const
+    {
+        return mSize == 0;
+    }
+    bool isEmpty() const
+    {
+        return mSize == 0;
+    }
+    //@}
+
+    /// @brief Detach device data so it is not dealloced when this buffer is destroyed
+    void detachDeviceData()
+    {
+        mGpuData = nullptr;
+        if (!mCpuData)
+        {
+            mSize = 0;
+        }
+    }
+
+    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
+    void clear(void *stream = nullptr)
+    {
+        if (mManaged && mGpuData)
+            free_device(WP_CURRENT_CONTEXT, mGpuData);
+        if (mManaged && mCpuData)
+            free_pinned(mCpuData);
+        mCpuData = mGpuData = nullptr;
+        mSize = 0;
+        mManaged = false;
+    }
+
+}; // DeviceBuffer class
+
+template <typename Tree> __global__ void activateAllLeafVoxels(Tree *tree)
 {
-
-    const Tile *tiles = reinterpret_cast<const Tile *>(root_data + 1);
-    const auto key = RootDataType::CoordToKey(ijk);
+    const unsigned leaf_count = tree->mNodeCount[0];
 
-
+    const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (tid < leaf_count)
     {
-
-
+        // activate all leaf voxels
+        typename Tree::LeafNodeType *const leaf_nodes = tree->getFirstLeaf();
+        typename Tree::LeafNodeType &leaf = leaf_nodes[tid];
+        leaf.mValueMask.setOn();
+        leaf.updateBBox();
+    }
+
+    if (tid == 0)
+    {
+        tree->mVoxelCount = Tree::LeafNodeType::SIZE * leaf_count; // full leaves
    }
-    return nullptr;
 }
 
-
-
-
+template <typename Node>
+__device__ std::enable_if_t<!nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+    Node &node, unsigned tile_id, const typename Node::BuildType background_value)
 {
-
-    if (tid >= num_items) return;
-    f(tid, args...);
+    node.setValue(tile_id, background_value);
 }
 
-template <typename
-
-
-    const void *points,
-    size_t num_points,
-    bool points_in_world_space,
-    const BuildGridParams<BuildT> &params)
+template <typename Node>
+__device__ std::enable_if_t<nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+    Node &node, unsigned tile_id, const typename Node::BuildType background_value)
 {
-
-    const BuildT ZERO_VAL{0};
-    const FloatT ZERO_SCALAR{0};
-
-    // Don't want to access "params" in kernels
-    const double dx = params.voxel_size;
-    const double Tx = params.translation[0], Ty = params.translation[1], Tz = params.translation[2];
-    const BuildT background_value = params.background_value;
+}
 
-
-
+template <typename Node>
+__device__ std::enable_if_t<!nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+    Node &node, const typename Node::BuildType background_value)
+{
+    node.mBackground = background_value;
+}
 
-
-
+template <typename Node>
+__device__ std::enable_if_t<nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+    Node &node, const typename Node::BuildType background_value)
+{
+}
 
-
-
-
-
-
-    uint32_t* node_counts;
-    uint32_t leaf_count, lower_node_count, upper_node_count;
+template <typename Tree, typename NodeT>
+__global__ void setInternalBBoxAndBackgroundValue(Tree *tree, const typename Tree::BuildType background_value)
+{
+    using BBox = nanovdb::math::BBox<typename NodeT::CoordT>;
+    __shared__ BBox bbox;
 
-
-
+    const unsigned node_count = tree->mNodeCount[NodeT::LEVEL];
+    const unsigned node_id = blockIdx.x;
 
-
+    if (node_id < node_count)
     {
-
-
-
-
-        allocator.DeviceAllocate((void**)&all_leaf_keys_sorted, sizeof(uint64_t) * num_points);
-
-        num_blocks = (static_cast<unsigned int>(num_points) + num_threads - 1) / num_threads;
-        if (points_in_world_space) {
-            generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Vec3f*>(points), all_leaf_keys, static_cast<float>(1.0 / dx), nanovdb::Vec3f(params.translation));
-        } else {
-            generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Coord*>(points), all_leaf_keys);
+
+        if (threadIdx.x == 0)
+        {
+            bbox = BBox();
        }
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        // Get the keys unique to lower nodes and the number of them
-        allocator.DeviceAllocate((void**)&lower_keys, sizeof(uint64_t) * leaf_count);
-        cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
-        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-        cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
-        allocator.DeviceFree(d_temp_storage);
-        check_cuda(cudaMemcpy(&lower_node_count, node_counts + 1, sizeof(uint32_t), cudaMemcpyDeviceToHost));
-
-        // Get the keys unique to upper nodes and the number of them
-        allocator.DeviceAllocate((void**)&upper_keys, sizeof(uint64_t) * lower_node_count);
-        cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
-        allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-        cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
-        allocator.DeviceFree(d_temp_storage);
-        check_cuda(cudaMemcpy(&upper_node_count, node_counts + 2, sizeof(uint32_t), cudaMemcpyDeviceToHost));
+        __syncthreads();
+
+        NodeT &node = tree->template getFirstNode<NodeT>()[node_id];
+        for (unsigned child_id = threadIdx.x; child_id < NodeT::SIZE; child_id += blockDim.x)
+        {
+            if (node.isChild(child_id))
+            {
+                bbox.expandAtomic(node.getChild(child_id)->bbox());
+            }
+            else
+            {
+                setBackgroundValue(node, child_id, background_value);
+            }
+        }
+
+        __syncthreads();
+
+        if (threadIdx.x == 0)
+        {
+            node.mBBox = bbox;
+        }
    }
+}
 
-
-
-
-
-
-
-
-
-
-
-
-
-    const int64_t upper_mem_offset =
-        sizeof(nanovdb::GridData) + sizeof(Tree) + sizeof(typename Tree::RootType) +
-        sizeof(typename Tree::RootType::Tile) * upper_node_count;
-    const int64_t lower_mem_offset = upper_mem_offset + sizeof(typename Tree::Node2) * upper_node_count;
-    const int64_t leaf_mem_offset = lower_mem_offset + sizeof(typename Tree::Node1) * lower_node_count;
-
-    typename Grid::DataType* grid;
-    check_cuda(cudaMalloc(&grid, total_bytes));
-
-    typename Tree::DataType* const tree = reinterpret_cast<typename Tree::DataType*>(grid + 1); // The tree is immediately after the grid
-    typename Tree::RootType::DataType* const root = reinterpret_cast<typename Tree::RootType::DataType*>(tree + 1); // The root is immediately after the tree
-    typename Tree::RootType::Tile* const tiles = reinterpret_cast<typename Tree::RootType::Tile*>(root + 1);
-    typename Tree::Node2::DataType* const upper_nodes = nanovdb::PtrAdd<typename Tree::Node2::DataType>(grid, upper_mem_offset);
-    typename Tree::Node1::DataType* const lower_nodes = nanovdb::PtrAdd<typename Tree::Node1::DataType>(grid, lower_mem_offset);
-    typename Tree::Node0::DataType* const leaf_nodes = nanovdb::PtrAdd<typename Tree::Node0::DataType>(grid, leaf_mem_offset);
-
-    // Phase 2: building the tree
+template <typename Tree>
+__global__ void setRootBBoxAndBackgroundValue(nanovdb::Grid<Tree> *grid,
+                                              const typename Tree::BuildType background_value)
+{
+    using BBox = typename Tree::RootNodeType::BBoxType;
+    __shared__ BBox bbox;
+
+    Tree &tree = grid->tree();
+    const unsigned upper_count = tree.mNodeCount[2];
+
+    if (threadIdx.x == 0)
    {
-
-        kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
-            tree->mNodeOffset[3] = sizeof(Tree);
-            tree->mNodeOffset[2] = tree->mNodeOffset[3] + sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count;
-            tree->mNodeOffset[1] = tree->mNodeOffset[2] + sizeof(typename Tree::Node2) * upper_node_count;
-            tree->mNodeOffset[0] = tree->mNodeOffset[1] + sizeof(typename Tree::Node1) * lower_node_count;
-            tree->mNodeCount[2] = tree->mTileCount[2] = upper_node_count;
-            tree->mNodeCount[1] = tree->mTileCount[1] = lower_node_count;
-            tree->mNodeCount[0] = tree->mTileCount[0] = leaf_count;
-            tree->mVoxelCount = Tree::Node0::SIZE * leaf_count; // assuming full leaves
-
-            root->mBBox = nanovdb::CoordBBox(); // init to empty
-            root->mTableSize = upper_node_count;
-            root->mBackground = background_value;
-            root->mMinimum = ZERO_VAL;
-            root->mMaximum = ZERO_VAL;
-            root->mAverage = ZERO_SCALAR;
-            root->mStdDevi = ZERO_SCALAR;
-        });
+        bbox = BBox();
    }
 
-
-
-
+    __syncthreads();
+
+    for (unsigned upper_id = threadIdx.x; upper_id < upper_count; upper_id += blockDim.x)
    {
-
-
-        tiles[i].child = sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count + sizeof(typename Tree::Node2) * i;
-        tiles[i].state = 0;
-        tiles[i].value = background_value;
-
-        assert(reinterpret_cast<const char*>(root->getChild(tiles + i)) == reinterpret_cast<const char*>(upper_nodes + i));
-        auto& node = upper_nodes[i];
-        node.mBBox = nanovdb::CoordBBox();
-        node.mFlags = 0;
-        node.mValueMask.setOff();
-        node.mChildMask.setOff();
-        node.mMinimum = ZERO_VAL;
-        node.mMaximum = ZERO_VAL;
-        node.mAverage = ZERO_SCALAR;
-        node.mStdDevi = ZERO_SCALAR;
-        for (size_t n = 0; n < Tree::Node2::SIZE; ++n) {
-            node.mTable[n].value = background_value;
-        }
-    });
+        typename Tree::UpperNodeType &upper = tree.getFirstUpper()[upper_id];
+        bbox.expandAtomic(upper.bbox());
    }
 
-
-    constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
+    __syncthreads();
 
-
-    // i : 0 .. lower_node_count-1
-    num_blocks = (lower_node_count + num_threads - 1) / num_threads;
+    if (threadIdx.x == 0)
    {
-
-
-
-
-
-
-        auto& node = lower_nodes[i];
-        node.mBBox = nanovdb::CoordBBox();
-        node.mFlags = 0;
-        node.mValueMask.setOff();
-        node.mChildMask.setOff();
-        node.mMinimum = ZERO_VAL;
-        node.mMaximum = ZERO_VAL;
-        node.mAverage = ZERO_SCALAR;
-        node.mStdDevi = ZERO_SCALAR;
-        for (size_t n = 0; n < Tree::Node1::SIZE; ++n) {
-            node.mTable[n].value = background_value;
-        }
-    });
+        typename Tree::RootNodeType &root = tree.root();
+        setBackgroundValue(root, background_value);
+        root.mBBox = bbox;
+
+        grid->mWorldBBox = root.mBBox.transform(grid->map());
    }
+}
+
+template <typename BuildT>
+void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const BuildGridParams<BuildT> &params)
+{
+    // set background value, activate all voxels for allocated tiles and update bbox
+
+    using Tree = nanovdb::NanoTree<BuildT>;
+    Tree *tree = &out_grid.tree();
+
+    int node_counts[3];
+    memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
+    // synchronization below is unnecessary as node_counts is in pageable memory.
+    // keep it for clarity
+    cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
+    cuda_stream_synchronize(stream);
+
+    const unsigned int leaf_count = node_counts[0];
+    const unsigned int lower_count = node_counts[1];
+    const unsigned int upper_count = node_counts[2];
+
+    constexpr unsigned NUM_THREADS = 256;
+    const unsigned leaf_blocks = (leaf_count + NUM_THREADS - 1) / NUM_THREADS;
+    activateAllLeafVoxels<Tree><<<leaf_blocks, NUM_THREADS, 0, stream>>>(tree);
+
+    setInternalBBoxAndBackgroundValue<Tree, typename Tree::LowerNodeType>
+        <<<lower_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
+    setInternalBBoxAndBackgroundValue<Tree, typename Tree::UpperNodeType>
+        <<<upper_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
+    setRootBBoxAndBackgroundValue<Tree><<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value);
+
+    check_cuda(cuda_context_check(WP_CURRENT_CONTEXT));
+}
+
+template <>
+void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueOnIndex>> &out_grid,
+                   const BuildGridParams<nanovdb::ValueOnIndex> &params)
+{
+    // nothing to do for OnIndex grids
+}
+
+/// "fancy-pointer" that transforms from world to index coordinates
+struct WorldSpacePointsPtr
+{
+    const nanovdb::Vec3f *points;
+    const nanovdb::Map map;
 
-
-    // i : 0 .. leaf_count-1
-    num_blocks = (leaf_count + num_threads - 1) / num_threads;
+    __device__ nanovdb::Vec3f operator[](int idx) const
    {
-
-        uint32_t lower_offset = leaf_keys[i] & MASK_12BITS;
-        uint32_t upper_offset = (leaf_keys[i] >> 12) & MASK_15BITS;
-        const nanovdb::Coord ijk = tile_key36_to_coord(leaf_keys[i] >> 27);
-
-        auto* upper_node = root->getChild(find_tile(root, ijk))->data();
-        auto* lower_node = upper_node->getChild(upper_offset)->data();
-        set_mask_atomic(lower_node->mChildMask, lower_offset);
-        lower_node->setChild(lower_offset, leaf_nodes + i);
-
-        const nanovdb::Coord localUpperIjk = Tree::Node2::OffsetToLocalCoord(upper_offset) << Tree::Node1::TOTAL;
-        const nanovdb::Coord localLowerIjk = Tree::Node1::OffsetToLocalCoord(lower_offset) << Tree::Node0::TOTAL;
-        const nanovdb::Coord leafOrigin = ijk + localUpperIjk + localLowerIjk;
-
-        auto& node = leaf_nodes[i];
-        node.mBBoxMin = leafOrigin;
-        node.mBBoxDif[0] = leaf_nodes[i].mBBoxDif[1] = leaf_nodes[i].mBBoxDif[2] = Tree::Node0::DIM;
-        node.mFlags = 0;
-        node.mValueMask.setOn();
-        node.mMinimum = ZERO_VAL;
-        node.mMaximum = ZERO_VAL;
-        node.mAverage = ZERO_SCALAR;
-        node.mStdDevi = ZERO_SCALAR;
-        // mValues is undefined
-
-        // propagating bbox up:
-        expand_cwise_atomic(lower_node->mBBox, leafOrigin);
-        expand_cwise_atomic(lower_node->mBBox, leafOrigin + nanovdb::Coord(Tree::Node0::DIM));
-    });
+        return map.applyInverseMapF(points[idx]);
    }
 
-
-    // i : 0 .. lower_node_count-1
-    num_blocks = (lower_node_count + num_threads - 1) / num_threads;
+    __device__ nanovdb::Vec3f operator*() const
    {
-
-        auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data();
-        expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.min());
-        expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.max());
-    });
+        return (*this)[0];
    }
+};
 
-
-    {
-        kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
-            for (int i = 0; i < upper_node_count; ++i) {
-                root->mBBox.expand(upper_nodes[i].mBBox.min());
-                root->mBBox.expand(upper_nodes[i].mBBox.max());
-            }
+} // namespace
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+namespace nanovdb
+{
+template <> struct BufferTraits<DeviceBuffer>
+{
+    static constexpr bool hasDeviceDual = true;
+};
+
+} // namespace nanovdb
+
+template <typename BuildT>
+void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid, size_t &out_grid_size,
+                            const void *points, size_t num_points, bool points_in_world_space,
+                            const BuildGridParams<BuildT> &params)
+{
+
+    out_grid = nullptr;
+    out_grid_size = 0;
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
+    nanovdb::Map map(params.voxel_size, params.translation);
+    nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(map, stream);
+
+    // p2g.setVerbose(2);
+    p2g.setGridName(params.name);
+    p2g.setChecksum(nanovdb::CheckMode::Disable);
+
+    // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
+    p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
+
+    nanovdb::GridHandle<DeviceBuffer> grid_handle;
+
+    if (points_in_world_space)
+    {
+        grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), map}, num_points,
+                                    DeviceBuffer());
+    }
+    else
+    {
+        grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord *>(points), num_points, DeviceBuffer());
    }
 
-
+    out_grid = grid_handle.deviceGrid<BuildT>();
+    out_grid_size = grid_handle.gridSize();
 
-
-    allocator.DeviceFree(upper_keys);
-    allocator.DeviceFree(leaf_keys);
-    allocator.DeviceFree(node_counts);
+    finalize_grid(*out_grid, params);
 
-
-
+    // So that buffer is not destroyed when handles goes out of scope
+    grid_handle.buffer().detachDeviceData();
 }
 
-template void
-
-template void
+template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<float>> *&, size_t &, const void *, size_t, bool,
+                                     const BuildGridParams<float> &);
+template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::Vec3f>> *&, size_t &, const void *,
+                                     size_t, bool, const BuildGridParams<nanovdb::Vec3f> &);
+template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<int32_t>> *&, size_t &, const void *, size_t, bool,
+                                     const BuildGridParams<int32_t> &);
+template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueIndex>> *&, size_t &, const void *,
+                                     size_t, bool, const BuildGridParams<nanovdb::ValueIndex> &);
+template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueOnIndex>> *&, size_t &, const void *,
+                                     size_t, bool, const BuildGridParams<nanovdb::ValueOnIndex> &);