warp-lang 1.1.0-py3-none-manylinux2014_aarch64.whl → 1.2.1-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (218)
  1. warp/bin/warp-clang.so +0 -0
  2. warp/bin/warp.so +0 -0
  3. warp/build.py +10 -37
  4. warp/build_dll.py +2 -2
  5. warp/builtins.py +274 -6
  6. warp/codegen.py +51 -4
  7. warp/config.py +2 -2
  8. warp/constants.py +4 -0
  9. warp/context.py +422 -203
  10. warp/examples/benchmarks/benchmark_api.py +0 -2
  11. warp/examples/benchmarks/benchmark_cloth_warp.py +0 -1
  12. warp/examples/benchmarks/benchmark_launches.py +0 -2
  13. warp/examples/core/example_dem.py +0 -2
  14. warp/examples/core/example_fluid.py +0 -2
  15. warp/examples/core/example_graph_capture.py +0 -2
  16. warp/examples/core/example_marching_cubes.py +0 -2
  17. warp/examples/core/example_mesh.py +0 -2
  18. warp/examples/core/example_mesh_intersect.py +0 -2
  19. warp/examples/core/example_nvdb.py +0 -2
  20. warp/examples/core/example_raycast.py +0 -2
  21. warp/examples/core/example_raymarch.py +0 -2
  22. warp/examples/core/example_render_opengl.py +0 -2
  23. warp/examples/core/example_sph.py +0 -2
  24. warp/examples/core/example_torch.py +0 -3
  25. warp/examples/core/example_wave.py +0 -2
  26. warp/examples/fem/example_apic_fluid.py +140 -115
  27. warp/examples/fem/example_burgers.py +262 -0
  28. warp/examples/fem/example_convection_diffusion.py +0 -2
  29. warp/examples/fem/example_convection_diffusion_dg.py +0 -2
  30. warp/examples/fem/example_deformed_geometry.py +0 -2
  31. warp/examples/fem/example_diffusion.py +0 -2
  32. warp/examples/fem/example_diffusion_3d.py +5 -4
  33. warp/examples/fem/example_diffusion_mgpu.py +0 -2
  34. warp/examples/fem/example_mixed_elasticity.py +0 -2
  35. warp/examples/fem/example_navier_stokes.py +0 -2
  36. warp/examples/fem/example_stokes.py +0 -2
  37. warp/examples/fem/example_stokes_transfer.py +0 -2
  38. warp/examples/optim/example_bounce.py +0 -2
  39. warp/examples/optim/example_cloth_throw.py +0 -2
  40. warp/examples/optim/example_diffray.py +0 -2
  41. warp/examples/optim/example_drone.py +0 -2
  42. warp/examples/optim/example_inverse_kinematics.py +0 -2
  43. warp/examples/optim/example_inverse_kinematics_torch.py +0 -2
  44. warp/examples/optim/example_spring_cage.py +0 -2
  45. warp/examples/optim/example_trajectory.py +0 -2
  46. warp/examples/optim/example_walker.py +0 -2
  47. warp/examples/sim/example_cartpole.py +0 -2
  48. warp/examples/sim/example_cloth.py +0 -2
  49. warp/examples/sim/example_granular.py +0 -2
  50. warp/examples/sim/example_granular_collision_sdf.py +0 -2
  51. warp/examples/sim/example_jacobian_ik.py +0 -2
  52. warp/examples/sim/example_particle_chain.py +0 -2
  53. warp/examples/sim/example_quadruped.py +0 -2
  54. warp/examples/sim/example_rigid_chain.py +0 -2
  55. warp/examples/sim/example_rigid_contact.py +0 -2
  56. warp/examples/sim/example_rigid_force.py +0 -2
  57. warp/examples/sim/example_rigid_gyroscopic.py +0 -2
  58. warp/examples/sim/example_rigid_soft_contact.py +0 -2
  59. warp/examples/sim/example_soft_body.py +0 -2
  60. warp/fem/__init__.py +1 -0
  61. warp/fem/cache.py +3 -1
  62. warp/fem/geometry/__init__.py +1 -0
  63. warp/fem/geometry/element.py +4 -0
  64. warp/fem/geometry/grid_3d.py +0 -4
  65. warp/fem/geometry/nanogrid.py +455 -0
  66. warp/fem/integrate.py +63 -9
  67. warp/fem/space/__init__.py +43 -158
  68. warp/fem/space/basis_space.py +34 -0
  69. warp/fem/space/collocated_function_space.py +1 -1
  70. warp/fem/space/grid_2d_function_space.py +13 -132
  71. warp/fem/space/grid_3d_function_space.py +16 -154
  72. warp/fem/space/hexmesh_function_space.py +37 -134
  73. warp/fem/space/nanogrid_function_space.py +202 -0
  74. warp/fem/space/quadmesh_2d_function_space.py +12 -119
  75. warp/fem/space/restriction.py +4 -1
  76. warp/fem/space/shape/__init__.py +77 -0
  77. warp/fem/space/shape/cube_shape_function.py +5 -15
  78. warp/fem/space/tetmesh_function_space.py +6 -76
  79. warp/fem/space/trimesh_2d_function_space.py +6 -76
  80. warp/native/array.h +12 -3
  81. warp/native/builtin.h +48 -5
  82. warp/native/bvh.cpp +14 -10
  83. warp/native/bvh.cu +23 -15
  84. warp/native/bvh.h +1 -0
  85. warp/native/clang/clang.cpp +2 -1
  86. warp/native/crt.cpp +11 -1
  87. warp/native/crt.h +18 -1
  88. warp/native/exports.h +187 -0
  89. warp/native/mat.h +47 -0
  90. warp/native/mesh.cpp +1 -1
  91. warp/native/mesh.cu +1 -2
  92. warp/native/nanovdb/GridHandle.h +366 -0
  93. warp/native/nanovdb/HostBuffer.h +590 -0
  94. warp/native/nanovdb/NanoVDB.h +3999 -2157
  95. warp/native/nanovdb/PNanoVDB.h +936 -99
  96. warp/native/quat.h +28 -1
  97. warp/native/rand.h +5 -1
  98. warp/native/vec.h +45 -1
  99. warp/native/volume.cpp +335 -103
  100. warp/native/volume.cu +39 -13
  101. warp/native/volume.h +725 -303
  102. warp/native/volume_builder.cu +381 -360
  103. warp/native/volume_builder.h +16 -1
  104. warp/native/volume_impl.h +61 -0
  105. warp/native/warp.cu +8 -2
  106. warp/native/warp.h +15 -7
  107. warp/render/render_opengl.py +191 -52
  108. warp/sim/integrator_featherstone.py +10 -3
  109. warp/sim/integrator_xpbd.py +16 -22
  110. warp/sparse.py +89 -27
  111. warp/stubs.py +83 -0
  112. warp/tests/assets/test_index_grid.nvdb +0 -0
  113. warp/tests/aux_test_dependent.py +0 -2
  114. warp/tests/aux_test_grad_customs.py +0 -2
  115. warp/tests/aux_test_reference.py +0 -2
  116. warp/tests/aux_test_reference_reference.py +0 -2
  117. warp/tests/aux_test_square.py +0 -2
  118. warp/tests/disabled_kinematics.py +0 -2
  119. warp/tests/test_adam.py +0 -2
  120. warp/tests/test_arithmetic.py +0 -36
  121. warp/tests/test_array.py +9 -11
  122. warp/tests/test_array_reduce.py +0 -2
  123. warp/tests/test_async.py +0 -2
  124. warp/tests/test_atomic.py +0 -2
  125. warp/tests/test_bool.py +58 -50
  126. warp/tests/test_builtins_resolution.py +0 -2
  127. warp/tests/test_bvh.py +0 -2
  128. warp/tests/test_closest_point_edge_edge.py +0 -1
  129. warp/tests/test_codegen.py +0 -4
  130. warp/tests/test_compile_consts.py +130 -10
  131. warp/tests/test_conditional.py +0 -2
  132. warp/tests/test_copy.py +0 -2
  133. warp/tests/test_ctypes.py +6 -8
  134. warp/tests/test_dense.py +0 -2
  135. warp/tests/test_devices.py +0 -2
  136. warp/tests/test_dlpack.py +9 -11
  137. warp/tests/test_examples.py +42 -39
  138. warp/tests/test_fabricarray.py +0 -3
  139. warp/tests/test_fast_math.py +0 -2
  140. warp/tests/test_fem.py +75 -54
  141. warp/tests/test_fp16.py +0 -2
  142. warp/tests/test_func.py +0 -2
  143. warp/tests/test_generics.py +27 -2
  144. warp/tests/test_grad.py +147 -8
  145. warp/tests/test_grad_customs.py +0 -2
  146. warp/tests/test_hash_grid.py +1 -3
  147. warp/tests/test_import.py +0 -2
  148. warp/tests/test_indexedarray.py +0 -2
  149. warp/tests/test_intersect.py +0 -2
  150. warp/tests/test_jax.py +0 -2
  151. warp/tests/test_large.py +11 -9
  152. warp/tests/test_launch.py +0 -2
  153. warp/tests/test_lerp.py +10 -54
  154. warp/tests/test_linear_solvers.py +3 -5
  155. warp/tests/test_lvalue.py +0 -2
  156. warp/tests/test_marching_cubes.py +0 -2
  157. warp/tests/test_mat.py +0 -2
  158. warp/tests/test_mat_lite.py +0 -2
  159. warp/tests/test_mat_scalar_ops.py +0 -2
  160. warp/tests/test_math.py +0 -2
  161. warp/tests/test_matmul.py +35 -37
  162. warp/tests/test_matmul_lite.py +29 -31
  163. warp/tests/test_mempool.py +0 -2
  164. warp/tests/test_mesh.py +0 -3
  165. warp/tests/test_mesh_query_aabb.py +0 -2
  166. warp/tests/test_mesh_query_point.py +0 -2
  167. warp/tests/test_mesh_query_ray.py +0 -2
  168. warp/tests/test_mlp.py +0 -2
  169. warp/tests/test_model.py +0 -2
  170. warp/tests/test_module_hashing.py +111 -0
  171. warp/tests/test_modules_lite.py +0 -3
  172. warp/tests/test_multigpu.py +0 -2
  173. warp/tests/test_noise.py +0 -4
  174. warp/tests/test_operators.py +0 -2
  175. warp/tests/test_options.py +0 -2
  176. warp/tests/test_peer.py +0 -2
  177. warp/tests/test_pinned.py +0 -2
  178. warp/tests/test_print.py +0 -2
  179. warp/tests/test_quat.py +0 -2
  180. warp/tests/test_rand.py +41 -5
  181. warp/tests/test_reload.py +0 -10
  182. warp/tests/test_rounding.py +0 -2
  183. warp/tests/test_runlength_encode.py +0 -2
  184. warp/tests/test_sim_grad.py +0 -2
  185. warp/tests/test_sim_kinematics.py +0 -2
  186. warp/tests/test_smoothstep.py +0 -2
  187. warp/tests/test_snippet.py +0 -2
  188. warp/tests/test_sparse.py +0 -2
  189. warp/tests/test_spatial.py +0 -2
  190. warp/tests/test_special_values.py +362 -0
  191. warp/tests/test_streams.py +0 -2
  192. warp/tests/test_struct.py +0 -2
  193. warp/tests/test_tape.py +0 -2
  194. warp/tests/test_torch.py +0 -2
  195. warp/tests/test_transient_module.py +0 -2
  196. warp/tests/test_types.py +0 -2
  197. warp/tests/test_utils.py +0 -2
  198. warp/tests/test_vec.py +0 -2
  199. warp/tests/test_vec_lite.py +0 -2
  200. warp/tests/test_vec_scalar_ops.py +0 -2
  201. warp/tests/test_verify_fp.py +0 -2
  202. warp/tests/test_volume.py +237 -13
  203. warp/tests/test_volume_write.py +86 -3
  204. warp/tests/unittest_serial.py +10 -9
  205. warp/tests/unittest_suites.py +6 -2
  206. warp/tests/unittest_utils.py +2 -171
  207. warp/tests/unused_test_misc.py +0 -2
  208. warp/tests/walkthrough_debug.py +1 -1
  209. warp/thirdparty/unittest_parallel.py +37 -40
  210. warp/types.py +526 -85
  211. {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/METADATA +61 -31
  212. warp_lang-1.2.1.dist-info/RECORD +359 -0
  213. warp/examples/fem/example_convection_diffusion_dg0.py +0 -204
  214. warp/native/nanovdb/PNanoVDBWrite.h +0 -295
  215. warp_lang-1.1.0.dist-info/RECORD +0 -352
  216. {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/LICENSE.md +0 -0
  217. {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/WHEEL +0 -0
  218. {warp_lang-1.1.0.dist-info → warp_lang-1.2.1.dist-info}/top_level.txt +0 -0
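
Most of the churn in this release is the native sparse-volume overhaul: the bundled NanoVDB headers are updated (GridHandle.h and HostBuffer.h are new, NanoVDB.h and PNanoVDB.h are heavily revised), warp/native/volume_builder.cu is rewritten around NanoVDB's PointsToGrid tool, and index-grid support is added (see the test_volume.py and test_volume_write.py changes). For orientation before the full diff of warp/native/volume_builder.cu below, here is a minimal sketch of the Python-level entry point that ultimately drives that native builder. This is a hedged example: wp.Volume.allocate_by_tiles is the public API, but the exact 1.2.1 signature and defaults may differ.

import numpy as np
import warp as wp

wp.init()

# Index-space tile origins; each NanoVDB tile covers an 8x8x8 voxel leaf.
tile_points = wp.array(
    np.array([[0, 0, 0], [8, 0, 0], [0, 0, 8]], dtype=np.int32),
    dtype=wp.vec3i,
    device="cuda:0",
)

# Allocate a sparse float volume covering those tiles; this call routes into
# the grid-construction code diffed below.
volume = wp.Volume.allocate_by_tiles(
    tile_points, voxel_size=0.1, bg_value=0.0, device="cuda:0"
)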
warp/native/volume_builder.cu
@@ -1,425 +1,446 @@
  #include "volume_builder.h"
 
+ #include <nanovdb/tools/cuda/PointsToGrid.cuh>
+
  #include <cuda.h>
  #include <cuda_runtime_api.h>
 
  #include <cub/cub.cuh>
- #include <cub/util_allocator.cuh>
-
- // Explanation of key types
- // ------------------------
- //
- // leaf_key:
- // .__.__. .... .__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.__.
- // 63 62 .... 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
- // XX|< tile key >|< upper offset >|< lower offset >|
- //
- // tile key (36 bit):
- //     (uint32(ijk[2]) >> ChildT::TOTAL) |
- //     (uint64_t(uint32(ijk[1]) >> ChildT::TOTAL)) << 12 |
- //     (uint64_t(uint32(ijk[0]) >> ChildT::TOTAL)) << 24
- //
- // lower_key (51 bits) == leaf_key >> 12
- //
- // upper_key (36 bits) == lower_key >> 15 == leaf_key >> 27 == tile key
-
- CUDA_CALLABLE inline uint64_t coord_to_full_key(const nanovdb::Coord& ijk)
- {
-     using Tree = nanovdb::FloatTree; // any type is fine at this point
-     assert((abs(ijk[0]) >> 24) == 0);
-     assert((abs(ijk[1]) >> 24) == 0);
-     assert((abs(ijk[2]) >> 24) == 0);
-     constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
-     const uint64_t tile_key36 =
-         ((uint32_t(ijk[2]) >> 12) & MASK_12BITS) | // z is the lower 12 bits
-         (uint64_t((uint32_t(ijk[1]) >> 12) & MASK_12BITS) << 12) | // y is the middle 12 bits
-         (uint64_t((uint32_t(ijk[0]) >> 12) & MASK_12BITS) << 24); // x is the upper 12 bits
-     const uint32_t upper_offset = Tree::Node2::CoordToOffset(ijk);
-     const uint32_t lower_offset = Tree::Node1::CoordToOffset(ijk);
-     return (tile_key36 << 27) | (upper_offset << 12) | lower_offset;
- }
 
- __global__
- void generate_keys(size_t num_points, const nanovdb::Coord* points, uint64_t* all_leaf_keys)
+ #if defined(__NVCC_DIAG_PRAGMA_SUPPORT__)
+ // dynamic initialization is not supported for a function-scope static __shared__ variable within a
+ // __device__/__global__ function
+ #pragma nv_diag_suppress 20054
+ #elif defined(__NVCC__)
+ #pragma diag_suppress 20054
+ #endif
+ namespace
+ {
+ /// Allocator class following interface of cub::cachingDeviceAllocator, as expected by naovdb::PointsToGrid
+ struct Allocator
  {
-     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-     if (tid >= num_points) return;
 
-     all_leaf_keys[tid] = coord_to_full_key(points[tid]);
- }
+     cudaError_t DeviceAllocate(void **d_ptr,               ///< [out] Reference to pointer to the allocation
+                                size_t bytes,               ///< [in] Minimum number of bytes for the allocation
+                                cudaStream_t active_stream) ///< [in] The stream to be associated with this allocation
+     {
+         // in PointsToGrid stream argument always coincide with current stream, ignore
+         *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
+         return cudaSuccess;
+     }
+
+     cudaError_t DeviceFree(void *d_ptr)
+     {
+         free_device(WP_CURRENT_CONTEXT, d_ptr);
+         return cudaSuccess;
+     }
+
+     cudaError_t FreeAllCached()
+     {
+         return cudaSuccess;
+     }
+ };
 
- __global__
- void generate_keys(size_t num_points, const nanovdb::Vec3f* points, uint64_t* all_leaf_keys, float one_over_voxel_size, nanovdb::Vec3f translation)
+ /// @brief Implementation of NanoVDB's DeviceBuffer that uses warp allocators
+ class DeviceBuffer
  {
-     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-     if (tid >= num_points) return;
+     uint64_t mSize; // total number of bytes managed by this buffer (assumed to be identical for host and device)
+     void *mCpuData, *mGpuData; // raw pointers to the host and device buffers
+     bool mManaged;
+
+   public:
+     /// @brief Static factory method that return an instance of this buffer
+     /// @param size byte size of buffer to be initialized
+     /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
+     /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+     /// @param stream optional stream argument (defaults to stream NULL)
+     /// @return An instance of this class using move semantics
+     static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, bool host = true,
+                                void *stream = nullptr)
+     {
+         return DeviceBuffer(size, host, stream);
+     }
 
-     const nanovdb::Coord ijk = ((points[tid] - translation) * one_over_voxel_size).round();
-     all_leaf_keys[tid] = coord_to_full_key(ijk);
- }
+     /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory
+     /// @param size byte size of buffer specified by external memory
+     /// @param cpuData pointer to externally managed host memory
+     /// @param gpuData pointer to externally managed device memory
+     /// @return An instance of this class using move semantics
+     static DeviceBuffer create(uint64_t size, void *cpuData, void *gpuData)
+     {
+         return DeviceBuffer(size, cpuData, gpuData);
+     }
 
- // Convert a 36 bit tile key to the ijk origin of the addressed tile
- CUDA_CALLABLE inline nanovdb::Coord tile_key36_to_coord(uint64_t tile_key36) {
-     auto extend_sign = [](uint32_t i) -> int32_t { return i | ((i>>11 & 1) * 0xFFFFF800);};
-     constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
-     const int32_t i = extend_sign(uint32_t(tile_key36 >> 24) & MASK_12BITS);
-     const int32_t j = extend_sign(uint32_t(tile_key36 >> 12) & MASK_12BITS);
-     const int32_t k = extend_sign(uint32_t(tile_key36) & MASK_12BITS);
-     return nanovdb::Coord(i, j, k) << 12;
- }
+     /// @brief Constructor
+     /// @param size byte size of buffer to be initialized
+     /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+     /// @param stream optional stream argument (defaults to stream NULL)
+     DeviceBuffer(uint64_t size = 0, bool host = true, void *stream = nullptr)
+         : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false)
+     {
+         if (size > 0)
+             this->init(size, host, stream);
+     }
+
+     DeviceBuffer(uint64_t size, void *cpuData, void *gpuData)
+         : mSize(size), mCpuData(cpuData), mGpuData(gpuData), mManaged(false)
+     {
+     }
 
+     /// @brief Disallow copy-construction
+     DeviceBuffer(const DeviceBuffer &) = delete;
 
- // --- CUB helpers ---
- template<uint8_t bits, typename InType, typename OutType>
- struct ShiftRight {
-     CUDA_CALLABLE inline OutType operator()(const InType& v) const {
-         return static_cast<OutType>(v >> bits);
+     /// @brief Move copy-constructor
+     DeviceBuffer(DeviceBuffer &&other) noexcept
+         : mSize(other.mSize), mCpuData(other.mCpuData), mGpuData(other.mGpuData), mManaged(other.mManaged)
+     {
+         other.mSize = 0;
+         other.mCpuData = nullptr;
+         other.mGpuData = nullptr;
+         other.mManaged = false;
      }
- };
 
- template<uint8_t bits, typename InType = uint64_t, typename OutType = uint64_t>
- struct ShiftRightIterator : public cub::TransformInputIterator<OutType, ShiftRight<bits, InType, OutType>, InType*> {
-     using BASE = cub::TransformInputIterator<OutType, ShiftRight<bits, InType, OutType>, InType*>;
-     CUDA_CALLABLE inline ShiftRightIterator(uint64_t* input_itr)
-         : BASE(input_itr, ShiftRight<bits, InType, OutType>()) {}
- };
+     /// @brief Disallow copy assignment operation
+     DeviceBuffer &operator=(const DeviceBuffer &) = delete;
 
+     /// @brief Move copy assignment operation
+     DeviceBuffer &operator=(DeviceBuffer &&other) noexcept
+     {
+         this->clear();
+         mSize = other.mSize;
+         mCpuData = other.mCpuData;
+         mGpuData = other.mGpuData;
+         mManaged = other.mManaged;
+         other.mSize = 0;
+         other.mCpuData = nullptr;
+         other.mGpuData = nullptr;
+         other.mManaged = false;
+         return *this;
+     }
 
- // --- Atomic instructions for NanoVDB construction ---
- template<typename MaskT>
- CUDA_CALLABLE_DEVICE void set_mask_atomic(MaskT& mask, uint32_t n) {
-     unsigned long long int* words = reinterpret_cast<unsigned long long int*>(&mask);
-     atomicOr(words + (n / 64), 1ull << (n & 63));
- }
+     /// @brief Destructor frees memory on both the host and device
+     ~DeviceBuffer()
+     {
+         this->clear();
+     };
+
+     /// @brief Initialize buffer
+     /// @param size byte size of buffer to be initialized
+     /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+     /// @note All existing buffers are first cleared
+     /// @warning size is expected to be non-zero. Use clear() clear buffer!
+     void init(uint64_t size, bool host = true, void *stream = nullptr)
+     {
+         if (mSize > 0)
+             this->clear(stream);
+         NANOVDB_ASSERT(size > 0);
+         if (host)
+         {
+             mCpuData =
+                 alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
+         }
+         else
+         {
+             mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
+         }
+         mSize = size;
+         mManaged = true;
+     }
 
- template<typename Vec3T>
- CUDA_CALLABLE_DEVICE void expand_cwise_atomic(nanovdb::BBox<Vec3T>& bbox, const Vec3T& v) {
-     atomicMin(&bbox.mCoord[0][0], v[0]);
-     atomicMin(&bbox.mCoord[0][1], v[1]);
-     atomicMin(&bbox.mCoord[0][2], v[2]);
-     atomicMax(&bbox.mCoord[1][0], v[0]);
-     atomicMax(&bbox.mCoord[1][1], v[1]);
-     atomicMax(&bbox.mCoord[1][2], v[2]);
- }
+     /// @brief Returns a raw pointer to the host/CPU buffer managed by this allocator.
+     /// @warning Note that the pointer can be NULL!
+     void *data() const
+     {
+         return mCpuData;
+     }
+
+     /// @brief Returns a raw pointer to the device/GPU buffer managed by this allocator.
+     /// @warning Note that the pointer can be NULL!
+     void *deviceData() const
+     {
+         return mGpuData;
+     }
+
+     /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
+     uint64_t size() const
+     {
+         return mSize;
+     }
 
- template<typename RootDataType>
- __hostdev__ const typename RootDataType::Tile* find_tile(const RootDataType* root_data, const nanovdb::Coord& ijk)
+     //@{
+     /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
+     bool empty() const
+     {
+         return mSize == 0;
+     }
+     bool isEmpty() const
+     {
+         return mSize == 0;
+     }
+     //@}
+
+     /// @brief Detach device data so it is not dealloced when this buffer is destroyed
+     void detachDeviceData()
+     {
+         mGpuData = nullptr;
+         if (!mCpuData)
+         {
+             mSize = 0;
+         }
+     }
+
+     /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
+     void clear(void *stream = nullptr)
+     {
+         if (mManaged && mGpuData)
+             free_device(WP_CURRENT_CONTEXT, mGpuData);
+         if (mManaged && mCpuData)
+             free_pinned(mCpuData);
+         mCpuData = mGpuData = nullptr;
+         mSize = 0;
+         mManaged = false;
+     }
+
+ }; // DeviceBuffer class
+
+ template <typename Tree> __global__ void activateAllLeafVoxels(Tree *tree)
  {
-     using Tile = typename RootDataType::Tile;
-     const Tile *tiles = reinterpret_cast<const Tile *>(root_data + 1);
-     const auto key = RootDataType::CoordToKey(ijk);
+     const unsigned leaf_count = tree->mNodeCount[0];
 
-     for (uint32_t i = 0; i < root_data->mTableSize; ++i)
+     const unsigned tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+     if (tid < leaf_count)
      {
-         if (tiles[i].key == key)
-             return &tiles[i];
+         // activate all leaf voxels
+         typename Tree::LeafNodeType *const leaf_nodes = tree->getFirstLeaf();
+         typename Tree::LeafNodeType &leaf = leaf_nodes[tid];
+         leaf.mValueMask.setOn();
+         leaf.updateBBox();
+     }
+
+     if (tid == 0)
+     {
+         tree->mVoxelCount = Tree::LeafNodeType::SIZE * leaf_count; // full leaves
      }
-     return nullptr;
  }
 
- // --- Wrapper for launching lambda kernels
- template<typename Func, typename... Args>
- __global__ void kernel(const size_t num_items, Func f, Args... args)
+ template <typename Node>
+ __device__ std::enable_if_t<!nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+     Node &node, unsigned tile_id, const typename Node::BuildType background_value)
  {
-     const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-     if (tid >= num_items) return;
-     f(tid, args...);
+     node.setValue(tile_id, background_value);
  }
 
- template <typename BuildT>
- void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
-                            size_t &out_grid_size,
-                            const void *points,
-                            size_t num_points,
-                            bool points_in_world_space,
-                            const BuildGridParams<BuildT> &params)
+ template <typename Node>
+ __device__ std::enable_if_t<nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+     Node &node, unsigned tile_id, const typename Node::BuildType background_value)
  {
-     using FloatT = typename nanovdb::FloatTraits<BuildT>::FloatType;
-     const BuildT ZERO_VAL{0};
-     const FloatT ZERO_SCALAR{0};
-
-     // Don't want to access "params" in kernels
-     const double dx = params.voxel_size;
-     const double Tx = params.translation[0], Ty = params.translation[1], Tz = params.translation[2];
-     const BuildT background_value = params.background_value;
+ }
 
-     const unsigned int num_threads = 256;
-     unsigned int num_blocks;
+ template <typename Node>
+ __device__ std::enable_if_t<!nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+     Node &node, const typename Node::BuildType background_value)
+ {
+     node.mBackground = background_value;
+ }
 
-     out_grid = nullptr;
-     out_grid_size = 0;
+ template <typename Node>
+ __device__ std::enable_if_t<nanovdb::BuildTraits<typename Node::BuildType>::is_index> setBackgroundValue(
+     Node &node, const typename Node::BuildType background_value)
+ {
+ }
 
-     cub::CachingDeviceAllocator allocator;
-
-     uint64_t* leaf_keys;
-     uint64_t* lower_keys;
-     uint64_t* upper_keys;
-     uint32_t* node_counts;
-     uint32_t leaf_count, lower_node_count, upper_node_count;
+ template <typename Tree, typename NodeT>
+ __global__ void setInternalBBoxAndBackgroundValue(Tree *tree, const typename Tree::BuildType background_value)
+ {
+     using BBox = nanovdb::math::BBox<typename NodeT::CoordT>;
+     __shared__ BBox bbox;
 
-     allocator.DeviceAllocate((void**)&leaf_keys, sizeof(uint64_t) * num_points);
-     allocator.DeviceAllocate((void**)&node_counts, sizeof(uint32_t) * 3);
+     const unsigned node_count = tree->mNodeCount[NodeT::LEVEL];
+     const unsigned node_id = blockIdx.x;
 
-     // Phase 1: counting the nodes
+     if (node_id < node_count)
      {
-         // Generating keys from coords
-         uint64_t* all_leaf_keys;
-         uint64_t* all_leaf_keys_sorted;
-         allocator.DeviceAllocate((void**)&all_leaf_keys, sizeof(uint64_t) * num_points);
-         allocator.DeviceAllocate((void**)&all_leaf_keys_sorted, sizeof(uint64_t) * num_points);
-
-         num_blocks = (static_cast<unsigned int>(num_points) + num_threads - 1) / num_threads;
-         if (points_in_world_space) {
-             generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Vec3f*>(points), all_leaf_keys, static_cast<float>(1.0 / dx), nanovdb::Vec3f(params.translation));
-         } else {
-             generate_keys<<<num_blocks, num_threads>>>(num_points, static_cast<const nanovdb::Coord*>(points), all_leaf_keys);
+
+         if (threadIdx.x == 0)
+         {
+             bbox = BBox();
          }
 
-         void* d_temp_storage = nullptr;
-         size_t temp_storage_bytes;
-
-         // Sort the keys, then get an array of unique keys
-         cub::DeviceRadixSort::SortKeys(nullptr, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast<int>(num_points), /* begin_bit = */ 0, /* end_bit = */ 63);
-         allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-         cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, all_leaf_keys, all_leaf_keys_sorted, static_cast<int>(num_points), /* begin_bit = */ 0, /* end_bit = */ 63);
-         allocator.DeviceFree(d_temp_storage);
-
-         cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast<int>(num_points));
-         allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-         cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, all_leaf_keys_sorted, leaf_keys, node_counts, static_cast<int>(num_points));
-         allocator.DeviceFree(d_temp_storage);
-         check_cuda(cudaMemcpy(&leaf_count, node_counts, sizeof(uint32_t), cudaMemcpyDeviceToHost));
-
-         allocator.DeviceFree(all_leaf_keys);
-         all_leaf_keys = nullptr;
-         allocator.DeviceFree(all_leaf_keys_sorted);
-         all_leaf_keys_sorted = nullptr;
-
-
-         // Get the keys unique to lower nodes and the number of them
-         allocator.DeviceAllocate((void**)&lower_keys, sizeof(uint64_t) * leaf_count);
-         cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
-         allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-         cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<12>(leaf_keys), lower_keys, node_counts + 1, leaf_count);
-         allocator.DeviceFree(d_temp_storage);
-         check_cuda(cudaMemcpy(&lower_node_count, node_counts + 1, sizeof(uint32_t), cudaMemcpyDeviceToHost));
-
-         // Get the keys unique to upper nodes and the number of them
-         allocator.DeviceAllocate((void**)&upper_keys, sizeof(uint64_t) * lower_node_count);
-         cub::DeviceSelect::Unique(nullptr, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
-         allocator.DeviceAllocate((void**)&d_temp_storage, temp_storage_bytes);
-         cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, ShiftRightIterator<15>(lower_keys), upper_keys, node_counts + 2, lower_node_count);
-         allocator.DeviceFree(d_temp_storage);
-         check_cuda(cudaMemcpy(&upper_node_count, node_counts + 2, sizeof(uint32_t), cudaMemcpyDeviceToHost));
+         __syncthreads();
+
+         NodeT &node = tree->template getFirstNode<NodeT>()[node_id];
+         for (unsigned child_id = threadIdx.x; child_id < NodeT::SIZE; child_id += blockDim.x)
+         {
+             if (node.isChild(child_id))
+             {
+                 bbox.expandAtomic(node.getChild(child_id)->bbox());
+             }
+             else
+             {
+                 setBackgroundValue(node, child_id, background_value);
+             }
+         }
+
+         __syncthreads();
+
+         if (threadIdx.x == 0)
+         {
+             node.mBBox = bbox;
+         }
      }
+ }
 
-     using Tree = nanovdb::NanoTree<BuildT>;
-     using Grid = nanovdb::Grid<Tree>;
-
-     const size_t total_bytes =
-         sizeof(Grid) +
-         sizeof(Tree) +
-         sizeof(typename Tree::RootType) +
-         sizeof(typename Tree::RootType::Tile) * upper_node_count +
-         sizeof(typename Tree::Node2) * upper_node_count +
-         sizeof(typename Tree::Node1) * lower_node_count +
-         sizeof(typename Tree::Node0) * leaf_count;
-
-     const int64_t upper_mem_offset =
-         sizeof(nanovdb::GridData) + sizeof(Tree) + sizeof(typename Tree::RootType) +
-         sizeof(typename Tree::RootType::Tile) * upper_node_count;
-     const int64_t lower_mem_offset = upper_mem_offset + sizeof(typename Tree::Node2) * upper_node_count;
-     const int64_t leaf_mem_offset = lower_mem_offset + sizeof(typename Tree::Node1) * lower_node_count;
-
-     typename Grid::DataType* grid;
-     check_cuda(cudaMalloc(&grid, total_bytes));
-
-     typename Tree::DataType* const tree = reinterpret_cast<typename Tree::DataType*>(grid + 1); // The tree is immediately after the grid
-     typename Tree::RootType::DataType* const root = reinterpret_cast<typename Tree::RootType::DataType*>(tree + 1); // The root is immediately after the tree
-     typename Tree::RootType::Tile* const tiles = reinterpret_cast<typename Tree::RootType::Tile*>(root + 1);
-     typename Tree::Node2::DataType* const upper_nodes = nanovdb::PtrAdd<typename Tree::Node2::DataType>(grid, upper_mem_offset);
-     typename Tree::Node1::DataType* const lower_nodes = nanovdb::PtrAdd<typename Tree::Node1::DataType>(grid, lower_mem_offset);
-     typename Tree::Node0::DataType* const leaf_nodes = nanovdb::PtrAdd<typename Tree::Node0::DataType>(grid, leaf_mem_offset);
-
-     // Phase 2: building the tree
+ template <typename Tree>
+ __global__ void setRootBBoxAndBackgroundValue(nanovdb::Grid<Tree> *grid,
+                                               const typename Tree::BuildType background_value)
+ {
+     using BBox = typename Tree::RootNodeType::BBoxType;
+     __shared__ BBox bbox;
+
+     Tree &tree = grid->tree();
+     const unsigned upper_count = tree.mNodeCount[2];
+
+     if (threadIdx.x == 0)
      {
-         // Setting up the tree and root node
-         kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
-             tree->mNodeOffset[3] = sizeof(Tree);
-             tree->mNodeOffset[2] = tree->mNodeOffset[3] + sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count;
-             tree->mNodeOffset[1] = tree->mNodeOffset[2] + sizeof(typename Tree::Node2) * upper_node_count;
-             tree->mNodeOffset[0] = tree->mNodeOffset[1] + sizeof(typename Tree::Node1) * lower_node_count;
-             tree->mNodeCount[2] = tree->mTileCount[2] = upper_node_count;
-             tree->mNodeCount[1] = tree->mTileCount[1] = lower_node_count;
-             tree->mNodeCount[0] = tree->mTileCount[0] = leaf_count;
-             tree->mVoxelCount = Tree::Node0::SIZE * leaf_count; // assuming full leaves
-
-             root->mBBox = nanovdb::CoordBBox(); // init to empty
-             root->mTableSize = upper_node_count;
-             root->mBackground = background_value;
-             root->mMinimum = ZERO_VAL;
-             root->mMaximum = ZERO_VAL;
-             root->mAverage = ZERO_SCALAR;
-             root->mStdDevi = ZERO_SCALAR;
-         });
+         bbox = BBox();
      }
 
-     // Add tiles and upper nodes
-     // i : 0 .. upper_node_count-1
-     num_blocks = (upper_node_count + num_threads - 1) / num_threads;
+     __syncthreads();
+
+     for (unsigned upper_id = threadIdx.x; upper_id < upper_count; upper_id += blockDim.x)
      {
-         kernel<<<num_blocks, num_threads>>>(upper_node_count, [=] __device__(size_t i) {
-             tiles[i].key = root->CoordToKey(tile_key36_to_coord(upper_keys[i]));
-             tiles[i].child = sizeof(typename Tree::RootType) + sizeof(typename Tree::RootType::Tile) * upper_node_count + sizeof(typename Tree::Node2) * i;
-             tiles[i].state = 0;
-             tiles[i].value = background_value;
-
-             assert(reinterpret_cast<const char*>(root->getChild(tiles + i)) == reinterpret_cast<const char*>(upper_nodes + i));
-             auto& node = upper_nodes[i];
-             node.mBBox = nanovdb::CoordBBox();
-             node.mFlags = 0;
-             node.mValueMask.setOff();
-             node.mChildMask.setOff();
-             node.mMinimum = ZERO_VAL;
-             node.mMaximum = ZERO_VAL;
-             node.mAverage = ZERO_SCALAR;
-             node.mStdDevi = ZERO_SCALAR;
-             for (size_t n = 0; n < Tree::Node2::SIZE; ++n) {
-                 node.mTable[n].value = background_value;
-             }
-         });
+         typename Tree::UpperNodeType &upper = tree.getFirstUpper()[upper_id];
+         bbox.expandAtomic(upper.bbox());
      }
 
-     constexpr uint32_t MASK_15BITS = (1u << 15) - 1u;
-     constexpr uint32_t MASK_12BITS = (1u << 12) - 1u;
+     __syncthreads();
 
-     // Init lower nodes and register to parent
-     // i : 0 .. lower_node_count-1
-     num_blocks = (lower_node_count + num_threads - 1) / num_threads;
+     if (threadIdx.x == 0)
      {
-         kernel<<<num_blocks, num_threads>>>(lower_node_count, [=] __device__(size_t i) {
-             uint32_t upper_offset = lower_keys[i] & MASK_15BITS;
-             auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data();
-             set_mask_atomic(upper_node->mChildMask, upper_offset);
-             upper_node->setChild(upper_offset, lower_nodes + i);
-
-             auto& node = lower_nodes[i];
-             node.mBBox = nanovdb::CoordBBox();
-             node.mFlags = 0;
-             node.mValueMask.setOff();
-             node.mChildMask.setOff();
-             node.mMinimum = ZERO_VAL;
-             node.mMaximum = ZERO_VAL;
-             node.mAverage = ZERO_SCALAR;
-             node.mStdDevi = ZERO_SCALAR;
-             for (size_t n = 0; n < Tree::Node1::SIZE; ++n) {
-                 node.mTable[n].value = background_value;
-             }
-         });
+         typename Tree::RootNodeType &root = tree.root();
+         setBackgroundValue(root, background_value);
+         root.mBBox = bbox;
+
+         grid->mWorldBBox = root.mBBox.transform(grid->map());
      }
+ }
+
+ template <typename BuildT>
+ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const BuildGridParams<BuildT> &params)
+ {
+     // set background value, activate all voxels for allocated tiles and update bbox
+
+     using Tree = nanovdb::NanoTree<BuildT>;
+     Tree *tree = &out_grid.tree();
+
+     int node_counts[3];
+     memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
+     // synchronization below is unnecessary as node_counts is in pageable memory.
+     // keep it for clarity
+     cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
+     cuda_stream_synchronize(stream);
+
+     const unsigned int leaf_count = node_counts[0];
+     const unsigned int lower_count = node_counts[1];
+     const unsigned int upper_count = node_counts[2];
+
+     constexpr unsigned NUM_THREADS = 256;
+     const unsigned leaf_blocks = (leaf_count + NUM_THREADS - 1) / NUM_THREADS;
+     activateAllLeafVoxels<Tree><<<leaf_blocks, NUM_THREADS, 0, stream>>>(tree);
+
+     setInternalBBoxAndBackgroundValue<Tree, typename Tree::LowerNodeType>
+         <<<lower_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
+     setInternalBBoxAndBackgroundValue<Tree, typename Tree::UpperNodeType>
+         <<<upper_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
+     setRootBBoxAndBackgroundValue<Tree><<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value);
+
+     check_cuda(cuda_context_check(WP_CURRENT_CONTEXT));
+ }
+
+ template <>
+ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueOnIndex>> &out_grid,
+                    const BuildGridParams<nanovdb::ValueOnIndex> &params)
+ {
+     // nothing to do for OnIndex grids
+ }
+
+ /// "fancy-pointer" that transforms from world to index coordinates
+ struct WorldSpacePointsPtr
+ {
+     const nanovdb::Vec3f *points;
+     const nanovdb::Map map;
 
-     // Init leaf nodes and register to parent
-     // i : 0 .. leaf_count-1
-     num_blocks = (leaf_count + num_threads - 1) / num_threads;
+     __device__ nanovdb::Vec3f operator[](int idx) const
      {
-         kernel<<<num_blocks, num_threads>>>(leaf_count, [=] __device__(size_t i) {
-             uint32_t lower_offset = leaf_keys[i] & MASK_12BITS;
-             uint32_t upper_offset = (leaf_keys[i] >> 12) & MASK_15BITS;
-             const nanovdb::Coord ijk = tile_key36_to_coord(leaf_keys[i] >> 27);
-
-             auto* upper_node = root->getChild(find_tile(root, ijk))->data();
-             auto* lower_node = upper_node->getChild(upper_offset)->data();
-             set_mask_atomic(lower_node->mChildMask, lower_offset);
-             lower_node->setChild(lower_offset, leaf_nodes + i);
-
-             const nanovdb::Coord localUpperIjk = Tree::Node2::OffsetToLocalCoord(upper_offset) << Tree::Node1::TOTAL;
-             const nanovdb::Coord localLowerIjk = Tree::Node1::OffsetToLocalCoord(lower_offset) << Tree::Node0::TOTAL;
-             const nanovdb::Coord leafOrigin = ijk + localUpperIjk + localLowerIjk;
-
-             auto& node = leaf_nodes[i];
-             node.mBBoxMin = leafOrigin;
-             node.mBBoxDif[0] = leaf_nodes[i].mBBoxDif[1] = leaf_nodes[i].mBBoxDif[2] = Tree::Node0::DIM;
-             node.mFlags = 0;
-             node.mValueMask.setOn();
-             node.mMinimum = ZERO_VAL;
-             node.mMaximum = ZERO_VAL;
-             node.mAverage = ZERO_SCALAR;
-             node.mStdDevi = ZERO_SCALAR;
-             // mValues is undefined
-
-             // propagating bbox up:
-             expand_cwise_atomic(lower_node->mBBox, leafOrigin);
-             expand_cwise_atomic(lower_node->mBBox, leafOrigin + nanovdb::Coord(Tree::Node0::DIM));
-         });
+         return map.applyInverseMapF(points[idx]);
      }
 
-     // Propagating bounding boxes from lower nodes to upper nodes
-     // i : 0 .. lower_node_count-1
-     num_blocks = (lower_node_count + num_threads - 1) / num_threads;
+     __device__ nanovdb::Vec3f operator*() const
      {
-         kernel<<<num_blocks, num_threads>>>(lower_node_count, [=] __device__(size_t i) {
-             auto* upper_node = root->getChild(find_tile(root, tile_key36_to_coord(lower_keys[i] >> 15)))->data();
-             expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.min());
-             expand_cwise_atomic(upper_node->mBBox, lower_nodes[i].mBBox.max());
-         });
+         return (*this)[0];
      }
+ };
 
-     // Setting up root bounding box and grid
-     {
-         kernel<<<1, 1>>>(1, [=] __device__(size_t i) {
-             for (int i = 0; i < upper_node_count; ++i) {
-                 root->mBBox.expand(upper_nodes[i].mBBox.min());
-                 root->mBBox.expand(upper_nodes[i].mBBox.max());
-             }
+ } // namespace
 
-             nanovdb::Map map;
-             {
-                 const double mat[4][4] = {
-                     {dx, 0.0, 0.0, 0.0}, // row 0
-                     {0.0, dx, 0.0, 0.0}, // row 1
-                     {0.0, 0.0, dx, 0.0}, // row 2
-                     {Tx, Ty, Tz, 1.0}, // row 3
-                 };
-                 const double invMat[4][4] = {
-                     {1 / dx, 0.0, 0.0, 0.0}, // row 0
-                     {0.0, 1 / dx, 0.0, 0.0}, // row 1
-                     {0.0, 0.0, 1 / dx, 0.0}, // row 2
-                     {0.0, 0.0, 0.0, 0.0}, // row 3, ignored by Map::set
-                 };
-                 map.set(mat, invMat, 1.0);
-             }
+ namespace nanovdb
+ {
+ template <> struct BufferTraits<DeviceBuffer>
+ {
+     static constexpr bool hasDeviceDual = true;
+ };
+
+ } // namespace nanovdb
+
+ template <typename BuildT>
+ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid, size_t &out_grid_size,
+                             const void *points, size_t num_points, bool points_in_world_space,
+                             const BuildGridParams<BuildT> &params)
+ {
+
+     out_grid = nullptr;
+     out_grid_size = 0;
 
-             grid->mMagic = NANOVDB_MAGIC_NUMBER;
-             grid->mChecksum = 0xFFFFFFFFFFFFFFFFull;
-             grid->mVersion = nanovdb::Version();
-             grid->mFlags = static_cast<uint32_t>(nanovdb::GridFlags::HasBBox) |
-                            static_cast<uint32_t>(nanovdb::GridFlags::IsBreadthFirst);
-             grid->mGridIndex = 0;
-             grid->mGridCount = 1;
-             grid->mGridSize = total_bytes;
-             // mGridName is set below
-             grid->mWorldBBox.mCoord[0] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[0]));
-             grid->mWorldBBox.mCoord[1] = map.applyMap(nanovdb::Vec3R(root->mBBox.mCoord[1]));
-             grid->mVoxelSize = nanovdb::Vec3d(dx);
-             grid->mMap = map;
-             grid->mGridClass = nanovdb::GridClass::Unknown;
-             grid->mGridType = nanovdb::mapToGridType<BuildT>();
-             grid->mBlindMetadataOffset = total_bytes;
-             grid->mBlindMetadataCount = 0;
-         });
+     cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
+     nanovdb::Map map(params.voxel_size, params.translation);
+     nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(map, stream);
+
+     // p2g.setVerbose(2);
+     p2g.setGridName(params.name);
+     p2g.setChecksum(nanovdb::CheckMode::Disable);
+
+     // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
+     p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
+
+     nanovdb::GridHandle<DeviceBuffer> grid_handle;
+
+     if (points_in_world_space)
+     {
+         grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), map}, num_points,
+                                     DeviceBuffer());
+     }
+     else
+     {
+         grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord *>(points), num_points, DeviceBuffer());
      }
 
-     check_cuda(cudaMemcpy(grid->mGridName, params.name, 256, cudaMemcpyHostToDevice));
+     out_grid = grid_handle.deviceGrid<BuildT>();
+     out_grid_size = grid_handle.gridSize();
 
-     allocator.DeviceFree(lower_keys);
-     allocator.DeviceFree(upper_keys);
-     allocator.DeviceFree(leaf_keys);
-     allocator.DeviceFree(node_counts);
+     finalize_grid(*out_grid, params);
 
-     out_grid = reinterpret_cast<Grid*>(grid);
-     out_grid_size = total_bytes;
+     // So that buffer is not destroyed when handles goes out of scope
+     grid_handle.buffer().detachDeviceData();
  }
 
- template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<float>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<float>&);
- template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<nanovdb::Vec3f>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<nanovdb::Vec3f>&);
- template void build_grid_from_tiles(nanovdb::Grid<nanovdb::NanoTree<int32_t>>*&, size_t&, const void*, size_t, bool, const BuildGridParams<int32_t>&);
+ template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<float>> *&, size_t &, const void *, size_t, bool,
+                                      const BuildGridParams<float> &);
+ template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::Vec3f>> *&, size_t &, const void *,
+                                      size_t, bool, const BuildGridParams<nanovdb::Vec3f> &);
+ template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<int32_t>> *&, size_t &, const void *, size_t, bool,
+                                      const BuildGridParams<int32_t> &);
+ template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueIndex>> *&, size_t &, const void *,
+                                      size_t, bool, const BuildGridParams<nanovdb::ValueIndex> &);
+ template void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<nanovdb::ValueOnIndex>> *&, size_t &, const void *,
+                                      size_t, bool, const BuildGridParams<nanovdb::ValueOnIndex> &);
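
The rewrite splits construction in two: nanovdb::tools::cuda::PointsToGrid builds the tree topology, and finalize_grid then activates every voxel of each allocated leaf and writes the background value into unallocated tiles (index grids skip this step). A minimal, hedged sketch of the resulting contract from the Python side is shown below; the kernel itself is illustrative, while wp.volume_world_to_index, wp.volume_sample_f, and wp.Volume.LINEAR are existing Warp built-ins.

import warp as wp

wp.init()

@wp.kernel
def sample_world(volume: wp.uint64,
                 xyz: wp.array(dtype=wp.vec3),
                 values: wp.array(dtype=float)):
    tid = wp.tid()
    # Positions inside allocated tiles read stored voxel values; positions
    # outside fall back to the background value written by the finalize pass.
    uvw = wp.volume_world_to_index(volume, xyz[tid])
    values[tid] = wp.volume_sample_f(volume, uvw, wp.Volume.LINEAR)

# usage sketch: wp.launch(sample_world, dim=n, inputs=[volume.id, points, out])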