PyPI - warp-lang - Versions diffs - 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl - Mend

warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (134)

warp/__init__.py +282 -103
warp/__init__.pyi +482 -110
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +93 -30
warp/build_dll.py +47 -67
warp/builtins.py +955 -137
warp/codegen.py +312 -206
warp/config.py +1 -1
warp/context.py +1249 -784
warp/examples/core/example_marching_cubes.py +1 -0
warp/examples/core/example_render_opengl.py +100 -3
warp/examples/fem/example_apic_fluid.py +98 -52
warp/examples/fem/example_convection_diffusion_dg.py +25 -4
warp/examples/fem/example_diffusion_mgpu.py +8 -3
warp/examples/fem/utils.py +68 -22
warp/fabric.py +1 -1
warp/fem/cache.py +27 -19
warp/fem/domain.py +2 -2
warp/fem/field/nodal_field.py +2 -2
warp/fem/field/virtual.py +264 -166
warp/fem/geometry/geometry.py +5 -5
warp/fem/integrate.py +129 -51
warp/fem/space/restriction.py +4 -0
warp/fem/space/shape/tet_shape_function.py +3 -10
warp/jax_experimental/custom_call.py +1 -1
warp/jax_experimental/ffi.py +2 -1
warp/marching_cubes.py +708 -0
warp/native/array.h +99 -4
warp/native/builtin.h +82 -5
warp/native/bvh.cpp +64 -28
warp/native/bvh.cu +58 -58
warp/native/bvh.h +2 -2
warp/native/clang/clang.cpp +7 -7
warp/native/coloring.cpp +8 -2
warp/native/crt.cpp +2 -2
warp/native/crt.h +3 -5
warp/native/cuda_util.cpp +41 -10
warp/native/cuda_util.h +10 -4
warp/native/exports.h +1842 -1908
warp/native/fabric.h +2 -1
warp/native/hashgrid.cpp +37 -37
warp/native/hashgrid.cu +2 -2
warp/native/initializer_array.h +1 -1
warp/native/intersect.h +2 -2
warp/native/mat.h +1910 -116
warp/native/mathdx.cpp +43 -43
warp/native/mesh.cpp +24 -24
warp/native/mesh.cu +26 -26
warp/native/mesh.h +4 -2
warp/native/nanovdb/GridHandle.h +179 -12
warp/native/nanovdb/HostBuffer.h +8 -7
warp/native/nanovdb/NanoVDB.h +517 -895
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +2 -2
warp/native/quat.h +331 -14
warp/native/range.h +7 -1
warp/native/reduce.cpp +10 -10
warp/native/reduce.cu +13 -14
warp/native/runlength_encode.cpp +2 -2
warp/native/runlength_encode.cu +5 -5
warp/native/scan.cpp +3 -3
warp/native/scan.cu +4 -4
warp/native/sort.cpp +10 -10
warp/native/sort.cu +22 -22
warp/native/sparse.cpp +8 -8
warp/native/sparse.cu +13 -13
warp/native/spatial.h +366 -17
warp/native/temp_buffer.h +2 -2
warp/native/tile.h +283 -69
warp/native/vec.h +381 -14
warp/native/volume.cpp +54 -54
warp/native/volume.cu +1 -1
warp/native/volume.h +2 -1
warp/native/volume_builder.cu +30 -37
warp/native/warp.cpp +150 -149
warp/native/warp.cu +323 -192
warp/native/warp.h +227 -226
warp/optim/linear.py +736 -271
warp/render/imgui_manager.py +289 -0
warp/render/render_opengl.py +85 -6
warp/sim/graph_coloring.py +2 -2
warp/sparse.py +558 -175
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/cuda/test_async.py +3 -3
warp/tests/cuda/test_conditional_captures.py +101 -0
warp/tests/geometry/test_marching_cubes.py +233 -12
warp/tests/sim/test_coloring.py +6 -6
warp/tests/test_array.py +56 -5
warp/tests/test_codegen.py +3 -2
warp/tests/test_context.py +8 -15
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +2 -2
warp/tests/test_fem.py +45 -2
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_func.py +18 -15
warp/tests/test_future_annotations.py +7 -5
warp/tests/test_linear_solvers.py +30 -0
warp/tests/test_map.py +1 -1
warp/tests/test_mat.py +1518 -378
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +574 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_print.py +69 -0
warp/tests/test_quat.py +140 -34
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_reload.py +2 -1
warp/tests/test_sparse.py +71 -0
warp/tests/test_spatial.py +140 -34
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_struct.py +43 -3
warp/tests/test_types.py +0 -20
warp/tests/test_vec.py +179 -34
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/tile/test_tile.py +184 -18
warp/tests/tile/test_tile_cholesky.py +605 -0
warp/tests/tile/test_tile_load.py +169 -0
warp/tests/tile/test_tile_mathdx.py +2 -558
warp/tests/tile/test_tile_matmul.py +1 -1
warp/tests/tile/test_tile_mlp.py +1 -1
warp/tests/tile/test_tile_shared_memory.py +5 -5
warp/tests/unittest_suites.py +6 -0
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +108 -9
warp/types.py +554 -264
warp/utils.py +68 -86
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
warp/native/marching.cpp +0 -19
warp/native/marching.cu +0 -514
warp/native/marching.h +0 -19
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0

warp/native/volume.cpp CHANGED Viewed

@@ -85,7 +85,7 @@ void volume_set_map(nanovdb::Map& map, const float transform[9], const float tra
 } // anonymous namespace
 // NB: buf must be a host pointer
-uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
+uint64_t wp_volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 {
     if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
@@ -99,8 +99,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
     VolumeDesc volume;
     volume.context = NULL;
-    memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
-    memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
+    wp_memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
+    wp_memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
     if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
         return 0;
@@ -114,8 +114,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
     volume.size_in_bytes = size;
     if (copy)
     {
-        volume.buffer = alloc_host(size);
-        memcpy_h2h(volume.buffer, buf, size);
+        volume.buffer = wp_alloc_host(size);
+        wp_memcpy_h2h(volume.buffer, buf, size);
         volume.owner = true;
     }
     else
@@ -136,7 +136,7 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 }
 // NB: buf must be a pointer on the same device
-uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
+uint64_t wp_volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
 {
     if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
@@ -150,10 +150,10 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     ContextGuard guard(context);
     VolumeDesc volume;
-    volume.context = context ? context : cuda_context_get_current();
+    volume.context = context ? context : wp_cuda_context_get_current();
-    memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
-    memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
     // no sync needed since the above copies are to pageable memory
     if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
@@ -168,8 +168,8 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     volume.size_in_bytes = size;
     if (copy)
     {
-        volume.buffer = alloc_device(WP_CURRENT_CONTEXT, size);
-        memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
+        volume.buffer = wp_alloc_device(WP_CURRENT_CONTEXT, size);
+        wp_memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
         volume.owner = true;
     }
     else
@@ -180,9 +180,9 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     // Make blind metadata accessible on host
     const uint64_t blindmetadata_size = volume.grid_data.blind_metadata_count * sizeof(pnanovdb_gridblindmetadata_t);
-    volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(alloc_pinned(blindmetadata_size));
-    memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata,
-               static_cast<uint8_t*>(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size);
+    volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(wp_alloc_pinned(blindmetadata_size));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata,
+                  static_cast<uint8_t*>(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size);
     uint64_t id = (uint64_t)volume.buffer;
     volume_add_descriptor(id, std::move(volume));
@@ -190,7 +190,7 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     return id;
 }
-void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
+void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
 {
     *buf = 0;
     *size = 0;
@@ -203,7 +203,7 @@ void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
     }
 }
-void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
+void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
 {
     *dx = *dx = *dz = 0.0f;
@@ -216,7 +216,7 @@ void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
     }
 }
-void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
+void wp_volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
 {
     tile_count = 0;
     voxel_count = 0;
@@ -242,8 +242,8 @@ void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t
     }
 }
-const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
-                                 float translation[3], float transform[9], char type_str[16])
+const char* wp_volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
+                                    float translation[3], float transform[9], char type_str[16])
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -257,7 +257,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
         memcpy(transform, grid_data.map.matf, sizeof(grid_data.map.matf));
         nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(grid_data.grid_type));
-        return (const char*)grid_data.grid_name;
+        return reinterpret_cast<const char*>(grid_data.grid_name);
     }
     *grid_size = 0;
@@ -268,7 +268,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
     return nullptr;
 }
-uint32_t volume_get_blind_data_count(uint64_t id)
+uint32_t wp_volume_get_blind_data_count(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -278,8 +278,8 @@ uint32_t volume_get_blind_data_count(uint64_t id)
     return 0;
 }
-const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
-                                       uint32_t* value_size, char type_str[16])
+const char* wp_volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
+                                          uint32_t* value_size, char type_str[16])
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume) && data_index < volume->grid_data.blind_metadata_count)
@@ -291,7 +291,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
         nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(metadata.data_type));
         *buf = static_cast<uint8_t*>(volume->buffer) + volume->grid_data.blind_metadata_offset +
                data_index * sizeof(pnanovdb_gridblindmetadata_t) + metadata.data_offset;
-        return (const char*)metadata.name;
+        return reinterpret_cast<const char*>(metadata.name);
     }
     *buf = nullptr;
     *value_count = 0;
@@ -300,7 +300,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
     return nullptr;
 }
-void volume_get_tiles_host(uint64_t id, void* buf)
+void wp_volume_get_tiles_host(uint64_t id, void* buf)
 {
     static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations
@@ -325,14 +325,14 @@ void volume_get_tiles_host(uint64_t id, void* buf)
     }
 }
-void volume_get_voxels_host(uint64_t id, void* buf)
+void wp_volume_get_voxels_host(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         uint32_t leaf_count;
         uint64_t voxel_count;
-        volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
+        wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
         pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
@@ -361,20 +361,20 @@ void volume_get_voxels_host(uint64_t id, void* buf)
     }
 }
-void volume_destroy_host(uint64_t id)
+void wp_volume_destroy_host(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         if (volume->owner)
         {
-            free_host(volume->buffer);
+            wp_free_host(volume->buffer);
         }
         volume_rem_descriptor(id);
     }
 }
-void volume_destroy_device(uint64_t id)
+void wp_volume_destroy_device(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -382,18 +382,18 @@ void volume_destroy_device(uint64_t id)
         ContextGuard guard(volume->context);
         if (volume->owner)
         {
-            free_device(WP_CURRENT_CONTEXT, volume->buffer);
+            wp_free_device(WP_CURRENT_CONTEXT, volume->buffer);
         }
-        free_pinned(volume->blind_metadata);
+        wp_free_pinned(volume->blind_metadata);
         volume_rem_descriptor(id);
     }
 }
 #if WP_ENABLE_CUDA
-uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
-                                  bool points_in_world_space, const void* value_ptr, uint32_t value_size,
-                                  const char* value_type)
+uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
+                                     bool points_in_world_space, const void* value_ptr, uint32_t value_size,
+                                     const char* value_type)
 {
     char gridTypeStr[12];
@@ -407,7 +407,7 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
         size_t gridSize;                                                                                               \
         nanovdb::Grid<nanovdb::NanoTree<type>>* grid;                                                                  \
         build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);                     \
-        return volume_create_device(context, grid, gridSize, false, true);                                             \
+        return wp_volume_create_device(context, grid, gridSize, false, true);                                             \
     }
     WP_VOLUME_BUILDER_INSTANTIATE_TYPES
@@ -416,8 +416,8 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
     return 0;
 }
-uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-                                        float translation[3], bool points_in_world_space)
+uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+                                           float translation[3], bool points_in_world_space)
 {
     nanovdb::IndexGrid* grid;
     size_t gridSize;
@@ -426,11 +426,11 @@ uint64_t volume_index_from_tiles_device(void* context, void* points, int num_poi
     build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
-    return volume_create_device(context, grid, gridSize, false, true);
+    return wp_volume_create_device(context, grid, gridSize, false, true);
 }
-uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
-                                          float translation[3], bool points_in_world_space)
+uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
+                                             float translation[3], bool points_in_world_space)
 {
     nanovdb::OnIndexGrid* grid;
     size_t gridSize;
@@ -439,7 +439,7 @@ uint64_t volume_from_active_voxels_device(void* context, void* points, int num_p
     build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
-    return volume_create_device(context, grid, gridSize, false, true);
+    return wp_volume_create_device(context, grid, gridSize, false, true);
 }
 void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_coord_t* leaf_coords,
@@ -447,7 +447,7 @@ void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_c
 void launch_get_voxel_coords(void* context, const uint32_t leaf_count, const uint32_t voxel_count,
                              pnanovdb_coord_t* voxel_coords, pnanovdb_buf_t buf);
-void volume_get_tiles_device(uint64_t id, void* buf)
+void wp_volume_get_tiles_device(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -459,14 +459,14 @@ void volume_get_tiles_device(uint64_t id, void* buf)
     }
 }
-void volume_get_voxels_device(uint64_t id, void* buf)
+void wp_volume_get_voxels_device(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         uint32_t leaf_count;
         uint64_t voxel_count;
-        volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
+        wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
         pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
         launch_get_voxel_coords(volume->context, leaf_count, voxel_count, voxel_coords, volume->as_pnano());
@@ -475,27 +475,27 @@ void volume_get_voxels_device(uint64_t id, void* buf)
 #else
 // stubs for non-CUDA platforms
-uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-                                  float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
-                                  const char* value_type)
+uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+                                     float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
+                                     const char* value_type)
 {
     return 0;
 }
-uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-                                        float translation[3], bool points_in_world_space)
+uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+                                           float translation[3], bool points_in_world_space)
 {
     return 0;
 }
-uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
-                                          float translation[3], bool points_in_world_space)
+uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
+                                             float translation[3], bool points_in_world_space)
 {
     return 0;
 }
-void volume_get_tiles_device(uint64_t id, void* buf) {}
+void wp_volume_get_tiles_device(uint64_t id, void* buf) {}
-void volume_get_voxels_device(uint64_t id, void* buf) {}
+void wp_volume_get_voxels_device(uint64_t id, void* buf) {}
 #endif

warp/native/volume.cu CHANGED Viewed

@@ -62,6 +62,6 @@ void launch_get_voxel_coords(void *context, const uint32_t leaf_count, const uin
                              pnanovdb_coord_t *voxel_coords, pnanovdb_buf_t buf)
 {
     ContextGuard guard(context);
-    cudaStream_t stream = (cudaStream_t)cuda_stream_get_current();
+    cudaStream_t stream = (cudaStream_t)wp_cuda_stream_get_current();
     volume_get_voxel_coords<<<leaf_count, dim3(8, 8, 8), 0, stream>>>(voxel_count, voxel_coords, buf);
 }

warp/native/volume.h CHANGED Viewed

@@ -48,7 +48,7 @@ static constexpr int LINEAR = 1;
 CUDA_CALLABLE inline pnanovdb_buf_t id_to_buffer(uint64_t id)
 {
-    pnanovdb_buf_t buf;
+    pnanovdb_buf_t buf = {};  // Zero-initialize the entire struct
     buf.data = (uint32_t *)id;
     return buf;
 }
@@ -171,6 +171,7 @@ struct value_accessor_base
     explicit inline CUDA_CALLABLE value_accessor_base(const pnanovdb_buf_t buf) : buf(buf), root(get_root(buf))
     {
+        accessor = {};
     }
     CUDA_CALLABLE inline void init_cache()

warp/native/volume_builder.cu CHANGED Viewed

@@ -33,29 +33,22 @@
 #endif
 namespace
 {
-/// Allocator class following interface of cub::cachingDeviceAllocator, as expected by naovdb::PointsToGrid
-struct Allocator
+/// Resource class following interface of nanovdb::DeviceResource as expected by nanovdb::PointsToGrid
+class Resource
 {
+public:
+    // cudaMalloc aligns memory to 256 bytes by default
+    static constexpr size_t DEFAULT_ALIGNMENT = 256;
-    cudaError_t DeviceAllocate(void **d_ptr,               ///< [out] Reference to pointer to the allocation
-                               size_t bytes,               ///< [in] Minimum number of bytes for the allocation
-                               cudaStream_t active_stream) ///< [in] The stream to be associated with this allocation
-    {
-        // in PointsToGrid stream argument always coincide with current stream, ignore
-        *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
+    static void* allocateAsync(size_t bytes, size_t, cudaStream_t stream) {
+        // In PointsToGrid, the stream argument always coincides with current stream, ignore
+        void *d_ptr = wp_alloc_device(WP_CURRENT_CONTEXT, bytes);
         cudaCheckError();
-        return cudaSuccess;
+        return d_ptr;
     }
-    cudaError_t DeviceFree(void *d_ptr)
-    {
-        free_device(WP_CURRENT_CONTEXT, d_ptr);
-        return cudaSuccess;
-    }
-    cudaError_t FreeAllCached()
-    {
-        return cudaSuccess;
+    static void deallocateAsync(void *d_ptr, size_t, size_t, cudaStream_t stream) {
+        wp_free_device(WP_CURRENT_CONTEXT, d_ptr);
     }
 };
@@ -70,13 +63,13 @@ class DeviceBuffer
     /// @brief Static factory method that return an instance of this buffer
     /// @param size byte size of buffer to be initialized
     /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
-    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param device id of the device on which to initialize the buffer
     /// @param stream optional stream argument (defaults to stream NULL)
     /// @return An instance of this class using move semantics
-    static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, bool host = true,
-                               void *stream = nullptr)
+    static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, int device = cudaCpuDeviceId,
+                               cudaStream_t stream = nullptr)
     {
-        return DeviceBuffer(size, host, stream);
+        return DeviceBuffer(size, device, stream);
     }
     /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory
@@ -93,11 +86,11 @@ class DeviceBuffer
     /// @param size byte size of buffer to be initialized
     /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
     /// @param stream optional stream argument (defaults to stream NULL)
-    DeviceBuffer(uint64_t size = 0, bool host = true, void *stream = nullptr)
+    DeviceBuffer(uint64_t size = 0, int device = cudaCpuDeviceId, cudaStream_t stream = nullptr)
         : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false)
     {
         if (size > 0)
-            this->init(size, host, stream);
+            this->init(size, device, stream);
     }
     DeviceBuffer(uint64_t size, void *cpuData, void *gpuData)
@@ -144,22 +137,22 @@ class DeviceBuffer
     /// @brief Initialize buffer
     /// @param size byte size of buffer to be initialized
-    /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
+    /// @param device id of the device on which to initialize the buffer
     /// @note All existing buffers are first cleared
     /// @warning size is expected to be non-zero. Use clear() clear buffer!
-    void init(uint64_t size, bool host = true, void *stream = nullptr)
+    void init(uint64_t size, int device = cudaCpuDeviceId, void *stream = nullptr)
     {
         if (mSize > 0)
             this->clear(stream);
         NANOVDB_ASSERT(size > 0);
-        if (host)
+        if (device == cudaCpuDeviceId)
         {
             mCpuData =
-                alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
+                wp_alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
         }
         else
         {
-            mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
+            mGpuData = wp_alloc_device(WP_CURRENT_CONTEXT, size);
         }
         cudaCheckError();
         mSize = size;
@@ -212,9 +205,9 @@ class DeviceBuffer
     void clear(void *stream = nullptr)
     {
         if (mManaged && mGpuData)
-            free_device(WP_CURRENT_CONTEXT, mGpuData);
+            wp_free_device(WP_CURRENT_CONTEXT, mGpuData);
         if (mManaged && mCpuData)
-            free_pinned(mCpuData);
+            wp_free_pinned(mCpuData);
         mCpuData = mGpuData = nullptr;
         mSize = 0;
         mManaged = false;
@@ -367,11 +360,11 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
     Tree *tree = &out_grid.tree();
     int node_counts[3];
-    memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
     // synchronization below is unnecessary as node_counts is in pageable memory.
     // keep it for clarity
-    cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
-    cuda_stream_synchronize(stream);
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    wp_cuda_stream_synchronize(stream);
     const unsigned int leaf_count = node_counts[0];
     const unsigned int lower_count = node_counts[1];
@@ -387,7 +380,7 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
         <<<upper_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
     setRootBBoxAndBackgroundValue<Tree><<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value);
-    check_cuda(cuda_context_check(WP_CURRENT_CONTEXT));
+    check_cuda(wp_cuda_context_check(WP_CURRENT_CONTEXT));
 }
 template <>
@@ -437,8 +430,8 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
     try
     {
-        cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
-        nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
+        cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+        nanovdb::tools::cuda::PointsToGrid<BuildT, Resource> p2g(params.map, stream);
         // p2g.setVerbose(2);
         p2g.setGridName(params.name);