warp-lang: warp_lang-1.8.1-py3-none-macosx_10_13_universal2.whl → warp_lang-1.9.1-py3-none-macosx_10_13_universal2.whl
This diff compares the contents of two publicly released package versions as they appear in their public registry and is provided for informational purposes only.
Potentially problematic release: this version of warp-lang might be problematic.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/volume.cpp
CHANGED

@@ -85,7 +85,7 @@ void volume_set_map(nanovdb::Map& map, const float transform[9], const float tra
 } // anonymous namespace
 
 // NB: buf must be a host pointer
-uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
+uint64_t wp_volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 {
     if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
@@ -99,8 +99,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
     VolumeDesc volume;
     volume.context = NULL;
 
-    memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
-    memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
+    wp_memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
+    wp_memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
 
     if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
         return 0;
@@ -114,8 +114,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
     volume.size_in_bytes = size;
     if (copy)
     {
-        volume.buffer = alloc_host(size);
-        memcpy_h2h(volume.buffer, buf, size);
+        volume.buffer = wp_alloc_host(size);
+        wp_memcpy_h2h(volume.buffer, buf, size);
         volume.owner = true;
     }
     else
@@ -136,7 +136,7 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 }
 
 // NB: buf must be a pointer on the same device
-uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
+uint64_t wp_volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
 {
     if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
@@ -150,10 +150,10 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     ContextGuard guard(context);
 
     VolumeDesc volume;
-    volume.context = context ? context : cuda_context_get_current();
+    volume.context = context ? context : wp_cuda_context_get_current();
 
-    memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
-    memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
     // no sync needed since the above copies are to pageable memory
 
     if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
@@ -168,8 +168,8 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     volume.size_in_bytes = size;
     if (copy)
     {
-        volume.buffer = alloc_device(WP_CURRENT_CONTEXT, size);
-        memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
+        volume.buffer = wp_alloc_device(WP_CURRENT_CONTEXT, size);
+        wp_memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
         volume.owner = true;
     }
     else
@@ -180,9 +180,9 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
 
     // Make blind metadata accessible on host
     const uint64_t blindmetadata_size = volume.grid_data.blind_metadata_count * sizeof(pnanovdb_gridblindmetadata_t);
-    volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(
-
-
+    volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(wp_alloc_pinned(blindmetadata_size));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata,
+        static_cast<uint8_t*>(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size);
 
     uint64_t id = (uint64_t)volume.buffer;
     volume_add_descriptor(id, std::move(volume));
@@ -190,7 +190,7 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
     return id;
 }
 
-void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
+void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
 {
     *buf = 0;
     *size = 0;
@@ -203,7 +203,7 @@ void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
     }
 }
 
-void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
+void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
 {
     *dx = *dx = *dz = 0.0f;
 
@@ -216,7 +216,7 @@ void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
     }
 }
 
-void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
+void wp_volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
 {
     tile_count = 0;
     voxel_count = 0;
@@ -242,8 +242,8 @@ void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t
     }
 }
 
-const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
-    float translation[3], float transform[9], char type_str[16])
+const char* wp_volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
+    float translation[3], float transform[9], char type_str[16])
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -257,7 +257,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
         memcpy(transform, grid_data.map.matf, sizeof(grid_data.map.matf));
 
         nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(grid_data.grid_type));
-        return
+        return reinterpret_cast<const char*>(grid_data.grid_name);
     }
 
     *grid_size = 0;
@@ -268,7 +268,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
     return nullptr;
 }
 
-uint32_t volume_get_blind_data_count(uint64_t id)
+uint32_t wp_volume_get_blind_data_count(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -278,8 +278,8 @@ uint32_t volume_get_blind_data_count(uint64_t id)
     return 0;
 }
 
-const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
-    uint32_t* value_size, char type_str[16])
+const char* wp_volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
+    uint32_t* value_size, char type_str[16])
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume) && data_index < volume->grid_data.blind_metadata_count)
@@ -291,7 +291,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
         nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(metadata.data_type));
         *buf = static_cast<uint8_t*>(volume->buffer) + volume->grid_data.blind_metadata_offset +
             data_index * sizeof(pnanovdb_gridblindmetadata_t) + metadata.data_offset;
-        return
+        return reinterpret_cast<const char*>(metadata.name);
     }
     *buf = nullptr;
     *value_count = 0;
@@ -300,7 +300,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
     return nullptr;
 }
 
-void volume_get_tiles_host(uint64_t id, void* buf)
+void wp_volume_get_tiles_host(uint64_t id, void* buf)
 {
     static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations
 
@@ -325,14 +325,14 @@ void volume_get_tiles_host(uint64_t id, void* buf)
     }
 }
 
-void volume_get_voxels_host(uint64_t id, void* buf)
+void wp_volume_get_voxels_host(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         uint32_t leaf_count;
         uint64_t voxel_count;
-        volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
+        wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
 
         pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
 
@@ -361,20 +361,20 @@ void volume_get_voxels_host(uint64_t id, void* buf)
     }
 }
 
-void volume_destroy_host(uint64_t id)
+void wp_volume_destroy_host(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         if (volume->owner)
         {
-            free_host(volume->buffer);
+            wp_free_host(volume->buffer);
         }
         volume_rem_descriptor(id);
     }
 }
 
-void volume_destroy_device(uint64_t id)
+void wp_volume_destroy_device(uint64_t id)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -382,18 +382,18 @@ void volume_destroy_device(uint64_t id)
         ContextGuard guard(volume->context);
         if (volume->owner)
         {
-            free_device(WP_CURRENT_CONTEXT, volume->buffer);
+            wp_free_device(WP_CURRENT_CONTEXT, volume->buffer);
         }
-        free_pinned(volume->blind_metadata);
+        wp_free_pinned(volume->blind_metadata);
         volume_rem_descriptor(id);
     }
 }
 
 #if WP_ENABLE_CUDA
 
-uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
-    bool points_in_world_space, const void* value_ptr, uint32_t value_size,
-    const char* value_type)
+uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
+    bool points_in_world_space, const void* value_ptr, uint32_t value_size,
+    const char* value_type)
 {
     char gridTypeStr[12];
 
@@ -407,7 +407,7 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
         size_t gridSize; \
         nanovdb::Grid<nanovdb::NanoTree<type>>* grid; \
         build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); \
-        return volume_create_device(context, grid, gridSize, false, true); \
+        return wp_volume_create_device(context, grid, gridSize, false, true); \
     }
 
     WP_VOLUME_BUILDER_INSTANTIATE_TYPES
@@ -416,8 +416,8 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
     return 0;
 }
 
-uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-    float translation[3], bool points_in_world_space)
+uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+    float translation[3], bool points_in_world_space)
 {
     nanovdb::IndexGrid* grid;
     size_t gridSize;
@@ -426,11 +426,11 @@ uint64_t volume_index_from_tiles_device(void* context, void* points, int num_poi
 
     build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
 
-    return volume_create_device(context, grid, gridSize, false, true);
+    return wp_volume_create_device(context, grid, gridSize, false, true);
 }
 
-uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
-    float translation[3], bool points_in_world_space)
+uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
+    float translation[3], bool points_in_world_space)
 {
     nanovdb::OnIndexGrid* grid;
     size_t gridSize;
@@ -439,7 +439,7 @@ uint64_t volume_from_active_voxels_device(void* context, void* points, int num_p
 
     build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
 
-    return volume_create_device(context, grid, gridSize, false, true);
+    return wp_volume_create_device(context, grid, gridSize, false, true);
 }
 
 void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_coord_t* leaf_coords,
@@ -447,7 +447,7 @@ void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_c
 void launch_get_voxel_coords(void* context, const uint32_t leaf_count, const uint32_t voxel_count,
     pnanovdb_coord_t* voxel_coords, pnanovdb_buf_t buf);
 
-void volume_get_tiles_device(uint64_t id, void* buf)
+void wp_volume_get_tiles_device(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
@@ -459,14 +459,14 @@ void volume_get_tiles_device(uint64_t id, void* buf)
     }
 }
 
-void volume_get_voxels_device(uint64_t id, void* buf)
+void wp_volume_get_voxels_device(uint64_t id, void* buf)
 {
     const VolumeDesc* volume;
     if (volume_get_descriptor(id, volume))
     {
         uint32_t leaf_count;
         uint64_t voxel_count;
-        volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
+        wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
 
         pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
         launch_get_voxel_coords(volume->context, leaf_count, voxel_count, voxel_coords, volume->as_pnano());
@@ -475,27 +475,27 @@ void volume_get_voxels_device(uint64_t id, void* buf)
 
 #else
 // stubs for non-CUDA platforms
-uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-    float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
-    const char* value_type)
+uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+    float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
+    const char* value_type)
 {
     return 0;
 }
 
-uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
-    float translation[3], bool points_in_world_space)
+uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
+    float translation[3], bool points_in_world_space)
 {
     return 0;
 }
 
-uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
-    float translation[3], bool points_in_world_space)
+uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
+    float translation[3], bool points_in_world_space)
 {
     return 0;
 }
 
-void volume_get_tiles_device(uint64_t id, void* buf) {}
+void wp_volume_get_tiles_device(uint64_t id, void* buf) {}
 
-void volume_get_voxels_device(uint64_t id, void* buf) {}
+void wp_volume_get_voxels_device(uint64_t id, void* buf) {}
 
 #endif
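The volume.cpp changes above are a systematic rename of the exported entry points from volume_* to wp_volume_*, with the bodies otherwise only switching to the likewise-renamed allocation and copy helpers (wp_alloc_host, wp_memcpy_h2h, wp_alloc_device, and so on). Below is a minimal host-side sketch of driving the renamed API; the declarations mirror the signatures visible in the hunks, while the extern "C" linkage and the load_nanovdb_bytes() helper are assumptions made for illustration, not code from the package.

#include <cstdint>
#include <cstdio>
#include <vector>

// Signatures as they appear in the diff above; the linkage is assumed here.
extern "C" uint64_t wp_volume_create_host(void* buf, uint64_t size, bool copy, bool owner);
extern "C" void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size);
extern "C" void wp_volume_destroy_host(uint64_t id);

// Assumed helper: returns the bytes of a serialized NanoVDB grid.
std::vector<uint8_t> load_nanovdb_bytes();

int main()
{
    std::vector<uint8_t> grid = load_nanovdb_bytes();

    // copy=true duplicates the buffer via wp_alloc_host/wp_memcpy_h2h;
    // owner=true means wp_volume_destroy_host later frees that copy.
    uint64_t id = wp_volume_create_host(grid.data(), grid.size(), /*copy=*/true, /*owner=*/true);
    if (id == 0)
        return 1; // rejected: too small or missing the NanoVDB magic number (see the checks above)

    void* buf = nullptr;
    uint64_t size = 0;
    wp_volume_get_buffer_info(id, &buf, &size);
    std::printf("volume buffer %p, %llu bytes\n", buf, static_cast<unsigned long long>(size));

    wp_volume_destroy_host(id);
    return 0;
}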
warp/native/volume.cu
CHANGED

@@ -62,6 +62,6 @@ void launch_get_voxel_coords(void *context, const uint32_t leaf_count, const uin
     pnanovdb_coord_t *voxel_coords, pnanovdb_buf_t buf)
 {
     ContextGuard guard(context);
-    cudaStream_t stream = (cudaStream_t)cuda_stream_get_current();
+    cudaStream_t stream = (cudaStream_t)wp_cuda_stream_get_current();
     volume_get_voxel_coords<<<leaf_count, dim3(8, 8, 8), 0, stream>>>(voxel_count, voxel_coords, buf);
 }
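The only change to volume.cu is that the kernel launch now obtains the current stream through the renamed wp_cuda_stream_get_current export. A stand-alone CUDA sketch of the same launch pattern follows; the fill_coords kernel and its launcher are illustrative, only the stream-query export comes from the diff.

#include <cuda_runtime.h>

// Export seen in the diff above; it returns warp's current stream as a void*.
extern "C" void* wp_cuda_stream_get_current();

// Illustrative kernel, not part of warp.
__global__ void fill_coords(int n, int3* out)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        out[i] = make_int3(i, i, i);
}

void launch_fill_coords(int n, int3* out)
{
    // Launch on the stream warp considers current so the work stays ordered
    // with the rest of warp's operations on that stream.
    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
    const int block = 256;
    fill_coords<<<(n + block - 1) / block, block, 0, stream>>>(n, out);
}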
warp/native/volume.h
CHANGED

@@ -48,7 +48,7 @@ static constexpr int LINEAR = 1;
 
 CUDA_CALLABLE inline pnanovdb_buf_t id_to_buffer(uint64_t id)
 {
-    pnanovdb_buf_t buf;
+    pnanovdb_buf_t buf = {}; // Zero-initialize the entire struct
     buf.data = (uint32_t *)id;
     return buf;
 }
@@ -171,6 +171,7 @@ struct value_accessor_base
 
     explicit inline CUDA_CALLABLE value_accessor_base(const pnanovdb_buf_t buf) : buf(buf), root(get_root(buf))
     {
+        accessor = {};
     }
 
     CUDA_CALLABLE inline void init_cache()
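Both volume.h changes replace indeterminate state with value-initialization: pnanovdb_buf_t buf = {}; zeroes every member of the buffer struct before data is assigned, and accessor = {}; does the same for the accessor member in the value_accessor_base constructor. The small stand-alone C++ snippet below illustrates the difference; buf_like is a stand-in struct, not the real pnanovdb_buf_t.

#include <cstdint>
#include <cstdio>

// Stand-in for a PNanoVDB-style buffer struct with more than one member.
struct buf_like
{
    uint32_t* data;
    uint64_t  size_in_words;
};

int main()
{
    buf_like a;      // members are indeterminate; reading a.size_in_words here would be undefined
    buf_like b = {}; // value-initialization zeroes every member
    a.data = nullptr;
    b.data = nullptr;

    std::printf("b.size_in_words = %llu\n",
                static_cast<unsigned long long>(b.size_in_words)); // guaranteed to print 0
    return 0;
}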
warp/native/volume_builder.cu
CHANGED

@@ -33,29 +33,22 @@
 #endif
 namespace
 {
-///
-
+/// Resource class following interface of nanovdb::DeviceResource as expected by nanovdb::PointsToGrid
+class Resource
 {
+public:
+    // cudaMalloc aligns memory to 256 bytes by default
+    static constexpr size_t DEFAULT_ALIGNMENT = 256;
 
-
-
-
-    {
-        // in PointsToGrid stream argument always coincide with current stream, ignore
-        *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
+    static void* allocateAsync(size_t bytes, size_t, cudaStream_t stream) {
+        // In PointsToGrid, the stream argument always coincides with current stream, ignore
+        void *d_ptr = wp_alloc_device(WP_CURRENT_CONTEXT, bytes);
         cudaCheckError();
-        return
+        return d_ptr;
     }
 
-
-
-        free_device(WP_CURRENT_CONTEXT, d_ptr);
-        return cudaSuccess;
-    }
-
-    cudaError_t FreeAllCached()
-    {
-        return cudaSuccess;
+    static void deallocateAsync(void *d_ptr, size_t, size_t, cudaStream_t stream) {
+        wp_free_device(WP_CURRENT_CONTEXT, d_ptr);
     }
 };
 
@@ -70,13 +63,13 @@ class DeviceBuffer
     /// @brief Static factory method that return an instance of this buffer
     /// @param size byte size of buffer to be initialized
    /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
-    /// @param
+    /// @param device id of the device on which to initialize the buffer
     /// @param stream optional stream argument (defaults to stream NULL)
     /// @return An instance of this class using move semantics
-    static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr,
-
+    static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, int device = cudaCpuDeviceId,
+                               cudaStream_t stream = nullptr)
     {
-        return DeviceBuffer(size,
+        return DeviceBuffer(size, device, stream);
     }
 
     /// @brief Static factory method that return an instance of this buffer that wraps externally managed memory
@@ -93,11 +86,11 @@ class DeviceBuffer
     /// @param size byte size of buffer to be initialized
     /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
     /// @param stream optional stream argument (defaults to stream NULL)
-    DeviceBuffer(uint64_t size = 0,
+    DeviceBuffer(uint64_t size = 0, int device = cudaCpuDeviceId, cudaStream_t stream = nullptr)
         : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false)
     {
         if (size > 0)
-            this->init(size,
+            this->init(size, device, stream);
     }
 
     DeviceBuffer(uint64_t size, void *cpuData, void *gpuData)
@@ -144,22 +137,22 @@ class DeviceBuffer
 
     /// @brief Initialize buffer
     /// @param size byte size of buffer to be initialized
-    /// @param
+    /// @param device id of the device on which to initialize the buffer
     /// @note All existing buffers are first cleared
     /// @warning size is expected to be non-zero. Use clear() clear buffer!
-    void init(uint64_t size,
+    void init(uint64_t size, int device = cudaCpuDeviceId, void *stream = nullptr)
     {
         if (mSize > 0)
            this->clear(stream);
         NANOVDB_ASSERT(size > 0);
-        if (
+        if (device == cudaCpuDeviceId)
         {
             mCpuData =
-                alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
+                wp_alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
         }
         else
         {
-            mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
+            mGpuData = wp_alloc_device(WP_CURRENT_CONTEXT, size);
         }
         cudaCheckError();
         mSize = size;
@@ -212,9 +205,9 @@ class DeviceBuffer
     void clear(void *stream = nullptr)
     {
         if (mManaged && mGpuData)
-            free_device(WP_CURRENT_CONTEXT, mGpuData);
+            wp_free_device(WP_CURRENT_CONTEXT, mGpuData);
         if (mManaged && mCpuData)
-            free_pinned(mCpuData);
+            wp_free_pinned(mCpuData);
         mCpuData = mGpuData = nullptr;
         mSize = 0;
         mManaged = false;
@@ -367,11 +360,11 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
     Tree *tree = &out_grid.tree();
 
     int node_counts[3];
-    memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
+    wp_memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
     // synchronization below is unnecessary as node_counts is in pageable memory.
     // keep it for clarity
-    cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
-    cuda_stream_synchronize(stream);
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    wp_cuda_stream_synchronize(stream);
 
     const unsigned int leaf_count = node_counts[0];
     const unsigned int lower_count = node_counts[1];
@@ -387,7 +380,7 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
         <<<upper_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
     setRootBBoxAndBackgroundValue<Tree><<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value);
 
-    check_cuda(cuda_context_check(WP_CURRENT_CONTEXT));
+    check_cuda(wp_cuda_context_check(WP_CURRENT_CONTEXT));
 }
 
 template <>
@@ -437,8 +430,8 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
     try
     {
 
-        cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
-        nanovdb::tools::cuda::PointsToGrid<BuildT,
+        cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+        nanovdb::tools::cuda::PointsToGrid<BuildT, Resource> p2g(params.map, stream);
 
         // p2g.setVerbose(2);
         p2g.setGridName(params.name);
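The rewritten Resource class above exposes the static allocateAsync/deallocateAsync interface that nanovdb::tools::cuda::PointsToGrid is instantiated with, routing allocations through warp's wp_alloc_device/wp_free_device. The self-contained sketch below has the same shape but is wired to the CUDA runtime's stream-ordered allocator, purely to illustrate the interface; DemoResource and its use of cudaMallocAsync are assumptions, not code from the package.

#include <cuda_runtime.h>
#include <cstdio>

// Minimal resource with the allocateAsync/deallocateAsync shape used above,
// backed by CUDA's stream-ordered allocator (CUDA 11.2+).
class DemoResource
{
public:
    static constexpr size_t DEFAULT_ALIGNMENT = 256; // cudaMalloc's default alignment

    static void* allocateAsync(size_t bytes, size_t /*alignment*/, cudaStream_t stream)
    {
        void* ptr = nullptr;
        cudaMallocAsync(&ptr, bytes, stream);
        return ptr;
    }

    static void deallocateAsync(void* ptr, size_t /*bytes*/, size_t /*alignment*/, cudaStream_t stream)
    {
        cudaFreeAsync(ptr, stream);
    }
};

int main()
{
    cudaStream_t stream = nullptr; // default stream
    void* p = DemoResource::allocateAsync(1 << 20, DemoResource::DEFAULT_ALIGNMENT, stream);
    std::printf("allocated %p\n", p);
    DemoResource::deallocateAsync(p, 1 << 20, DemoResource::DEFAULT_ALIGNMENT, stream);
    cudaStreamSynchronize(stream);
    return 0;
}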