warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (134)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +47 -67
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +312 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1249 -784
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/fabric.py +1 -1
  18. warp/fem/cache.py +27 -19
  19. warp/fem/domain.py +2 -2
  20. warp/fem/field/nodal_field.py +2 -2
  21. warp/fem/field/virtual.py +264 -166
  22. warp/fem/geometry/geometry.py +5 -5
  23. warp/fem/integrate.py +129 -51
  24. warp/fem/space/restriction.py +4 -0
  25. warp/fem/space/shape/tet_shape_function.py +3 -10
  26. warp/jax_experimental/custom_call.py +1 -1
  27. warp/jax_experimental/ffi.py +2 -1
  28. warp/marching_cubes.py +708 -0
  29. warp/native/array.h +99 -4
  30. warp/native/builtin.h +82 -5
  31. warp/native/bvh.cpp +64 -28
  32. warp/native/bvh.cu +58 -58
  33. warp/native/bvh.h +2 -2
  34. warp/native/clang/clang.cpp +7 -7
  35. warp/native/coloring.cpp +8 -2
  36. warp/native/crt.cpp +2 -2
  37. warp/native/crt.h +3 -5
  38. warp/native/cuda_util.cpp +41 -10
  39. warp/native/cuda_util.h +10 -4
  40. warp/native/exports.h +1842 -1908
  41. warp/native/fabric.h +2 -1
  42. warp/native/hashgrid.cpp +37 -37
  43. warp/native/hashgrid.cu +2 -2
  44. warp/native/initializer_array.h +1 -1
  45. warp/native/intersect.h +2 -2
  46. warp/native/mat.h +1910 -116
  47. warp/native/mathdx.cpp +43 -43
  48. warp/native/mesh.cpp +24 -24
  49. warp/native/mesh.cu +26 -26
  50. warp/native/mesh.h +4 -2
  51. warp/native/nanovdb/GridHandle.h +179 -12
  52. warp/native/nanovdb/HostBuffer.h +8 -7
  53. warp/native/nanovdb/NanoVDB.h +517 -895
  54. warp/native/nanovdb/NodeManager.h +323 -0
  55. warp/native/nanovdb/PNanoVDB.h +2 -2
  56. warp/native/quat.h +331 -14
  57. warp/native/range.h +7 -1
  58. warp/native/reduce.cpp +10 -10
  59. warp/native/reduce.cu +13 -14
  60. warp/native/runlength_encode.cpp +2 -2
  61. warp/native/runlength_encode.cu +5 -5
  62. warp/native/scan.cpp +3 -3
  63. warp/native/scan.cu +4 -4
  64. warp/native/sort.cpp +10 -10
  65. warp/native/sort.cu +22 -22
  66. warp/native/sparse.cpp +8 -8
  67. warp/native/sparse.cu +13 -13
  68. warp/native/spatial.h +366 -17
  69. warp/native/temp_buffer.h +2 -2
  70. warp/native/tile.h +283 -69
  71. warp/native/vec.h +381 -14
  72. warp/native/volume.cpp +54 -54
  73. warp/native/volume.cu +1 -1
  74. warp/native/volume.h +2 -1
  75. warp/native/volume_builder.cu +30 -37
  76. warp/native/warp.cpp +150 -149
  77. warp/native/warp.cu +323 -192
  78. warp/native/warp.h +227 -226
  79. warp/optim/linear.py +736 -271
  80. warp/render/imgui_manager.py +289 -0
  81. warp/render/render_opengl.py +85 -6
  82. warp/sim/graph_coloring.py +2 -2
  83. warp/sparse.py +558 -175
  84. warp/tests/aux_test_module_aot.py +7 -0
  85. warp/tests/cuda/test_async.py +3 -3
  86. warp/tests/cuda/test_conditional_captures.py +101 -0
  87. warp/tests/geometry/test_marching_cubes.py +233 -12
  88. warp/tests/sim/test_coloring.py +6 -6
  89. warp/tests/test_array.py +56 -5
  90. warp/tests/test_codegen.py +3 -2
  91. warp/tests/test_context.py +8 -15
  92. warp/tests/test_enum.py +136 -0
  93. warp/tests/test_examples.py +2 -2
  94. warp/tests/test_fem.py +45 -2
  95. warp/tests/test_fixedarray.py +229 -0
  96. warp/tests/test_func.py +18 -15
  97. warp/tests/test_future_annotations.py +7 -5
  98. warp/tests/test_linear_solvers.py +30 -0
  99. warp/tests/test_map.py +1 -1
  100. warp/tests/test_mat.py +1518 -378
  101. warp/tests/test_mat_assign_copy.py +178 -0
  102. warp/tests/test_mat_constructors.py +574 -0
  103. warp/tests/test_module_aot.py +287 -0
  104. warp/tests/test_print.py +69 -0
  105. warp/tests/test_quat.py +140 -34
  106. warp/tests/test_quat_assign_copy.py +145 -0
  107. warp/tests/test_reload.py +2 -1
  108. warp/tests/test_sparse.py +71 -0
  109. warp/tests/test_spatial.py +140 -34
  110. warp/tests/test_spatial_assign_copy.py +160 -0
  111. warp/tests/test_struct.py +43 -3
  112. warp/tests/test_types.py +0 -20
  113. warp/tests/test_vec.py +179 -34
  114. warp/tests/test_vec_assign_copy.py +143 -0
  115. warp/tests/tile/test_tile.py +184 -18
  116. warp/tests/tile/test_tile_cholesky.py +605 -0
  117. warp/tests/tile/test_tile_load.py +169 -0
  118. warp/tests/tile/test_tile_mathdx.py +2 -558
  119. warp/tests/tile/test_tile_matmul.py +1 -1
  120. warp/tests/tile/test_tile_mlp.py +1 -1
  121. warp/tests/tile/test_tile_shared_memory.py +5 -5
  122. warp/tests/unittest_suites.py +6 -0
  123. warp/tests/walkthrough_debug.py +1 -1
  124. warp/thirdparty/unittest_parallel.py +108 -9
  125. warp/types.py +554 -264
  126. warp/utils.py +68 -86
  127. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  128. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
  129. warp/native/marching.cpp +0 -19
  130. warp/native/marching.cu +0 -514
  131. warp/native/marching.h +0 -19
  132. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  133. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/volume.cpp CHANGED
@@ -85,7 +85,7 @@ void volume_set_map(nanovdb::Map& map, const float transform[9], const float tra
85
85
  } // anonymous namespace
86
86
 
87
87
  // NB: buf must be a host pointer
88
- uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
88
+ uint64_t wp_volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
89
89
  {
90
90
  if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
91
91
  return 0; // This cannot be a valid NanoVDB grid with data
@@ -99,8 +99,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
99
99
  VolumeDesc volume;
100
100
  volume.context = NULL;
101
101
 
102
- memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
103
- memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
102
+ wp_memcpy_h2h(&volume.grid_data, buf, sizeof(pnanovdb_grid_t));
103
+ wp_memcpy_h2h(&volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
104
104
 
105
105
  if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
106
106
  return 0;
@@ -114,8 +114,8 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
114
114
  volume.size_in_bytes = size;
115
115
  if (copy)
116
116
  {
117
- volume.buffer = alloc_host(size);
118
- memcpy_h2h(volume.buffer, buf, size);
117
+ volume.buffer = wp_alloc_host(size);
118
+ wp_memcpy_h2h(volume.buffer, buf, size);
119
119
  volume.owner = true;
120
120
  }
121
121
  else
@@ -136,7 +136,7 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
136
136
  }
137
137
 
138
138
  // NB: buf must be a pointer on the same device
139
- uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
139
+ uint64_t wp_volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
140
140
  {
141
141
  if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
142
142
  return 0; // This cannot be a valid NanoVDB grid with data
@@ -150,10 +150,10 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
150
150
  ContextGuard guard(context);
151
151
 
152
152
  VolumeDesc volume;
153
- volume.context = context ? context : cuda_context_get_current();
153
+ volume.context = context ? context : wp_cuda_context_get_current();
154
154
 
155
- memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
156
- memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
155
+ wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.grid_data, buf, sizeof(pnanovdb_grid_t));
156
+ wp_memcpy_d2h(WP_CURRENT_CONTEXT, &volume.tree_data, (pnanovdb_grid_t*)buf + 1, sizeof(pnanovdb_tree_t));
157
157
  // no sync needed since the above copies are to pageable memory
158
158
 
159
159
  if (volume.grid_data.magic != PNANOVDB_MAGIC_NUMBER && volume.grid_data.magic != PNANOVDB_MAGIC_GRID)
@@ -168,8 +168,8 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
168
168
  volume.size_in_bytes = size;
169
169
  if (copy)
170
170
  {
171
- volume.buffer = alloc_device(WP_CURRENT_CONTEXT, size);
172
- memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
171
+ volume.buffer = wp_alloc_device(WP_CURRENT_CONTEXT, size);
172
+ wp_memcpy_d2d(WP_CURRENT_CONTEXT, volume.buffer, buf, size);
173
173
  volume.owner = true;
174
174
  }
175
175
  else
@@ -180,9 +180,9 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
180
180
 
181
181
  // Make blind metadata accessible on host
182
182
  const uint64_t blindmetadata_size = volume.grid_data.blind_metadata_count * sizeof(pnanovdb_gridblindmetadata_t);
183
- volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(alloc_pinned(blindmetadata_size));
184
- memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata,
185
- static_cast<uint8_t*>(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size);
183
+ volume.blind_metadata = static_cast<pnanovdb_gridblindmetadata_t*>(wp_alloc_pinned(blindmetadata_size));
184
+ wp_memcpy_d2h(WP_CURRENT_CONTEXT, volume.blind_metadata,
185
+ static_cast<uint8_t*>(volume.buffer) + volume.grid_data.blind_metadata_offset, blindmetadata_size);
186
186
 
187
187
  uint64_t id = (uint64_t)volume.buffer;
188
188
  volume_add_descriptor(id, std::move(volume));
@@ -190,7 +190,7 @@ uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy
190
190
  return id;
191
191
  }
192
192
 
193
- void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
193
+ void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
194
194
  {
195
195
  *buf = 0;
196
196
  *size = 0;
@@ -203,7 +203,7 @@ void volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
203
203
  }
204
204
  }
205
205
 
206
- void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
206
+ void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
207
207
  {
208
208
  *dx = *dx = *dz = 0.0f;
209
209
 
@@ -216,7 +216,7 @@ void volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
216
216
  }
217
217
  }
218
218
 
219
- void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
219
+ void wp_volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t& voxel_count)
220
220
  {
221
221
  tile_count = 0;
222
222
  voxel_count = 0;
@@ -242,8 +242,8 @@ void volume_get_tile_and_voxel_count(uint64_t id, uint32_t& tile_count, uint64_t
242
242
  }
243
243
  }
244
244
 
245
- const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
246
- float translation[3], float transform[9], char type_str[16])
245
+ const char* wp_volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* grid_index, uint32_t* grid_count,
246
+ float translation[3], float transform[9], char type_str[16])
247
247
  {
248
248
  const VolumeDesc* volume;
249
249
  if (volume_get_descriptor(id, volume))
@@ -257,7 +257,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
257
257
  memcpy(transform, grid_data.map.matf, sizeof(grid_data.map.matf));
258
258
 
259
259
  nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(grid_data.grid_type));
260
- return (const char*)grid_data.grid_name;
260
+ return reinterpret_cast<const char*>(grid_data.grid_name);
261
261
  }
262
262
 
263
263
  *grid_size = 0;
@@ -268,7 +268,7 @@ const char* volume_get_grid_info(uint64_t id, uint64_t* grid_size, uint32_t* gri
268
268
  return nullptr;
269
269
  }
270
270
 
271
- uint32_t volume_get_blind_data_count(uint64_t id)
271
+ uint32_t wp_volume_get_blind_data_count(uint64_t id)
272
272
  {
273
273
  const VolumeDesc* volume;
274
274
  if (volume_get_descriptor(id, volume))
@@ -278,8 +278,8 @@ uint32_t volume_get_blind_data_count(uint64_t id)
278
278
  return 0;
279
279
  }
280
280
 
281
- const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
282
- uint32_t* value_size, char type_str[16])
281
+ const char* wp_volume_get_blind_data_info(uint64_t id, uint32_t data_index, void** buf, uint64_t* value_count,
282
+ uint32_t* value_size, char type_str[16])
283
283
  {
284
284
  const VolumeDesc* volume;
285
285
  if (volume_get_descriptor(id, volume) && data_index < volume->grid_data.blind_metadata_count)
@@ -291,7 +291,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
291
291
  nanovdb::toStr(type_str, static_cast<nanovdb::GridType>(metadata.data_type));
292
292
  *buf = static_cast<uint8_t*>(volume->buffer) + volume->grid_data.blind_metadata_offset +
293
293
  data_index * sizeof(pnanovdb_gridblindmetadata_t) + metadata.data_offset;
294
- return (const char*)metadata.name;
294
+ return reinterpret_cast<const char*>(metadata.name);
295
295
  }
296
296
  *buf = nullptr;
297
297
  *value_count = 0;
@@ -300,7 +300,7 @@ const char* volume_get_blind_data_info(uint64_t id, uint32_t data_index, void**
300
300
  return nullptr;
301
301
  }
302
302
 
303
- void volume_get_tiles_host(uint64_t id, void* buf)
303
+ void wp_volume_get_tiles_host(uint64_t id, void* buf)
304
304
  {
305
305
  static constexpr uint32_t MASK = (1u << 3u) - 1u; // mask for bit operations
306
306
 
@@ -325,14 +325,14 @@ void volume_get_tiles_host(uint64_t id, void* buf)
325
325
  }
326
326
  }
327
327
 
328
- void volume_get_voxels_host(uint64_t id, void* buf)
328
+ void wp_volume_get_voxels_host(uint64_t id, void* buf)
329
329
  {
330
330
  const VolumeDesc* volume;
331
331
  if (volume_get_descriptor(id, volume))
332
332
  {
333
333
  uint32_t leaf_count;
334
334
  uint64_t voxel_count;
335
- volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
335
+ wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
336
336
 
337
337
  pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
338
338
 
@@ -361,20 +361,20 @@ void volume_get_voxels_host(uint64_t id, void* buf)
361
361
  }
362
362
  }
363
363
 
364
- void volume_destroy_host(uint64_t id)
364
+ void wp_volume_destroy_host(uint64_t id)
365
365
  {
366
366
  const VolumeDesc* volume;
367
367
  if (volume_get_descriptor(id, volume))
368
368
  {
369
369
  if (volume->owner)
370
370
  {
371
- free_host(volume->buffer);
371
+ wp_free_host(volume->buffer);
372
372
  }
373
373
  volume_rem_descriptor(id);
374
374
  }
375
375
  }
376
376
 
377
- void volume_destroy_device(uint64_t id)
377
+ void wp_volume_destroy_device(uint64_t id)
378
378
  {
379
379
  const VolumeDesc* volume;
380
380
  if (volume_get_descriptor(id, volume))
@@ -382,18 +382,18 @@ void volume_destroy_device(uint64_t id)
382
382
  ContextGuard guard(volume->context);
383
383
  if (volume->owner)
384
384
  {
385
- free_device(WP_CURRENT_CONTEXT, volume->buffer);
385
+ wp_free_device(WP_CURRENT_CONTEXT, volume->buffer);
386
386
  }
387
- free_pinned(volume->blind_metadata);
387
+ wp_free_pinned(volume->blind_metadata);
388
388
  volume_rem_descriptor(id);
389
389
  }
390
390
  }
391
391
 
392
392
  #if WP_ENABLE_CUDA
393
393
 
394
- uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
395
- bool points_in_world_space, const void* value_ptr, uint32_t value_size,
396
- const char* value_type)
394
+ uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9], float translation[3],
395
+ bool points_in_world_space, const void* value_ptr, uint32_t value_size,
396
+ const char* value_type)
397
397
  {
398
398
  char gridTypeStr[12];
399
399
 
@@ -407,7 +407,7 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
407
407
  size_t gridSize; \
408
408
  nanovdb::Grid<nanovdb::NanoTree<type>>* grid; \
409
409
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params); \
410
- return volume_create_device(context, grid, gridSize, false, true); \
410
+ return wp_volume_create_device(context, grid, gridSize, false, true); \
411
411
  }
412
412
 
413
413
  WP_VOLUME_BUILDER_INSTANTIATE_TYPES
@@ -416,8 +416,8 @@ uint64_t volume_from_tiles_device(void* context, void* points, int num_points, f
416
416
  return 0;
417
417
  }
418
418
 
419
- uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
420
- float translation[3], bool points_in_world_space)
419
+ uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
420
+ float translation[3], bool points_in_world_space)
421
421
  {
422
422
  nanovdb::IndexGrid* grid;
423
423
  size_t gridSize;
@@ -426,11 +426,11 @@ uint64_t volume_index_from_tiles_device(void* context, void* points, int num_poi
426
426
 
427
427
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
428
428
 
429
- return volume_create_device(context, grid, gridSize, false, true);
429
+ return wp_volume_create_device(context, grid, gridSize, false, true);
430
430
  }
431
431
 
432
- uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
433
- float translation[3], bool points_in_world_space)
432
+ uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
433
+ float translation[3], bool points_in_world_space)
434
434
  {
435
435
  nanovdb::OnIndexGrid* grid;
436
436
  size_t gridSize;
@@ -439,7 +439,7 @@ uint64_t volume_from_active_voxels_device(void* context, void* points, int num_p
439
439
 
440
440
  build_grid_from_points(grid, gridSize, points, num_points, points_in_world_space, params);
441
441
 
442
- return volume_create_device(context, grid, gridSize, false, true);
442
+ return wp_volume_create_device(context, grid, gridSize, false, true);
443
443
  }
444
444
 
445
445
  void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_coord_t* leaf_coords,
@@ -447,7 +447,7 @@ void launch_get_leaf_coords(void* context, const uint32_t leaf_count, pnanovdb_c
447
447
  void launch_get_voxel_coords(void* context, const uint32_t leaf_count, const uint32_t voxel_count,
448
448
  pnanovdb_coord_t* voxel_coords, pnanovdb_buf_t buf);
449
449
 
450
- void volume_get_tiles_device(uint64_t id, void* buf)
450
+ void wp_volume_get_tiles_device(uint64_t id, void* buf)
451
451
  {
452
452
  const VolumeDesc* volume;
453
453
  if (volume_get_descriptor(id, volume))
@@ -459,14 +459,14 @@ void volume_get_tiles_device(uint64_t id, void* buf)
459
459
  }
460
460
  }
461
461
 
462
- void volume_get_voxels_device(uint64_t id, void* buf)
462
+ void wp_volume_get_voxels_device(uint64_t id, void* buf)
463
463
  {
464
464
  const VolumeDesc* volume;
465
465
  if (volume_get_descriptor(id, volume))
466
466
  {
467
467
  uint32_t leaf_count;
468
468
  uint64_t voxel_count;
469
- volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
469
+ wp_volume_get_tile_and_voxel_count(id, leaf_count, voxel_count);
470
470
 
471
471
  pnanovdb_coord_t* voxel_coords = static_cast<pnanovdb_coord_t*>(buf);
472
472
  launch_get_voxel_coords(volume->context, leaf_count, voxel_count, voxel_coords, volume->as_pnano());
@@ -475,27 +475,27 @@ void volume_get_voxels_device(uint64_t id, void* buf)
475
475
 
476
476
  #else
477
477
  // stubs for non-CUDA platforms
478
- uint64_t volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
479
- float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
480
- const char* value_type)
478
+ uint64_t wp_volume_from_tiles_device(void* context, void* points, int num_points, float transform[9],
479
+ float translation[3], bool points_in_world_space, const void* value_ptr, uint32_t value_size,
480
+ const char* value_type)
481
481
  {
482
482
  return 0;
483
483
  }
484
484
 
485
- uint64_t volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
486
- float translation[3], bool points_in_world_space)
485
+ uint64_t wp_volume_index_from_tiles_device(void* context, void* points, int num_points, float transform[9],
486
+ float translation[3], bool points_in_world_space)
487
487
  {
488
488
  return 0;
489
489
  }
490
490
 
491
- uint64_t volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
492
- float translation[3], bool points_in_world_space)
491
+ uint64_t wp_volume_from_active_voxels_device(void* context, void* points, int num_points, float transform[9],
492
+ float translation[3], bool points_in_world_space)
493
493
  {
494
494
  return 0;
495
495
  }
496
496
 
497
- void volume_get_tiles_device(uint64_t id, void* buf) {}
497
+ void wp_volume_get_tiles_device(uint64_t id, void* buf) {}
498
498
 
499
- void volume_get_voxels_device(uint64_t id, void* buf) {}
499
+ void wp_volume_get_voxels_device(uint64_t id, void* buf) {}
500
500
 
501
501
  #endif
warp/native/volume.cu CHANGED
@@ -62,6 +62,6 @@ void launch_get_voxel_coords(void *context, const uint32_t leaf_count, const uin
62
62
  pnanovdb_coord_t *voxel_coords, pnanovdb_buf_t buf)
63
63
  {
64
64
  ContextGuard guard(context);
65
- cudaStream_t stream = (cudaStream_t)cuda_stream_get_current();
65
+ cudaStream_t stream = (cudaStream_t)wp_cuda_stream_get_current();
66
66
  volume_get_voxel_coords<<<leaf_count, dim3(8, 8, 8), 0, stream>>>(voxel_count, voxel_coords, buf);
67
67
  }
warp/native/volume.h CHANGED
@@ -48,7 +48,7 @@ static constexpr int LINEAR = 1;
48
48
 
49
49
  CUDA_CALLABLE inline pnanovdb_buf_t id_to_buffer(uint64_t id)
50
50
  {
51
- pnanovdb_buf_t buf;
51
+ pnanovdb_buf_t buf = {}; // Zero-initialize the entire struct
52
52
  buf.data = (uint32_t *)id;
53
53
  return buf;
54
54
  }
@@ -171,6 +171,7 @@ struct value_accessor_base
171
171
 
172
172
  explicit inline CUDA_CALLABLE value_accessor_base(const pnanovdb_buf_t buf) : buf(buf), root(get_root(buf))
173
173
  {
174
+ accessor = {};
174
175
  }
175
176
 
176
177
  CUDA_CALLABLE inline void init_cache()
@@ -33,29 +33,22 @@
33
33
  #endif
34
34
  namespace
35
35
  {
36
- /// Allocator class following interface of cub::cachingDeviceAllocator, as expected by naovdb::PointsToGrid
37
- struct Allocator
36
+ /// Resource class following interface of nanovdb::DeviceResource as expected by nanovdb::PointsToGrid
37
+ class Resource
38
38
  {
39
+ public:
40
+ // cudaMalloc aligns memory to 256 bytes by default
41
+ static constexpr size_t DEFAULT_ALIGNMENT = 256;
39
42
 
40
- cudaError_t DeviceAllocate(void **d_ptr, ///< [out] Reference to pointer to the allocation
41
- size_t bytes, ///< [in] Minimum number of bytes for the allocation
42
- cudaStream_t active_stream) ///< [in] The stream to be associated with this allocation
43
- {
44
- // in PointsToGrid stream argument always coincide with current stream, ignore
45
- *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
43
+ static void* allocateAsync(size_t bytes, size_t, cudaStream_t stream) {
44
+ // In PointsToGrid, the stream argument always coincides with current stream, ignore
45
+ void *d_ptr = wp_alloc_device(WP_CURRENT_CONTEXT, bytes);
46
46
  cudaCheckError();
47
- return cudaSuccess;
47
+ return d_ptr;
48
48
  }
49
49
 
50
- cudaError_t DeviceFree(void *d_ptr)
51
- {
52
- free_device(WP_CURRENT_CONTEXT, d_ptr);
53
- return cudaSuccess;
54
- }
55
-
56
- cudaError_t FreeAllCached()
57
- {
58
- return cudaSuccess;
50
+ static void deallocateAsync(void *d_ptr, size_t, size_t, cudaStream_t stream) {
51
+ wp_free_device(WP_CURRENT_CONTEXT, d_ptr);
59
52
  }
60
53
  };
61
54
 
@@ -70,13 +63,13 @@ class DeviceBuffer
70
63
  /// @brief Static factory method that returns an instance of this buffer
71
64
  /// @param size byte size of buffer to be initialized
72
65
  /// @param dummy this argument is currently ignored but required to match the API of the HostBuffer
73
- /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
66
+ /// @param device id of the device on which to initialize the buffer
74
67
  /// @param stream optional stream argument (defaults to stream NULL)
75
68
  /// @return An instance of this class using move semantics
76
- static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, bool host = true,
77
- void *stream = nullptr)
69
+ static DeviceBuffer create(uint64_t size, const DeviceBuffer *dummy = nullptr, int device = cudaCpuDeviceId,
70
+ cudaStream_t stream = nullptr)
78
71
  {
79
- return DeviceBuffer(size, host, stream);
72
+ return DeviceBuffer(size, device, stream);
80
73
  }
81
74
 
82
75
  /// @brief Static factory method that returns an instance of this buffer that wraps externally managed memory
@@ -93,11 +86,11 @@ class DeviceBuffer
93
86
  /// @param size byte size of buffer to be initialized
94
87
  /// @param device id of the device on which to initialize the buffer (cudaCpuDeviceId selects pinned host memory)
95
88
  /// @param stream optional stream argument (defaults to stream NULL)
96
- DeviceBuffer(uint64_t size = 0, bool host = true, void *stream = nullptr)
89
+ DeviceBuffer(uint64_t size = 0, int device = cudaCpuDeviceId, cudaStream_t stream = nullptr)
97
90
  : mSize(0), mCpuData(nullptr), mGpuData(nullptr), mManaged(false)
98
91
  {
99
92
  if (size > 0)
100
- this->init(size, host, stream);
93
+ this->init(size, device, stream);
101
94
  }
102
95
 
103
96
  DeviceBuffer(uint64_t size, void *cpuData, void *gpuData)
@@ -144,22 +137,22 @@ class DeviceBuffer
144
137
 
145
138
  /// @brief Initialize buffer
146
139
  /// @param size byte size of buffer to be initialized
147
- /// @param host If true buffer is initialized only on the host/CPU, else on the device/GPU
140
+ /// @param device id of the device on which to initialize the buffer
148
141
  /// @note All existing buffers are first cleared
149
142
  /// @warning size is expected to be non-zero. Use clear() to clear the buffer!
150
- void init(uint64_t size, bool host = true, void *stream = nullptr)
143
+ void init(uint64_t size, int device = cudaCpuDeviceId, void *stream = nullptr)
151
144
  {
152
145
  if (mSize > 0)
153
146
  this->clear(stream);
154
147
  NANOVDB_ASSERT(size > 0);
155
- if (host)
148
+ if (device == cudaCpuDeviceId)
156
149
  {
157
150
  mCpuData =
158
- alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
151
+ wp_alloc_pinned(size); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
159
152
  }
160
153
  else
161
154
  {
162
- mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
155
+ mGpuData = wp_alloc_device(WP_CURRENT_CONTEXT, size);
163
156
  }
164
157
  cudaCheckError();
165
158
  mSize = size;
@@ -212,9 +205,9 @@ class DeviceBuffer
212
205
  void clear(void *stream = nullptr)
213
206
  {
214
207
  if (mManaged && mGpuData)
215
- free_device(WP_CURRENT_CONTEXT, mGpuData);
208
+ wp_free_device(WP_CURRENT_CONTEXT, mGpuData);
216
209
  if (mManaged && mCpuData)
217
- free_pinned(mCpuData);
210
+ wp_free_pinned(mCpuData);
218
211
  mCpuData = mGpuData = nullptr;
219
212
  mSize = 0;
220
213
  mManaged = false;
@@ -367,11 +360,11 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
367
360
  Tree *tree = &out_grid.tree();
368
361
 
369
362
  int node_counts[3];
370
- memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
363
+ wp_memcpy_d2h(WP_CURRENT_CONTEXT, node_counts, tree->mNodeCount, sizeof(node_counts));
371
364
  // synchronization below is unnecessary as node_counts is in pageable memory.
372
365
  // keep it for clarity
373
- cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
374
- cuda_stream_synchronize(stream);
366
+ cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
367
+ wp_cuda_stream_synchronize(stream);
375
368
 
376
369
  const unsigned int leaf_count = node_counts[0];
377
370
  const unsigned int lower_count = node_counts[1];
@@ -387,7 +380,7 @@ void finalize_grid(nanovdb::Grid<nanovdb::NanoTree<BuildT>> &out_grid, const Bui
387
380
  <<<upper_count, NUM_THREADS, 0, stream>>>(tree, params.background_value);
388
381
  setRootBBoxAndBackgroundValue<Tree><<<1, NUM_THREADS, 0, stream>>>(&out_grid, params.background_value);
389
382
 
390
- check_cuda(cuda_context_check(WP_CURRENT_CONTEXT));
383
+ check_cuda(wp_cuda_context_check(WP_CURRENT_CONTEXT));
391
384
  }
392
385
 
393
386
  template <>
@@ -437,8 +430,8 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
437
430
  try
438
431
  {
439
432
 
440
- cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
441
- nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
433
+ cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
434
+ nanovdb::tools::cuda::PointsToGrid<BuildT, Resource> p2g(params.map, stream);
442
435
 
443
436
  // p2g.setVerbose(2);
444
437
  p2g.setGridName(params.name);