warp-lang 1.8.0__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (153)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +48 -63
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +327 -209
  9. warp/config.py +1 -1
  10. warp/context.py +1363 -800
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_callable.py +34 -4
  18. warp/examples/interop/example_jax_kernel.py +27 -1
  19. warp/fabric.py +1 -1
  20. warp/fem/cache.py +27 -19
  21. warp/fem/domain.py +2 -2
  22. warp/fem/field/nodal_field.py +2 -2
  23. warp/fem/field/virtual.py +266 -166
  24. warp/fem/geometry/geometry.py +5 -5
  25. warp/fem/integrate.py +200 -91
  26. warp/fem/space/restriction.py +4 -0
  27. warp/fem/space/shape/tet_shape_function.py +3 -10
  28. warp/jax_experimental/custom_call.py +1 -1
  29. warp/jax_experimental/ffi.py +203 -54
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +103 -8
  32. warp/native/builtin.h +90 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +13 -3
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +42 -11
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +4 -4
  48. warp/native/mat.h +1913 -119
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +5 -3
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +337 -16
  59. warp/native/rand.h +7 -7
  60. warp/native/range.h +7 -1
  61. warp/native/reduce.cpp +10 -10
  62. warp/native/reduce.cu +13 -14
  63. warp/native/runlength_encode.cpp +2 -2
  64. warp/native/runlength_encode.cu +5 -5
  65. warp/native/scan.cpp +3 -3
  66. warp/native/scan.cu +4 -4
  67. warp/native/sort.cpp +10 -10
  68. warp/native/sort.cu +22 -22
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +14 -14
  71. warp/native/spatial.h +366 -17
  72. warp/native/svd.h +23 -8
  73. warp/native/temp_buffer.h +2 -2
  74. warp/native/tile.h +303 -70
  75. warp/native/tile_radix_sort.h +5 -1
  76. warp/native/tile_reduce.h +16 -25
  77. warp/native/tuple.h +2 -2
  78. warp/native/vec.h +385 -18
  79. warp/native/volume.cpp +54 -54
  80. warp/native/volume.cu +1 -1
  81. warp/native/volume.h +2 -1
  82. warp/native/volume_builder.cu +30 -37
  83. warp/native/warp.cpp +150 -149
  84. warp/native/warp.cu +337 -193
  85. warp/native/warp.h +227 -226
  86. warp/optim/linear.py +736 -271
  87. warp/render/imgui_manager.py +289 -0
  88. warp/render/render_opengl.py +137 -57
  89. warp/render/render_usd.py +0 -1
  90. warp/sim/collide.py +1 -2
  91. warp/sim/graph_coloring.py +2 -2
  92. warp/sim/integrator_vbd.py +10 -2
  93. warp/sparse.py +559 -176
  94. warp/tape.py +2 -0
  95. warp/tests/aux_test_module_aot.py +7 -0
  96. warp/tests/cuda/test_async.py +3 -3
  97. warp/tests/cuda/test_conditional_captures.py +101 -0
  98. warp/tests/geometry/test_marching_cubes.py +233 -12
  99. warp/tests/sim/test_cloth.py +89 -6
  100. warp/tests/sim/test_coloring.py +82 -7
  101. warp/tests/test_array.py +56 -5
  102. warp/tests/test_assert.py +53 -0
  103. warp/tests/test_atomic_cas.py +127 -114
  104. warp/tests/test_codegen.py +3 -2
  105. warp/tests/test_context.py +8 -15
  106. warp/tests/test_enum.py +136 -0
  107. warp/tests/test_examples.py +2 -2
  108. warp/tests/test_fem.py +45 -2
  109. warp/tests/test_fixedarray.py +229 -0
  110. warp/tests/test_func.py +18 -15
  111. warp/tests/test_future_annotations.py +7 -5
  112. warp/tests/test_linear_solvers.py +30 -0
  113. warp/tests/test_map.py +1 -1
  114. warp/tests/test_mat.py +1540 -378
  115. warp/tests/test_mat_assign_copy.py +178 -0
  116. warp/tests/test_mat_constructors.py +574 -0
  117. warp/tests/test_module_aot.py +287 -0
  118. warp/tests/test_print.py +69 -0
  119. warp/tests/test_quat.py +162 -34
  120. warp/tests/test_quat_assign_copy.py +145 -0
  121. warp/tests/test_reload.py +2 -1
  122. warp/tests/test_sparse.py +103 -0
  123. warp/tests/test_spatial.py +140 -34
  124. warp/tests/test_spatial_assign_copy.py +160 -0
  125. warp/tests/test_static.py +48 -0
  126. warp/tests/test_struct.py +43 -3
  127. warp/tests/test_tape.py +38 -0
  128. warp/tests/test_types.py +0 -20
  129. warp/tests/test_vec.py +216 -441
  130. warp/tests/test_vec_assign_copy.py +143 -0
  131. warp/tests/test_vec_constructors.py +325 -0
  132. warp/tests/tile/test_tile.py +206 -152
  133. warp/tests/tile/test_tile_cholesky.py +605 -0
  134. warp/tests/tile/test_tile_load.py +169 -0
  135. warp/tests/tile/test_tile_mathdx.py +2 -558
  136. warp/tests/tile/test_tile_matmul.py +179 -0
  137. warp/tests/tile/test_tile_mlp.py +1 -1
  138. warp/tests/tile/test_tile_reduce.py +100 -11
  139. warp/tests/tile/test_tile_shared_memory.py +16 -16
  140. warp/tests/tile/test_tile_sort.py +59 -55
  141. warp/tests/unittest_suites.py +16 -0
  142. warp/tests/walkthrough_debug.py +1 -1
  143. warp/thirdparty/unittest_parallel.py +108 -9
  144. warp/types.py +554 -264
  145. warp/utils.py +68 -86
  146. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  147. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
  148. warp/native/marching.cpp +0 -19
  149. warp/native/marching.cu +0 -514
  150. warp/native/marching.h +0 -19
  151. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  152. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  153. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/array.h CHANGED
@@ -161,7 +161,7 @@ inline CUDA_CALLABLE void print(shape_t s)
     // should probably store ndim with shape
     printf("(%d, %d, %d, %d)\n", s.dims[0], s.dims[1], s.dims[2], s.dims[3]);
 }
-inline CUDA_CALLABLE void adj_print(shape_t s, shape_t& shape_t) {}
+inline CUDA_CALLABLE void adj_print(shape_t s, shape_t& adj_s) {}
 
 
 template <typename T>
@@ -252,6 +252,89 @@ struct array_t
 };
 
 
+// Required when compiling adjoints.
+template <typename T>
+inline CUDA_CALLABLE array_t<T> add(
+    const array_t<T>& a, const array_t<T>& b
+)
+{
+    return array_t<T>();
+}
+
+
+// Stack-allocated counterpart to `array_t<T>`.
+// Useful for small buffers that have their shape known at compile-time,
+// and that gain from having array semantics instead of vectors.
+template <int Size, typename T>
+struct fixedarray_t : array_t<T>
+{
+    using Base = array_t<T>;
+
+    static_assert(Size > 0, "Expected Size > 0");
+
+    CUDA_CALLABLE inline fixedarray_t()
+        : Base(storage, Size), storage()
+    {}
+
+    CUDA_CALLABLE fixedarray_t(int dim0, T* grad=nullptr)
+        : Base(storage, dim0, grad), storage()
+    {
+        assert(Size == dim0);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, T* grad=nullptr)
+        : Base(storage, dim0, dim1, grad), storage()
+    {
+        assert(Size == dim0 * dim1);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, T* grad=nullptr)
+        : Base(storage, dim0, dim1, dim2, grad), storage()
+    {
+        assert(Size == dim0 * dim1 * dim2);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, int dim3, T* grad=nullptr)
+        : Base(storage, dim0, dim1, dim2, dim3, grad), storage()
+    {
+        assert(Size == dim0 * dim1 * dim2 * dim3);
+    }
+
+    CUDA_CALLABLE fixedarray_t<Size, T>& operator=(const fixedarray_t<Size, T>& other)
+    {
+        for (unsigned int i = 0; i < Size; ++i)
+        {
+            this->storage[i] = other.storage[i];
+        }
+
+        this->data = this->storage;
+        this->grad = nullptr;
+        this->shape = other.shape;
+
+        for (unsigned int i = 0; i < ARRAY_MAX_DIMS; ++i)
+        {
+            this->strides[i] = other.strides[i];
+        }
+
+        this->ndim = other.ndim;
+
+        return *this;
+    }
+
+    T storage[Size];
+};
+
+
+// Required when compiling adjoints.
+template <int Size, typename T>
+inline CUDA_CALLABLE fixedarray_t<Size, T> add(
+    const fixedarray_t<Size, T>& a, const fixedarray_t<Size, T>& b
+)
+{
+    return fixedarray_t<Size, T>();
+}
+
+
 // TODO:
 // - templated index type?
 // - templated dimensionality? (also for array_t to save space when passing arrays to kernels)
@@ -665,11 +748,11 @@ CUDA_CALLABLE inline indexedarray_t<T> view(indexedarray_t<T>& src, int i, int j
 }
 
 template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
-inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, A2<T>& adj_src, int adj_i, A3<T> adj_ret) {}
+inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, A2<T>& adj_src, int adj_i, A3<T>& adj_ret) {}
 template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
-inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, A2<T>& adj_src, int adj_i, int adj_j, A3<T> adj_ret) {}
+inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, A2<T>& adj_src, int adj_i, int adj_j, A3<T>& adj_ret) {}
 template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
-inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, int k, A2<T>& adj_src, int adj_i, int adj_j, int adj_k, A3<T> adj_ret) {}
+inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, int k, A2<T>& adj_src, int adj_i, int adj_j, int adj_k, A3<T>& adj_ret) {}
 
 // TODO: lower_bound() for indexed arrays?
 
@@ -762,13 +845,25 @@ template<template<typename> class A, typename T>
 inline CUDA_CALLABLE T atomic_exch(const A<T>& buf, int i, int j, int k, int l, T value) { return atomic_exch(&index(buf, i, j, k, l), value); }
 
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i) { return &index(buf, i); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i)
+{
+    return &index(buf, i); // cppcheck-suppress returnDanglingLifetime
+}
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j) { return &index(buf, i, j); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j)
+{
+    return &index(buf, i, j); // cppcheck-suppress returnDanglingLifetime
+}
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k) { return &index(buf, i, j, k); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k)
+{
+    return &index(buf, i, j, k); // cppcheck-suppress returnDanglingLifetime
+}
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l) { return &index(buf, i, j, k, l); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l)
+{
+    return &index(buf, i, j, k, l); // cppcheck-suppress returnDanglingLifetime
+}
 
 template<template<typename> class A, typename T>
 inline CUDA_CALLABLE void array_store(const A<T>& buf, int i, T value)
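
The fixedarray_t<Size, T> type added above derives from array_t<T> but owns its storage on the stack and asserts that the requested shape matches the compile-time Size. A minimal C++ sketch of how it could be exercised (illustrative only, not part of the package); it assumes array.h is included and that its index() helper, which returns a T& for a given set of indices, is visible from the calling code:

    // Build a 2x3 fixed-size buffer backed by stack storage; the constructor
    // asserts that dim0 * dim1 equals the compile-time Size (6 here).
    fixedarray_t<6, float> buf(2, 3);

    // Fill it through the inherited array_t<T> indexing helper.
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 3; ++j)
            index(buf, i, j) = float(i * 3 + j);

    // Copy-assignment duplicates the storage and repoints data at the copy's own buffer.
    fixedarray_t<6, float> copy;
    copy = buf;

    float sum = 0.0f;
    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 3; ++j)
            sum += index(copy, i, j);   // sum == 15.0f
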
warp/native/builtin.h CHANGED
@@ -49,7 +49,7 @@
 #define DEG_TO_RAD 0.01745329251994329577
 
 #if defined(__CUDACC__) && !defined(_MSC_VER)
-__device__ void __debugbreak() {}
+__device__ void __debugbreak() { __brkpt(); }
 #endif
 
 #if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
@@ -197,19 +197,19 @@ CUDA_CALLABLE inline float half_to_float(half h)
 
 #else // Native C++ for Warp builtins outside of kernels
 
-extern "C" WP_API uint16_t float_to_half_bits(float x);
-extern "C" WP_API float half_bits_to_float(uint16_t u);
+extern "C" WP_API uint16_t wp_float_to_half_bits(float x);
+extern "C" WP_API float wp_half_bits_to_float(uint16_t u);
 
 inline half float_to_half(float x)
 {
     half h;
-    h.u = float_to_half_bits(x);
+    h.u = wp_float_to_half_bits(x);
     return h;
 }
 
 inline float half_to_float(half h)
 {
-    return half_bits_to_float(h.u);
+    return wp_half_bits_to_float(h.u);
 }
 
 #endif
@@ -268,16 +268,20 @@ inline CUDA_CALLABLE half operator / (half a,half b)
 
 
 template <typename T>
-CUDA_CALLABLE float cast_float(T x) { return (float)(x); }
+CUDA_CALLABLE inline float cast_float(T x) { return (float)(x); }
 
 template <typename T>
-CUDA_CALLABLE int cast_int(T x) { return (int)(x); }
+CUDA_CALLABLE inline int cast_int(T x) { return (int)(x); }
 
 template <typename T>
-CUDA_CALLABLE void adj_cast_float(T x, T& adj_x, float adj_ret) { adj_x += T(adj_ret); }
+CUDA_CALLABLE inline void adj_cast_float(T x, T& adj_x, float adj_ret) {}
+
+CUDA_CALLABLE inline void adj_cast_float(float16 x, float16& adj_x, float adj_ret) { adj_x += float16(adj_ret); }
+CUDA_CALLABLE inline void adj_cast_float(float32 x, float32& adj_x, float adj_ret) { adj_x += float32(adj_ret); }
+CUDA_CALLABLE inline void adj_cast_float(float64 x, float64& adj_x, float adj_ret) { adj_x += float64(adj_ret); }
 
 template <typename T>
-CUDA_CALLABLE void adj_cast_int(T x, T& adj_x, int adj_ret) { adj_x += adj_ret; }
+CUDA_CALLABLE inline void adj_cast_int(T x, T& adj_x, int adj_ret) {}
 
 template <typename T>
 CUDA_CALLABLE inline void adj_int8(T, T&, int8) {}
@@ -1273,6 +1277,83 @@ inline CUDA_CALLABLE_DEVICE void tid(int& i, int& j, int& k, int& l, size_t inde
     l = c.l;
 }
 
+// should match types.py
+constexpr int SLICE_BEGIN = (1U << (sizeof(int) - 1)) - 1; // std::numeric_limits<int>::max()
+constexpr int SLICE_END = -(1U << (sizeof(int) - 1)); // std::numeric_limits<int>::min()
+
+struct slice_t
+{
+    int start;
+    int stop;
+    int step;
+
+    CUDA_CALLABLE inline slice_t()
+        : start(SLICE_BEGIN), stop(SLICE_END), step(1)
+    {}
+
+    CUDA_CALLABLE inline slice_t(int start, int stop, int step)
+        : start(start), stop(stop), step(step)
+    {}
+};
+
+CUDA_CALLABLE inline slice_t slice_adjust_indices(const slice_t& slice, int length)
+{
+#ifndef NDEBUG
+    if (slice.step == 0)
+    {
+        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    int start, stop;
+
+    if (slice.start == SLICE_BEGIN)
+    {
+        start = slice.step < 0 ? length - 1 : 0;
+    }
+    else
+    {
+        start = min(max(slice.start, -length), length);
+        start = start < 0 ? start + length : start;
+    }
+
+    if (slice.stop == SLICE_END)
+    {
+        stop = slice.step < 0 ? -1 : length;
+    }
+    else
+    {
+        stop = min(max(slice.stop, -length), length);
+        stop = stop < 0 ? stop + length : stop;
+    }
+
+    return {start, stop, slice.step};
+}
+
+CUDA_CALLABLE inline int slice_get_length(const slice_t& slice)
+{
+#ifndef NDEBUG
+    if (slice.step == 0)
+    {
+        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (slice.step > 0 && slice.start < slice.stop)
+    {
+        return 1 + (slice.stop - slice.start - 1) / slice.step;
+    }
+
+    if (slice.step < 0 && slice.start > slice.stop)
+    {
+        return 1 + (slice.start - slice.stop - 1) / (-slice.step);
+    }
+
+    return 0;
+}
+
 template<typename T>
 inline CUDA_CALLABLE T atomic_add(T* buf, T value)
 {
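
The slice helpers added above mirror Python slice semantics: SLICE_BEGIN and SLICE_END mark open ends, slice_adjust_indices() clamps indices against an axis length and wraps negative ones, and slice_get_length() counts the resulting elements. A short illustrative sketch (not part of the package), assuming builtin.h is included so slice_t and these functions are visible from the calling code:

    // Equivalent of the Python slice [::-2] applied to an axis of length 5.
    slice_t s(SLICE_BEGIN, SLICE_END, -2);
    slice_t adjusted = slice_adjust_indices(s, 5);
    // adjusted.start == 4, adjusted.stop == -1, adjusted.step == -2

    int n = slice_get_length(adjusted);
    // n == 3, matching len(range(4, -1, -2)), i.e. elements 4, 2, 0
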
warp/native/bvh.cpp CHANGED
@@ -22,7 +22,9 @@
 #include "warp.h"
 #include "cuda_util.h"
 
+#include <cassert>
 #include <map>
+#include <climits>
 
 using namespace wp;
 
@@ -40,6 +42,8 @@ public:
 
 private:
 
+    void initialize_empty(BVH& bvh);
+
     bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
 
     int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
@@ -54,30 +58,64 @@ private:
 
 //////////////////////////////////////////////////////////////////////
 
+void TopDownBVHBuilder::initialize_empty(BVH& bvh)
+{
+    bvh.max_depth = 0;
+    bvh.max_nodes = 0;
+    bvh.node_lowers = nullptr;
+    bvh.node_uppers = nullptr;
+    bvh.node_parents = nullptr;
+    bvh.node_counts = nullptr;
+    bvh.root = nullptr;
+    bvh.primitive_indices = nullptr;
+    bvh.num_leaf_nodes = 0;
+}
+
 void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n, int in_constructor_type)
 {
+    assert(n >= 0);
+    if (n > 0)
+    {
+        assert(lowers != nullptr && uppers != nullptr && "Pointers must be valid for n > 0");
+    }
+
     constructor_type = in_constructor_type;
     if (constructor_type != BVH_CONSTRUCTOR_SAH && constructor_type != BVH_CONSTRUCTOR_MEDIAN)
     {
-        printf("Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
+        fprintf(stderr, "Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
             constructor_type, BVH_CONSTRUCTOR_SAH, BVH_CONSTRUCTOR_MEDIAN);
         return;
     }
 
+    if (n < 0)
+    {
+        fprintf(stderr, "Error: Cannot build BVH with a negative primitive count: %d\n", n);
+        initialize_empty(bvh);
+        return;
+    }
+    else if (n == 0)
+    {
+        initialize_empty(bvh);
+        return;
+    }
+    else if (n > INT_MAX / 2)
+    {
+        fprintf(stderr, "Error: Primitive count %d is too large and would cause an integer overflow.\n", n);
+        initialize_empty(bvh);
+        return;
+    }
+
     bvh.max_depth = 0;
     bvh.max_nodes = 2*n-1;
 
     bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_parents = new int[bvh.max_nodes];
-    bvh.node_counts = NULL;
+    bvh.node_counts = nullptr;
 
     // root is always in first slot for top down builders
     bvh.root = new int[1];
     bvh.root[0] = 0;
-
-    if (n == 0)
-        return;
 
     bvh.primitive_indices = new int[n];
     for (int i = 0; i < n; ++i)
@@ -273,8 +311,6 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
 {
     assert(start < end);
 
-    // printf("start %d end %d\n", start, end);
-
     const int n = end - start;
     const int node_index = bvh.num_nodes++;
 
@@ -353,8 +389,8 @@ void bvh_refit_recursive(BVH& bvh, int index)
             bound.add_bounds(bvh.item_lowers[item], bvh.item_uppers[item]);
         }
 
-        (vec3&)lower = bound.lower;
-        (vec3&)upper = bound.upper;
+        reinterpret_cast<vec3&>(lower) = bound.lower;
+        reinterpret_cast<vec3&>(upper) = bound.upper;
     }
     else
    {
@@ -365,19 +401,19 @@ void bvh_refit_recursive(BVH& bvh, int index)
        bvh_refit_recursive(bvh, right_index);
 
        // compute union of children
-        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
-        const vec3& left_upper = (vec3&)bvh.node_uppers[left_index];
+        const vec3& left_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[left_index]);
+        const vec3& left_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[left_index]);
 
-        const vec3& right_lower = (vec3&)bvh.node_lowers[right_index];
-        const vec3& right_upper = (vec3&)bvh.node_uppers[right_index];
+        const vec3& right_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[right_index]);
+        const vec3& right_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[right_index]);
 
        // union of child bounds
        vec3 new_lower = min(left_lower, right_lower);
        vec3 new_upper = max(left_upper, right_upper);
 
        // write new BVH nodes
-        (vec3&)lower = new_lower;
-        (vec3&)upper = new_upper;
+        reinterpret_cast<vec3&>(lower) = new_lower;
+        reinterpret_cast<vec3&>(upper) = new_upper;
    }
 }
 
@@ -448,11 +484,11 @@ void bvh_destroy_host(BVH& bvh)
    delete[] bvh.primitive_indices;
    delete[] bvh.root;
 
-    bvh.node_lowers = NULL;
-    bvh.node_uppers = NULL;
-    bvh.node_parents = NULL;
-    bvh.primitive_indices = NULL;
-    bvh.root = NULL;
+    bvh.node_lowers = nullptr;
+    bvh.node_uppers = nullptr;
+    bvh.node_parents = nullptr;
+    bvh.primitive_indices = nullptr;
+    bvh.root = nullptr;
 
    bvh.max_nodes = 0;
    bvh.num_items = 0;
@@ -460,7 +496,7 @@ void bvh_destroy_host(BVH& bvh)
 
 } // namespace wp
 
-uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
+uint64_t wp_bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
 {
    BVH* bvh = new BVH();
    wp::bvh_create_host(lowers, uppers, num_items, constructor_type, *bvh);
@@ -468,16 +504,16 @@ uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int construc
    return (uint64_t)bvh;
 }
 
-void bvh_refit_host(uint64_t id)
+void wp_bvh_refit_host(uint64_t id)
 {
    BVH* bvh = (BVH*)(id);
-    bvh_refit_host(*bvh);
+    wp::bvh_refit_host(*bvh);
 }
 
-void bvh_destroy_host(uint64_t id)
+void wp_bvh_destroy_host(uint64_t id)
 {
    BVH* bvh = (BVH*)(id);
-    bvh_destroy_host(*bvh);
+    wp::bvh_destroy_host(*bvh);
    delete bvh;
 }
 
@@ -485,8 +521,8 @@ void bvh_destroy_host(uint64_t id)
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
 
-uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type) { return 0; }
-void bvh_refit_device(uint64_t id) {}
-void bvh_destroy_device(uint64_t id) {}
+uint64_t wp_bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type) { return 0; }
+void wp_bvh_refit_device(uint64_t id) {}
+void wp_bvh_destroy_device(uint64_t id) {}
 
 #endif // !WP_ENABLE_CUDA
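
With this release the exported C entry points for the host-side BVH gain a wp_ prefix (wp_bvh_create_host, wp_bvh_refit_host, wp_bvh_destroy_host) while the wp::bvh_* functions remain internal. A rough C++ usage sketch against the signatures shown in this diff (illustrative only, not from the package); it assumes the relevant Warp headers are included and that BVH_CONSTRUCTOR_SAH is visible to the caller, which is not shown in this hunk:

    #include <vector>

    // Axis-aligned bounds for two items. The refit path reads bvh.item_lowers /
    // bvh.item_uppers, so these buffers are assumed to outlive the BVH handle.
    std::vector<wp::vec3> lowers = { wp::vec3(0.0f, 0.0f, 0.0f), wp::vec3(1.0f, 1.0f, 1.0f) };
    std::vector<wp::vec3> uppers = { wp::vec3(0.5f, 0.5f, 0.5f), wp::vec3(2.0f, 2.0f, 2.0f) };

    // Build on the CPU with the SAH constructor; an opaque handle id is returned.
    uint64_t id = wp_bvh_create_host(lowers.data(), uppers.data(), (int)lowers.size(), BVH_CONSTRUCTOR_SAH);

    // ... update the bounds in place, then refit without rebuilding the topology ...
    wp_bvh_refit_host(id);

    wp_bvh_destroy_host(id);
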