warp-lang 1.8.0__py3-none-macosx_10_13_universal2.whl → 1.9.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +482 -110
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +48 -63
- warp/builtins.py +955 -137
- warp/codegen.py +327 -209
- warp/config.py +1 -1
- warp/context.py +1363 -800
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +266 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +200 -91
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +1 -1
- warp/jax_experimental/ffi.py +203 -54
- warp/marching_cubes.py +708 -0
- warp/native/array.h +103 -8
- warp/native/builtin.h +90 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +13 -3
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +42 -11
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +4 -4
- warp/native/mat.h +1913 -119
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +5 -3
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +337 -16
- warp/native/rand.h +7 -7
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +22 -22
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +14 -14
- warp/native/spatial.h +366 -17
- warp/native/svd.h +23 -8
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +303 -70
- warp/native/tile_radix_sort.h +5 -1
- warp/native/tile_reduce.h +16 -25
- warp/native/tuple.h +2 -2
- warp/native/vec.h +385 -18
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +337 -193
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +137 -57
- warp/render/render_usd.py +0 -1
- warp/sim/collide.py +1 -2
- warp/sim/graph_coloring.py +2 -2
- warp/sim/integrator_vbd.py +10 -2
- warp/sparse.py +559 -176
- warp/tape.py +2 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/sim/test_cloth.py +89 -6
- warp/tests/sim/test_coloring.py +82 -7
- warp/tests/test_array.py +56 -5
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +127 -114
- warp/tests/test_codegen.py +3 -2
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +45 -2
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +1 -1
- warp/tests/test_mat.py +1540 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +162 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +103 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tape.py +38 -0
- warp/tests/test_types.py +0 -20
- warp/tests/test_vec.py +216 -441
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +206 -152
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_reduce.py +100 -11
- warp/tests/tile/test_tile_shared_memory.py +16 -16
- warp/tests/tile/test_tile_sort.py +59 -55
- warp/tests/unittest_suites.py +16 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +554 -264
- warp/utils.py +68 -86
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/array.h
CHANGED
|
@@ -161,7 +161,7 @@ inline CUDA_CALLABLE void print(shape_t s)
|
|
|
161
161
|
// should probably store ndim with shape
|
|
162
162
|
printf("(%d, %d, %d, %d)\n", s.dims[0], s.dims[1], s.dims[2], s.dims[3]);
|
|
163
163
|
}
|
|
164
|
-
inline CUDA_CALLABLE void adj_print(shape_t s, shape_t&
|
|
164
|
+
inline CUDA_CALLABLE void adj_print(shape_t s, shape_t& adj_s) {}
|
|
165
165
|
|
|
166
166
|
|
|
167
167
|
template <typename T>
|
|
@@ -252,6 +252,89 @@ struct array_t
|
|
|
252
252
|
};
|
|
253
253
|
|
|
254
254
|
|
|
255
|
+
// Required when compiling adjoints.
|
|
256
|
+
template <typename T>
|
|
257
|
+
inline CUDA_CALLABLE array_t<T> add(
|
|
258
|
+
const array_t<T>& a, const array_t<T>& b
|
|
259
|
+
)
|
|
260
|
+
{
|
|
261
|
+
return array_t<T>();
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
// Stack‑allocated counterpart to `array_t<T>`.
|
|
266
|
+
// Useful for small buffers that have their shape known at compile-time,
|
|
267
|
+
// and that gain from having array semantics instead of vectors.
|
|
268
|
+
template <int Size, typename T>
|
|
269
|
+
struct fixedarray_t : array_t<T>
|
|
270
|
+
{
|
|
271
|
+
using Base = array_t<T>;
|
|
272
|
+
|
|
273
|
+
static_assert(Size > 0, "Expected Size > 0");
|
|
274
|
+
|
|
275
|
+
CUDA_CALLABLE inline fixedarray_t()
|
|
276
|
+
: Base(storage, Size), storage()
|
|
277
|
+
{}
|
|
278
|
+
|
|
279
|
+
CUDA_CALLABLE fixedarray_t(int dim0, T* grad=nullptr)
|
|
280
|
+
: Base(storage, dim0, grad), storage()
|
|
281
|
+
{
|
|
282
|
+
assert(Size == dim0);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
CUDA_CALLABLE fixedarray_t(int dim0, int dim1, T* grad=nullptr)
|
|
286
|
+
: Base(storage, dim0, dim1, grad), storage()
|
|
287
|
+
{
|
|
288
|
+
assert(Size == dim0 * dim1);
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, T* grad=nullptr)
|
|
292
|
+
: Base(storage, dim0, dim1, dim2, grad), storage()
|
|
293
|
+
{
|
|
294
|
+
assert(Size == dim0 * dim1 * dim2);
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, int dim3, T* grad=nullptr)
|
|
298
|
+
: Base(storage, dim0, dim1, dim2, dim3, grad), storage()
|
|
299
|
+
{
|
|
300
|
+
assert(Size == dim0 * dim1 * dim2 * dim3);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
CUDA_CALLABLE fixedarray_t<Size, T>& operator=(const fixedarray_t<Size, T>& other)
|
|
304
|
+
{
|
|
305
|
+
for (unsigned int i = 0; i < Size; ++i)
|
|
306
|
+
{
|
|
307
|
+
this->storage[i] = other.storage[i];
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
this->data = this->storage;
|
|
311
|
+
this->grad = nullptr;
|
|
312
|
+
this->shape = other.shape;
|
|
313
|
+
|
|
314
|
+
for (unsigned int i = 0; i < ARRAY_MAX_DIMS; ++i)
|
|
315
|
+
{
|
|
316
|
+
this->strides[i] = other.strides[i];
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
this->ndim = other.ndim;
|
|
320
|
+
|
|
321
|
+
return *this;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
T storage[Size];
|
|
325
|
+
};
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
// Required when compiling adjoints.
|
|
329
|
+
template <int Size, typename T>
|
|
330
|
+
inline CUDA_CALLABLE fixedarray_t<Size, T> add(
|
|
331
|
+
const fixedarray_t<Size, T>& a, const fixedarray_t<Size, T>& b
|
|
332
|
+
)
|
|
333
|
+
{
|
|
334
|
+
return fixedarray_t<Size, T>();
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
|
|
255
338
|
// TODO:
|
|
256
339
|
// - templated index type?
|
|
257
340
|
// - templated dimensionality? (also for array_t to save space when passing arrays to kernels)
|
|
@@ -665,11 +748,11 @@ CUDA_CALLABLE inline indexedarray_t<T> view(indexedarray_t<T>& src, int i, int j
|
|
|
665
748
|
}
|
|
666
749
|
|
|
667
750
|
template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
|
|
668
|
-
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, A2<T>& adj_src, int adj_i, A3<T
|
|
751
|
+
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, A2<T>& adj_src, int adj_i, A3<T>& adj_ret) {}
|
|
669
752
|
template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
|
|
670
|
-
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, A2<T>& adj_src, int adj_i, int adj_j, A3<T
|
|
753
|
+
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, A2<T>& adj_src, int adj_i, int adj_j, A3<T>& adj_ret) {}
|
|
671
754
|
template<template<typename> class A1, template<typename> class A2, template<typename> class A3, typename T>
|
|
672
|
-
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, int k, A2<T>& adj_src, int adj_i, int adj_j, int adj_k, A3<T
|
|
755
|
+
inline CUDA_CALLABLE void adj_view(A1<T>& src, int i, int j, int k, A2<T>& adj_src, int adj_i, int adj_j, int adj_k, A3<T>& adj_ret) {}
|
|
673
756
|
|
|
674
757
|
// TODO: lower_bound() for indexed arrays?
|
|
675
758
|
|
|
@@ -762,13 +845,25 @@ template<template<typename> class A, typename T>
|
|
|
762
845
|
inline CUDA_CALLABLE T atomic_exch(const A<T>& buf, int i, int j, int k, int l, T value) { return atomic_exch(&index(buf, i, j, k, l), value); }
|
|
763
846
|
|
|
764
847
|
template<template<typename> class A, typename T>
|
|
765
|
-
inline CUDA_CALLABLE T* address(const A<T>& buf, int i)
|
|
848
|
+
inline CUDA_CALLABLE T* address(const A<T>& buf, int i)
|
|
849
|
+
{
|
|
850
|
+
return &index(buf, i); // cppcheck-suppress returnDanglingLifetime
|
|
851
|
+
}
|
|
766
852
|
template<template<typename> class A, typename T>
|
|
767
|
-
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j)
|
|
853
|
+
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j)
|
|
854
|
+
{
|
|
855
|
+
return &index(buf, i, j); // cppcheck-suppress returnDanglingLifetime
|
|
856
|
+
}
|
|
768
857
|
template<template<typename> class A, typename T>
|
|
769
|
-
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k)
|
|
858
|
+
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k)
|
|
859
|
+
{
|
|
860
|
+
return &index(buf, i, j, k); // cppcheck-suppress returnDanglingLifetime
|
|
861
|
+
}
|
|
770
862
|
template<template<typename> class A, typename T>
|
|
771
|
-
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l)
|
|
863
|
+
inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l)
|
|
864
|
+
{
|
|
865
|
+
return &index(buf, i, j, k, l); // cppcheck-suppress returnDanglingLifetime
|
|
866
|
+
}
|
|
772
867
|
|
|
773
868
|
template<template<typename> class A, typename T>
|
|
774
869
|
inline CUDA_CALLABLE void array_store(const A<T>& buf, int i, T value)
|
warp/native/builtin.h
CHANGED
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
#define DEG_TO_RAD 0.01745329251994329577
|
|
50
50
|
|
|
51
51
|
#if defined(__CUDACC__) && !defined(_MSC_VER)
|
|
52
|
-
__device__ void __debugbreak() {}
|
|
52
|
+
// Device-side __debugbreak() for CUDA builds that are not compiled with MSVC
// (see the enclosing #if); forwards to the __brkpt() intrinsic.
__device__ void __debugbreak() { __brkpt(); }
|
|
53
53
|
#endif
|
|
54
54
|
|
|
55
55
|
#if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
|
|
@@ -197,19 +197,19 @@ CUDA_CALLABLE inline float half_to_float(half h)
|
|
|
197
197
|
|
|
198
198
|
#else // Native C++ for Warp builtins outside of kernels
|
|
199
199
|
|
|
200
|
-
extern "C" WP_API uint16_t
|
|
201
|
-
extern "C" WP_API float
|
|
200
|
+
extern "C" WP_API uint16_t wp_float_to_half_bits(float x);
|
|
201
|
+
extern "C" WP_API float wp_half_bits_to_float(uint16_t u);
|
|
202
202
|
|
|
203
203
|
inline half float_to_half(float x)
|
|
204
204
|
{
|
|
205
205
|
half h;
|
|
206
|
-
h.u =
|
|
206
|
+
h.u = wp_float_to_half_bits(x);
|
|
207
207
|
return h;
|
|
208
208
|
}
|
|
209
209
|
|
|
210
210
|
inline float half_to_float(half h)
|
|
211
211
|
{
|
|
212
|
-
return
|
|
212
|
+
return wp_half_bits_to_float(h.u);
|
|
213
213
|
}
|
|
214
214
|
|
|
215
215
|
#endif
|
|
@@ -268,16 +268,20 @@ inline CUDA_CALLABLE half operator / (half a,half b)
|
|
|
268
268
|
|
|
269
269
|
|
|
270
270
|
template <typename T>
|
|
271
|
-
CUDA_CALLABLE float cast_float(T x) { return (float)(x); }
|
|
271
|
+
CUDA_CALLABLE inline float cast_float(T x) { return (float)(x); }
|
|
272
272
|
|
|
273
273
|
template <typename T>
|
|
274
|
-
CUDA_CALLABLE int cast_int(T x) { return (int)(x); }
|
|
274
|
+
CUDA_CALLABLE inline int cast_int(T x) { return (int)(x); }
|
|
275
275
|
|
|
276
276
|
template <typename T>
|
|
277
|
-
CUDA_CALLABLE void adj_cast_float(T x, T& adj_x, float adj_ret) {
|
|
277
|
+
CUDA_CALLABLE inline void adj_cast_float(T x, T& adj_x, float adj_ret) {}
|
|
278
|
+
|
|
279
|
+
// Floating-point overloads: the incoming adjoint is converted to the input
// type and accumulated into adj_x.
CUDA_CALLABLE inline void adj_cast_float(float16 x, float16& adj_x, float adj_ret)
{
    adj_x += float16(adj_ret);
}

CUDA_CALLABLE inline void adj_cast_float(float32 x, float32& adj_x, float adj_ret)
{
    adj_x += float32(adj_ret);
}

CUDA_CALLABLE inline void adj_cast_float(float64 x, float64& adj_x, float adj_ret)
{
    adj_x += float64(adj_ret);
}
|
|
278
282
|
|
|
279
283
|
template <typename T>
|
|
280
|
-
CUDA_CALLABLE void adj_cast_int(T x, T& adj_x, int adj_ret) {
|
|
284
|
+
CUDA_CALLABLE inline void adj_cast_int(T x, T& adj_x, int adj_ret) {}
|
|
281
285
|
|
|
282
286
|
template <typename T>
|
|
283
287
|
CUDA_CALLABLE inline void adj_int8(T, T&, int8) {}
|
|
@@ -1273,6 +1277,83 @@ inline CUDA_CALLABLE_DEVICE void tid(int& i, int& j, int& k, int& l, size_t inde
|
|
|
1273
1277
|
l = c.l;
|
|
1274
1278
|
}
|
|
1275
1279
|
|
|
1280
|
+
// should match types.py
// Sentinel values meaning "start not specified" / "stop not specified".
// NOTE(review): sizeof(int) counts bytes, not bits, so with a 4-byte int the
// shift amount is 3 and these evaluate to 7 and -8 rather than the
// INT_MAX / INT_MIN named in the trailing comments. Confirm against the
// matching constants in types.py before changing either side.
constexpr int SLICE_BEGIN = (1U << (sizeof(int) - 1)) - 1; // std::numeric_limits<int>::max()
constexpr int SLICE_END = -(1U << (sizeof(int) - 1)); // std::numeric_limits<int>::min()
|
|
1283
|
+
|
|
1284
|
+
struct slice_t
|
|
1285
|
+
{
|
|
1286
|
+
int start;
|
|
1287
|
+
int stop;
|
|
1288
|
+
int step;
|
|
1289
|
+
|
|
1290
|
+
CUDA_CALLABLE inline slice_t()
|
|
1291
|
+
: start(SLICE_BEGIN), stop(SLICE_END), step(1)
|
|
1292
|
+
{}
|
|
1293
|
+
|
|
1294
|
+
CUDA_CALLABLE inline slice_t(int start, int stop, int step)
|
|
1295
|
+
: start(start), stop(stop), step(step)
|
|
1296
|
+
{}
|
|
1297
|
+
};
|
|
1298
|
+
|
|
1299
|
+
// Resolves a slice against a sequence of `length` elements.
//
// Unspecified bounds (SLICE_BEGIN / SLICE_END) are replaced by the natural
// ends for the step direction. Explicit bounds are clamped to
// [-length, length] and negative values are then offset by `length`.
// The step is returned unchanged. A zero step is reported and asserted on in
// debug builds only.
//
// NOTE(review): explicit bounds are clamped the same way for both step signs;
// CPython's slice adjustment clamps to -1 / length - 1 when step < 0.
// Confirm whether the difference is intended.
CUDA_CALLABLE inline slice_t slice_adjust_indices(const slice_t& slice, int length)
{
#ifndef NDEBUG
    if (slice.step == 0)
    {
        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
        assert(0);
    }
#endif

    const bool reversed = slice.step < 0;

    // Defaults used when the corresponding bound is a sentinel.
    int first = reversed ? length - 1 : 0;
    int last = reversed ? -1 : length;

    if (slice.start != SLICE_BEGIN)
    {
        first = min(max(slice.start, -length), length);
        if (first < 0)
        {
            first += length;
        }
    }

    if (slice.stop != SLICE_END)
    {
        last = min(max(slice.stop, -length), length);
        if (last < 0)
        {
            last += length;
        }
    }

    return slice_t(first, last, slice.step);
}
|
|
1333
|
+
|
|
1334
|
+
// Returns the number of elements selected by a slice, using its start, stop
// and step exactly as stored (no sentinel handling or clamping is done here).
// Yields 0 when the range is empty for the step direction, or when step is 0
// (which is also reported and asserted on in debug builds).
CUDA_CALLABLE inline int slice_get_length(const slice_t& slice)
{
#ifndef NDEBUG
    if (slice.step == 0)
    {
        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
        assert(0);
    }
#endif

    const int step = slice.step;

    if (step > 0)
    {
        // Forward slice: ceil((stop - start) / step) when non-empty.
        return slice.start < slice.stop ? 1 + (slice.stop - slice.start - 1) / step : 0;
    }

    if (step < 0)
    {
        // Backward slice: the same count with the roles of start and stop swapped.
        return slice.start > slice.stop ? 1 + (slice.start - slice.stop - 1) / (-step) : 0;
    }

    return 0;
}
|
|
1356
|
+
|
|
1276
1357
|
template<typename T>
|
|
1277
1358
|
inline CUDA_CALLABLE T atomic_add(T* buf, T value)
|
|
1278
1359
|
{
|
warp/native/bvh.cpp
CHANGED
|
@@ -22,7 +22,9 @@
|
|
|
22
22
|
#include "warp.h"
|
|
23
23
|
#include "cuda_util.h"
|
|
24
24
|
|
|
25
|
+
#include <cassert>
|
|
25
26
|
#include <map>
|
|
27
|
+
#include <climits>
|
|
26
28
|
|
|
27
29
|
using namespace wp;
|
|
28
30
|
|
|
@@ -40,6 +42,8 @@ public:
|
|
|
40
42
|
|
|
41
43
|
private:
|
|
42
44
|
|
|
45
|
+
void initialize_empty(BVH& bvh);
|
|
46
|
+
|
|
43
47
|
bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
|
|
44
48
|
|
|
45
49
|
int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
|
|
@@ -54,30 +58,64 @@ private:
|
|
|
54
58
|
|
|
55
59
|
//////////////////////////////////////////////////////////////////////
|
|
56
60
|
|
|
61
|
+
// Puts `bvh` into an empty state: zero depth/node/leaf counters and null
// buffers. No memory is allocated or released here.
// NOTE(review): num_nodes and num_items are not reset by this helper —
// confirm that callers do not rely on them after an empty build.
void TopDownBVHBuilder::initialize_empty(BVH& bvh)
{
    // counters
    bvh.max_depth = 0;
    bvh.max_nodes = 0;
    bvh.num_leaf_nodes = 0;

    // per-node buffers
    bvh.node_lowers = nullptr;
    bvh.node_uppers = nullptr;
    bvh.node_parents = nullptr;
    bvh.node_counts = nullptr;

    // root slot and primitive ordering
    bvh.root = nullptr;
    bvh.primitive_indices = nullptr;
}
|
|
73
|
+
|
|
57
74
|
void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n, int in_constructor_type)
|
|
58
75
|
{
|
|
76
|
+
assert(n >= 0);
|
|
77
|
+
if (n > 0)
|
|
78
|
+
{
|
|
79
|
+
assert(lowers != nullptr && uppers != nullptr && "Pointers must be valid for n > 0");
|
|
80
|
+
}
|
|
81
|
+
|
|
59
82
|
constructor_type = in_constructor_type;
|
|
60
83
|
if (constructor_type != BVH_CONSTRUCTOR_SAH && constructor_type != BVH_CONSTRUCTOR_MEDIAN)
|
|
61
84
|
{
|
|
62
|
-
|
|
85
|
+
fprintf(stderr, "Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
|
|
63
86
|
constructor_type, BVH_CONSTRUCTOR_SAH, BVH_CONSTRUCTOR_MEDIAN);
|
|
64
87
|
return;
|
|
65
88
|
}
|
|
66
89
|
|
|
90
|
+
if (n < 0)
|
|
91
|
+
{
|
|
92
|
+
fprintf(stderr, "Error: Cannot build BVH with a negative primitive count: %d\n", n);
|
|
93
|
+
initialize_empty(bvh);
|
|
94
|
+
return;
|
|
95
|
+
}
|
|
96
|
+
else if (n == 0)
|
|
97
|
+
{
|
|
98
|
+
initialize_empty(bvh);
|
|
99
|
+
return;
|
|
100
|
+
}
|
|
101
|
+
else if (n > INT_MAX / 2)
|
|
102
|
+
{
|
|
103
|
+
fprintf(stderr, "Error: Primitive count %d is too large and would cause an integer overflow.\n", n);
|
|
104
|
+
initialize_empty(bvh);
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
|
|
67
108
|
bvh.max_depth = 0;
|
|
68
109
|
bvh.max_nodes = 2*n-1;
|
|
69
110
|
|
|
70
111
|
bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
|
|
71
112
|
bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
|
|
72
113
|
bvh.node_parents = new int[bvh.max_nodes];
|
|
73
|
-
bvh.node_counts =
|
|
114
|
+
bvh.node_counts = nullptr;
|
|
74
115
|
|
|
75
116
|
// root is always in first slot for top down builders
|
|
76
117
|
bvh.root = new int[1];
|
|
77
118
|
bvh.root[0] = 0;
|
|
78
|
-
|
|
79
|
-
if (n == 0)
|
|
80
|
-
return;
|
|
81
119
|
|
|
82
120
|
bvh.primitive_indices = new int[n];
|
|
83
121
|
for (int i = 0; i < n; ++i)
|
|
@@ -273,8 +311,6 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
|
|
|
273
311
|
{
|
|
274
312
|
assert(start < end);
|
|
275
313
|
|
|
276
|
-
// printf("start %d end %d\n", start, end);
|
|
277
|
-
|
|
278
314
|
const int n = end - start;
|
|
279
315
|
const int node_index = bvh.num_nodes++;
|
|
280
316
|
|
|
@@ -353,8 +389,8 @@ void bvh_refit_recursive(BVH& bvh, int index)
|
|
|
353
389
|
bound.add_bounds(bvh.item_lowers[item], bvh.item_uppers[item]);
|
|
354
390
|
}
|
|
355
391
|
|
|
356
|
-
(
|
|
357
|
-
(
|
|
392
|
+
reinterpret_cast<vec3&>(lower) = bound.lower;
|
|
393
|
+
reinterpret_cast<vec3&>(upper) = bound.upper;
|
|
358
394
|
}
|
|
359
395
|
else
|
|
360
396
|
{
|
|
@@ -365,19 +401,19 @@ void bvh_refit_recursive(BVH& bvh, int index)
|
|
|
365
401
|
bvh_refit_recursive(bvh, right_index);
|
|
366
402
|
|
|
367
403
|
// compute union of children
|
|
368
|
-
const vec3& left_lower = (
|
|
369
|
-
const vec3& left_upper = (
|
|
404
|
+
const vec3& left_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[left_index]);
|
|
405
|
+
const vec3& left_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[left_index]);
|
|
370
406
|
|
|
371
|
-
const vec3& right_lower = (
|
|
372
|
-
const vec3& right_upper = (
|
|
407
|
+
const vec3& right_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[right_index]);
|
|
408
|
+
const vec3& right_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[right_index]);
|
|
373
409
|
|
|
374
410
|
// union of child bounds
|
|
375
411
|
vec3 new_lower = min(left_lower, right_lower);
|
|
376
412
|
vec3 new_upper = max(left_upper, right_upper);
|
|
377
413
|
|
|
378
414
|
// write new BVH nodes
|
|
379
|
-
(
|
|
380
|
-
(
|
|
415
|
+
reinterpret_cast<vec3&>(lower) = new_lower;
|
|
416
|
+
reinterpret_cast<vec3&>(upper) = new_upper;
|
|
381
417
|
}
|
|
382
418
|
}
|
|
383
419
|
|
|
@@ -448,11 +484,11 @@ void bvh_destroy_host(BVH& bvh)
|
|
|
448
484
|
delete[] bvh.primitive_indices;
|
|
449
485
|
delete[] bvh.root;
|
|
450
486
|
|
|
451
|
-
bvh.node_lowers =
|
|
452
|
-
bvh.node_uppers =
|
|
453
|
-
bvh.node_parents =
|
|
454
|
-
bvh.primitive_indices =
|
|
455
|
-
bvh.root =
|
|
487
|
+
bvh.node_lowers = nullptr;
|
|
488
|
+
bvh.node_uppers = nullptr;
|
|
489
|
+
bvh.node_parents = nullptr;
|
|
490
|
+
bvh.primitive_indices = nullptr;
|
|
491
|
+
bvh.root = nullptr;
|
|
456
492
|
|
|
457
493
|
bvh.max_nodes = 0;
|
|
458
494
|
bvh.num_items = 0;
|
|
@@ -460,7 +496,7 @@ void bvh_destroy_host(BVH& bvh)
|
|
|
460
496
|
|
|
461
497
|
} // namespace wp
|
|
462
498
|
|
|
463
|
-
uint64_t
|
|
499
|
+
uint64_t wp_bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
|
|
464
500
|
{
|
|
465
501
|
BVH* bvh = new BVH();
|
|
466
502
|
wp::bvh_create_host(lowers, uppers, num_items, constructor_type, *bvh);
|
|
@@ -468,16 +504,16 @@ uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int construc
|
|
|
468
504
|
return (uint64_t)bvh;
|
|
469
505
|
}
|
|
470
506
|
|
|
471
|
-
void
|
|
507
|
+
void wp_bvh_refit_host(uint64_t id)
|
|
472
508
|
{
|
|
473
509
|
BVH* bvh = (BVH*)(id);
|
|
474
|
-
bvh_refit_host(*bvh);
|
|
510
|
+
wp::bvh_refit_host(*bvh);
|
|
475
511
|
}
|
|
476
512
|
|
|
477
|
-
void
|
|
513
|
+
void wp_bvh_destroy_host(uint64_t id)
|
|
478
514
|
{
|
|
479
515
|
BVH* bvh = (BVH*)(id);
|
|
480
|
-
bvh_destroy_host(*bvh);
|
|
516
|
+
wp::bvh_destroy_host(*bvh);
|
|
481
517
|
delete bvh;
|
|
482
518
|
}
|
|
483
519
|
|
|
@@ -485,8 +521,8 @@ void bvh_destroy_host(uint64_t id)
|
|
|
485
521
|
// stubs for non-CUDA platforms
|
|
486
522
|
#if !WP_ENABLE_CUDA
|
|
487
523
|
|
|
488
|
-
uint64_t
|
|
489
|
-
void
|
|
490
|
-
void
|
|
524
|
+
// Device entry points for builds without CUDA: creation returns a zero id and
// the other calls do nothing.
uint64_t wp_bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type)
{
    return 0;
}

void wp_bvh_refit_device(uint64_t id)
{
}

void wp_bvh_destroy_device(uint64_t id)
{
}
|
|
491
527
|
|
|
492
528
|
#endif // !WP_ENABLE_CUDA
|