warp-lang 1.8.1-py3-none-win_amd64.whl → 1.9.1-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang has been flagged as possibly problematic.

Files changed (141)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +1904 -114
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +331 -101
  7. warp/builtins.py +1244 -160
  8. warp/codegen.py +317 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1465 -789
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_kernel.py +2 -1
  18. warp/fabric.py +1 -1
  19. warp/fem/cache.py +27 -19
  20. warp/fem/domain.py +2 -2
  21. warp/fem/field/nodal_field.py +2 -2
  22. warp/fem/field/virtual.py +264 -166
  23. warp/fem/geometry/geometry.py +5 -5
  24. warp/fem/integrate.py +129 -51
  25. warp/fem/space/restriction.py +4 -0
  26. warp/fem/space/shape/tet_shape_function.py +3 -10
  27. warp/jax_experimental/custom_call.py +25 -2
  28. warp/jax_experimental/ffi.py +22 -1
  29. warp/jax_experimental/xla_ffi.py +16 -7
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +99 -4
  32. warp/native/builtin.h +86 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +8 -2
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +41 -10
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +2 -2
  48. warp/native/mat.h +1910 -116
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +4 -2
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +331 -14
  59. warp/native/range.h +7 -1
  60. warp/native/reduce.cpp +10 -10
  61. warp/native/reduce.cu +13 -14
  62. warp/native/runlength_encode.cpp +2 -2
  63. warp/native/runlength_encode.cu +5 -5
  64. warp/native/scan.cpp +3 -3
  65. warp/native/scan.cu +4 -4
  66. warp/native/sort.cpp +10 -10
  67. warp/native/sort.cu +40 -31
  68. warp/native/sort.h +2 -0
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +13 -13
  71. warp/native/spatial.h +366 -17
  72. warp/native/temp_buffer.h +2 -2
  73. warp/native/tile.h +471 -82
  74. warp/native/vec.h +328 -14
  75. warp/native/volume.cpp +54 -54
  76. warp/native/volume.cu +1 -1
  77. warp/native/volume.h +2 -1
  78. warp/native/volume_builder.cu +30 -37
  79. warp/native/warp.cpp +150 -149
  80. warp/native/warp.cu +377 -216
  81. warp/native/warp.h +227 -226
  82. warp/optim/linear.py +736 -271
  83. warp/render/imgui_manager.py +289 -0
  84. warp/render/render_opengl.py +99 -18
  85. warp/render/render_usd.py +1 -0
  86. warp/sim/graph_coloring.py +2 -2
  87. warp/sparse.py +558 -175
  88. warp/tests/aux_test_module_aot.py +7 -0
  89. warp/tests/cuda/test_async.py +3 -3
  90. warp/tests/cuda/test_conditional_captures.py +101 -0
  91. warp/tests/geometry/test_hash_grid.py +38 -0
  92. warp/tests/geometry/test_marching_cubes.py +233 -12
  93. warp/tests/interop/test_jax.py +608 -28
  94. warp/tests/sim/test_coloring.py +6 -6
  95. warp/tests/test_array.py +58 -5
  96. warp/tests/test_codegen.py +4 -3
  97. warp/tests/test_context.py +8 -15
  98. warp/tests/test_enum.py +136 -0
  99. warp/tests/test_examples.py +2 -2
  100. warp/tests/test_fem.py +49 -6
  101. warp/tests/test_fixedarray.py +229 -0
  102. warp/tests/test_func.py +18 -15
  103. warp/tests/test_future_annotations.py +7 -5
  104. warp/tests/test_linear_solvers.py +30 -0
  105. warp/tests/test_map.py +15 -1
  106. warp/tests/test_mat.py +1518 -378
  107. warp/tests/test_mat_assign_copy.py +178 -0
  108. warp/tests/test_mat_constructors.py +574 -0
  109. warp/tests/test_module_aot.py +287 -0
  110. warp/tests/test_print.py +69 -0
  111. warp/tests/test_quat.py +140 -34
  112. warp/tests/test_quat_assign_copy.py +145 -0
  113. warp/tests/test_reload.py +2 -1
  114. warp/tests/test_sparse.py +71 -0
  115. warp/tests/test_spatial.py +140 -34
  116. warp/tests/test_spatial_assign_copy.py +160 -0
  117. warp/tests/test_struct.py +43 -3
  118. warp/tests/test_tuple.py +96 -0
  119. warp/tests/test_types.py +61 -20
  120. warp/tests/test_vec.py +179 -34
  121. warp/tests/test_vec_assign_copy.py +143 -0
  122. warp/tests/tile/test_tile.py +245 -18
  123. warp/tests/tile/test_tile_cholesky.py +605 -0
  124. warp/tests/tile/test_tile_load.py +169 -0
  125. warp/tests/tile/test_tile_mathdx.py +2 -558
  126. warp/tests/tile/test_tile_matmul.py +1 -1
  127. warp/tests/tile/test_tile_mlp.py +1 -1
  128. warp/tests/tile/test_tile_shared_memory.py +5 -5
  129. warp/tests/unittest_suites.py +6 -0
  130. warp/tests/walkthrough_debug.py +1 -1
  131. warp/thirdparty/unittest_parallel.py +108 -9
  132. warp/types.py +571 -267
  133. warp/utils.py +68 -86
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
  135. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
  136. warp/native/marching.cpp +0 -19
  137. warp/native/marching.cu +0 -514
  138. warp/native/marching.h +0 -19
  139. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
  140. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
  141. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/array.h CHANGED
@@ -252,6 +252,89 @@ struct array_t
 };
 
 
+// Required when compiling adjoints.
+template <typename T>
+inline CUDA_CALLABLE array_t<T> add(
+    const array_t<T>& a, const array_t<T>& b
+)
+{
+    return array_t<T>();
+}
+
+
+// Stack-allocated counterpart to `array_t<T>`.
+// Useful for small buffers that have their shape known at compile-time,
+// and that gain from having array semantics instead of vectors.
+template <int Size, typename T>
+struct fixedarray_t : array_t<T>
+{
+    using Base = array_t<T>;
+
+    static_assert(Size > 0, "Expected Size > 0");
+
+    CUDA_CALLABLE inline fixedarray_t()
+        : Base(storage, Size), storage()
+    {}
+
+    CUDA_CALLABLE fixedarray_t(int dim0, T* grad=nullptr)
+        : Base(storage, dim0, grad), storage()
+    {
+        assert(Size == dim0);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, T* grad=nullptr)
+        : Base(storage, dim0, dim1, grad), storage()
+    {
+        assert(Size == dim0 * dim1);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, T* grad=nullptr)
+        : Base(storage, dim0, dim1, dim2, grad), storage()
+    {
+        assert(Size == dim0 * dim1 * dim2);
+    }
+
+    CUDA_CALLABLE fixedarray_t(int dim0, int dim1, int dim2, int dim3, T* grad=nullptr)
+        : Base(storage, dim0, dim1, dim2, dim3, grad), storage()
+    {
+        assert(Size == dim0 * dim1 * dim2 * dim3);
+    }
+
+    CUDA_CALLABLE fixedarray_t<Size, T>& operator=(const fixedarray_t<Size, T>& other)
+    {
+        for (unsigned int i = 0; i < Size; ++i)
+        {
+            this->storage[i] = other.storage[i];
+        }
+
+        this->data = this->storage;
+        this->grad = nullptr;
+        this->shape = other.shape;
+
+        for (unsigned int i = 0; i < ARRAY_MAX_DIMS; ++i)
+        {
+            this->strides[i] = other.strides[i];
+        }
+
+        this->ndim = other.ndim;
+
+        return *this;
+    }
+
+    T storage[Size];
+};
+
+
+// Required when compiling adjoints.
+template <int Size, typename T>
+inline CUDA_CALLABLE fixedarray_t<Size, T> add(
+    const fixedarray_t<Size, T>& a, const fixedarray_t<Size, T>& b
+)
+{
+    return fixedarray_t<Size, T>();
+}
+
+
 // TODO:
 // - templated index type?
 // - templated dimensionality? (also for array_t to save space when passing arrays to kernels)
@@ -762,13 +845,25 @@ template<template<typename> class A, typename T>
 inline CUDA_CALLABLE T atomic_exch(const A<T>& buf, int i, int j, int k, int l, T value) { return atomic_exch(&index(buf, i, j, k, l), value); }
 
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i) { return &index(buf, i); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i)
+{
+    return &index(buf, i); // cppcheck-suppress returnDanglingLifetime
+}
 
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j) { return &index(buf, i, j); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j)
+{
+    return &index(buf, i, j); // cppcheck-suppress returnDanglingLifetime
+}
 
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k) { return &index(buf, i, j, k); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k)
+{
+    return &index(buf, i, j, k); // cppcheck-suppress returnDanglingLifetime
+}
 
 template<template<typename> class A, typename T>
-inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l) { return &index(buf, i, j, k, l); }
+inline CUDA_CALLABLE T* address(const A<T>& buf, int i, int j, int k, int l)
+{
+    return &index(buf, i, j, k, l); // cppcheck-suppress returnDanglingLifetime
+}
 
 template<template<typename> class A, typename T>
 inline CUDA_CALLABLE void array_store(const A<T>& buf, int i, T value)
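
The new fixedarray_t above is a stack-allocated array with a compile-time size that still presents the array_t interface. For reference, a minimal host-side sketch of how it behaves, assuming the definitions from warp/native/array.h are in scope in namespace wp; the 2x3 shape and the fixedarray_demo function are illustrative assumptions, not code from the package:

    #include <cassert>

    void fixedarray_demo()
    {
        // 2x3 fixed-size array: the Size template argument must equal
        // dim0 * dim1, which the constructor checks via assert().
        wp::fixedarray_t<6, float> a(2, 3);

        // The inherited data pointer aliases the stack storage.
        assert(a.data == a.storage);

        // Copy-assignment copies the elements and re-points data at the
        // destination's own storage instead of aliasing the source.
        wp::fixedarray_t<6, float> b(2, 3);
        b = a;
        assert(b.data == b.storage);
    }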
warp/native/builtin.h CHANGED
@@ -49,7 +49,7 @@
 #define DEG_TO_RAD 0.01745329251994329577
 
 #if defined(__CUDACC__) && !defined(_MSC_VER)
-__device__ void __debugbreak() {}
+__device__ void __debugbreak() { __brkpt(); }
 #endif
 
 #if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
@@ -197,19 +197,19 @@ CUDA_CALLABLE inline float half_to_float(half h)
 
 #else // Native C++ for Warp builtins outside of kernels
 
-extern "C" WP_API uint16_t float_to_half_bits(float x);
-extern "C" WP_API float half_bits_to_float(uint16_t u);
+extern "C" WP_API uint16_t wp_float_to_half_bits(float x);
+extern "C" WP_API float wp_half_bits_to_float(uint16_t u);
 
 inline half float_to_half(float x)
 {
     half h;
-    h.u = float_to_half_bits(x);
+    h.u = wp_float_to_half_bits(x);
     return h;
 }
 
 inline float half_to_float(half h)
 {
-    return half_bits_to_float(h.u);
+    return wp_half_bits_to_float(h.u);
 }
 
 #endif
@@ -1093,8 +1093,8 @@ CUDA_CALLABLE inline T select(const C& cond, const T& a, const T& b)
     return (!!cond) ? b : a;
 }
 
-template <typename C, typename T>
-CUDA_CALLABLE inline void adj_select(const C& cond, const T& a, const T& b, C& adj_cond, T& adj_a, T& adj_b, const T& adj_ret)
+template <typename C, typename TA, typename TB, typename TRet>
+CUDA_CALLABLE inline void adj_select(const C& cond, const TA& a, const TB& b, C& adj_cond, TA& adj_a, TB& adj_b, const TRet& adj_ret)
 {
     // The double NOT operator !! casts to bool without compiler warnings.
     if (!!cond)
@@ -1110,8 +1110,8 @@ CUDA_CALLABLE inline T where(const C& cond, const T& a, const T& b)
     return (!!cond) ? a : b;
 }
 
-template <typename C, typename T>
-CUDA_CALLABLE inline void adj_where(const C& cond, const T& a, const T& b, C& adj_cond, T& adj_a, T& adj_b, const T& adj_ret)
+template <typename C, typename TA, typename TB, typename TRet>
+CUDA_CALLABLE inline void adj_where(const C& cond, const TA& a, const TB& b, C& adj_cond, TA& adj_a, TB& adj_b, const TRet& adj_ret)
 {
     // The double NOT operator !! casts to bool without compiler warnings.
     if (!!cond)
@@ -1277,6 +1277,83 @@ inline CUDA_CALLABLE_DEVICE void tid(int& i, int& j, int& k, int& l, size_t index)
     l = c.l;
 }
 
+// should match types.py
+constexpr int SLICE_BEGIN = (1U << (sizeof(int) * 8 - 1)) - 1; // std::numeric_limits<int>::max()
+constexpr int SLICE_END = -(1U << (sizeof(int) * 8 - 1)); // std::numeric_limits<int>::min()
+
+struct slice_t
+{
+    int start;
+    int stop;
+    int step;
+
+    CUDA_CALLABLE inline slice_t()
+        : start(SLICE_BEGIN), stop(SLICE_END), step(1)
+    {}
+
+    CUDA_CALLABLE inline slice_t(int start, int stop, int step)
+        : start(start), stop(stop), step(step)
+    {}
+};
+
+CUDA_CALLABLE inline slice_t slice_adjust_indices(const slice_t& slice, int length)
+{
+#ifndef NDEBUG
+    if (slice.step == 0)
+    {
+        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    int start, stop;
+
+    if (slice.start == SLICE_BEGIN)
+    {
+        start = slice.step < 0 ? length - 1 : 0;
+    }
+    else
+    {
+        start = min(max(slice.start, -length), length);
+        start = start < 0 ? start + length : start;
+    }
+
+    if (slice.stop == SLICE_END)
+    {
+        stop = slice.step < 0 ? -1 : length;
+    }
+    else
+    {
+        stop = min(max(slice.stop, -length), length);
+        stop = stop < 0 ? stop + length : stop;
+    }
+
+    return {start, stop, slice.step};
+}
+
+CUDA_CALLABLE inline int slice_get_length(const slice_t& slice)
+{
+#ifndef NDEBUG
+    if (slice.step == 0)
+    {
+        printf("%s:%d slice step cannot be 0\n", __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (slice.step > 0 && slice.start < slice.stop)
+    {
+        return 1 + (slice.stop - slice.start - 1) / slice.step;
+    }
+
+    if (slice.step < 0 && slice.start > slice.stop)
+    {
+        return 1 + (slice.start - slice.stop - 1) / (-slice.step);
+    }
+
+    return 0;
+}
+
 template<typename T>
 inline CUDA_CALLABLE T atomic_add(T* buf, T value)
 {
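
The slice_t helpers above reproduce Python's slice-normalization rules (clamping, negative-index wrapping, and sign-dependent defaults). A hand-worked sketch, assuming slice_t, slice_adjust_indices, and slice_get_length from warp/native/builtin.h are in scope in namespace wp; the slice_demo function and the printed values are illustrative only:

    #include <cstdio>

    void slice_demo()
    {
        const int length = 5;

        // a[-3:] on a length-5 axis: start -3 clamps into range and wraps
        // to -3 + 5 = 2; stop defaults to length, covering indices {2, 3, 4}.
        wp::slice_t s1 = wp::slice_adjust_indices(wp::slice_t(-3, wp::SLICE_END, 1), length);
        printf("%d %d %d\n", s1.start, s1.stop, wp::slice_get_length(s1)); // 2 5 3

        // a[::-1]: with a negative step the defaults flip, giving
        // start = length - 1 = 4 and stop = -1, a full reversed traversal.
        wp::slice_t s2 = wp::slice_adjust_indices(wp::slice_t(wp::SLICE_BEGIN, wp::SLICE_END, -1), length);
        printf("%d %d %d\n", s2.start, s2.stop, wp::slice_get_length(s2)); // 4 -1 5
    }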
warp/native/bvh.cpp CHANGED
@@ -22,7 +22,9 @@
 #include "warp.h"
 #include "cuda_util.h"
 
+#include <cassert>
 #include <map>
+#include <climits>
 
 using namespace wp;
 
@@ -40,6 +42,8 @@ public:
 
 private:
 
+    void initialize_empty(BVH& bvh);
+
     bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
 
     int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
@@ -54,30 +58,64 @@ private:
 
 //////////////////////////////////////////////////////////////////////
 
+void TopDownBVHBuilder::initialize_empty(BVH& bvh)
+{
+    bvh.max_depth = 0;
+    bvh.max_nodes = 0;
+    bvh.node_lowers = nullptr;
+    bvh.node_uppers = nullptr;
+    bvh.node_parents = nullptr;
+    bvh.node_counts = nullptr;
+    bvh.root = nullptr;
+    bvh.primitive_indices = nullptr;
+    bvh.num_leaf_nodes = 0;
+}
+
 void TopDownBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n, int in_constructor_type)
 {
+    assert(n >= 0);
+    if (n > 0)
+    {
+        assert(lowers != nullptr && uppers != nullptr && "Pointers must be valid for n > 0");
+    }
+
     constructor_type = in_constructor_type;
     if (constructor_type != BVH_CONSTRUCTOR_SAH && constructor_type != BVH_CONSTRUCTOR_MEDIAN)
     {
-        printf("Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
+        fprintf(stderr, "Unrecognized Constructor type: %d! For CPU constructor it should be either SAH (%d) or Median (%d)!\n",
             constructor_type, BVH_CONSTRUCTOR_SAH, BVH_CONSTRUCTOR_MEDIAN);
         return;
     }
 
+    if (n < 0)
+    {
+        fprintf(stderr, "Error: Cannot build BVH with a negative primitive count: %d\n", n);
+        initialize_empty(bvh);
+        return;
+    }
+    else if (n == 0)
+    {
+        initialize_empty(bvh);
+        return;
+    }
+    else if (n > INT_MAX / 2)
+    {
+        fprintf(stderr, "Error: Primitive count %d is too large and would cause an integer overflow.\n", n);
+        initialize_empty(bvh);
+        return;
+    }
+
     bvh.max_depth = 0;
     bvh.max_nodes = 2*n-1;
 
     bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_parents = new int[bvh.max_nodes];
-    bvh.node_counts = NULL;
+    bvh.node_counts = nullptr;
 
     // root is always in first slot for top down builders
     bvh.root = new int[1];
     bvh.root[0] = 0;
-
-    if (n == 0)
-        return;
 
     bvh.primitive_indices = new int[n];
     for (int i = 0; i < n; ++i)
@@ -273,8 +311,6 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
 {
     assert(start < end);
 
-    // printf("start %d end %d\n", start, end);
-
     const int n = end - start;
     const int node_index = bvh.num_nodes++;
@@ -353,8 +389,8 @@ void bvh_refit_recursive(BVH& bvh, int index)
             bound.add_bounds(bvh.item_lowers[item], bvh.item_uppers[item]);
         }
 
-        (vec3&)lower = bound.lower;
-        (vec3&)upper = bound.upper;
+        reinterpret_cast<vec3&>(lower) = bound.lower;
+        reinterpret_cast<vec3&>(upper) = bound.upper;
     }
     else
     {
@@ -365,19 +401,19 @@ void bvh_refit_recursive(BVH& bvh, int index)
         bvh_refit_recursive(bvh, right_index);
 
         // compute union of children
-        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
-        const vec3& left_upper = (vec3&)bvh.node_uppers[left_index];
+        const vec3& left_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[left_index]);
+        const vec3& left_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[left_index]);
 
-        const vec3& right_lower = (vec3&)bvh.node_lowers[right_index];
-        const vec3& right_upper = (vec3&)bvh.node_uppers[right_index];
+        const vec3& right_lower = reinterpret_cast<const vec3&>(bvh.node_lowers[right_index]);
+        const vec3& right_upper = reinterpret_cast<const vec3&>(bvh.node_uppers[right_index]);
 
         // union of child bounds
         vec3 new_lower = min(left_lower, right_lower);
         vec3 new_upper = max(left_upper, right_upper);
 
         // write new BVH nodes
-        (vec3&)lower = new_lower;
-        (vec3&)upper = new_upper;
+        reinterpret_cast<vec3&>(lower) = new_lower;
+        reinterpret_cast<vec3&>(upper) = new_upper;
     }
 }
 
@@ -448,11 +484,11 @@ void bvh_destroy_host(BVH& bvh)
     delete[] bvh.primitive_indices;
     delete[] bvh.root;
 
-    bvh.node_lowers = NULL;
-    bvh.node_uppers = NULL;
-    bvh.node_parents = NULL;
-    bvh.primitive_indices = NULL;
-    bvh.root = NULL;
+    bvh.node_lowers = nullptr;
+    bvh.node_uppers = nullptr;
+    bvh.node_parents = nullptr;
+    bvh.primitive_indices = nullptr;
+    bvh.root = nullptr;
 
     bvh.max_nodes = 0;
     bvh.num_items = 0;
@@ -460,7 +496,7 @@ void bvh_destroy_host(BVH& bvh)
 
 } // namespace wp
 
-uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
+uint64_t wp_bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
 {
     BVH* bvh = new BVH();
     wp::bvh_create_host(lowers, uppers, num_items, constructor_type, *bvh);
@@ -468,16 +504,16 @@ uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type)
     return (uint64_t)bvh;
 }
 
-void bvh_refit_host(uint64_t id)
+void wp_bvh_refit_host(uint64_t id)
 {
     BVH* bvh = (BVH*)(id);
-    bvh_refit_host(*bvh);
+    wp::bvh_refit_host(*bvh);
 }
 
-void bvh_destroy_host(uint64_t id)
+void wp_bvh_destroy_host(uint64_t id)
 {
     BVH* bvh = (BVH*)(id);
-    bvh_destroy_host(*bvh);
+    wp::bvh_destroy_host(*bvh);
     delete bvh;
 }
 
@@ -485,8 +521,8 @@ void bvh_destroy_host(uint64_t id)
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
 
-uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type) { return 0; }
-void bvh_refit_device(uint64_t id) {}
-void bvh_destroy_device(uint64_t id) {}
+uint64_t wp_bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items, int constructor_type) { return 0; }
+void wp_bvh_refit_device(uint64_t id) {}
+void wp_bvh_destroy_device(uint64_t id) {}
 
 #endif // !WP_ENABLE_CUDA
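
Beyond the nullptr and cast cleanups, the exported C entry points gain a wp_ prefix and the builder now produces a well-defined empty BVH for n == 0 instead of returning early with partially initialized fields. A lifecycle sketch against the renamed entry points, assuming the declarations from the Warp native headers (wp::vec3, BVH_CONSTRUCTOR_SAH, and the wp_bvh_* prototypes) are in scope and the program links against the Warp native library; bvh_demo is illustrative, not package code:

    #include <cstdint>

    void bvh_demo(wp::vec3* lowers, wp::vec3* uppers, int n)
    {
        // n == 0 now yields an initialized-but-empty BVH (all node
        // pointers null) rather than partially constructed state.
        uint64_t id = wp_bvh_create_host(lowers, uppers, n, BVH_CONSTRUCTOR_SAH);

        // ... update lowers/uppers in place, then refit the hierarchy ...
        wp_bvh_refit_host(id);

        wp_bvh_destroy_host(id);
    }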