warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (141)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +1904 -114
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +331 -101
  7. warp/builtins.py +1244 -160
  8. warp/codegen.py +317 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1465 -789
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_kernel.py +2 -1
  18. warp/fabric.py +1 -1
  19. warp/fem/cache.py +27 -19
  20. warp/fem/domain.py +2 -2
  21. warp/fem/field/nodal_field.py +2 -2
  22. warp/fem/field/virtual.py +264 -166
  23. warp/fem/geometry/geometry.py +5 -5
  24. warp/fem/integrate.py +129 -51
  25. warp/fem/space/restriction.py +4 -0
  26. warp/fem/space/shape/tet_shape_function.py +3 -10
  27. warp/jax_experimental/custom_call.py +25 -2
  28. warp/jax_experimental/ffi.py +22 -1
  29. warp/jax_experimental/xla_ffi.py +16 -7
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +99 -4
  32. warp/native/builtin.h +86 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +8 -2
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +41 -10
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +2 -2
  48. warp/native/mat.h +1910 -116
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +4 -2
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +331 -14
  59. warp/native/range.h +7 -1
  60. warp/native/reduce.cpp +10 -10
  61. warp/native/reduce.cu +13 -14
  62. warp/native/runlength_encode.cpp +2 -2
  63. warp/native/runlength_encode.cu +5 -5
  64. warp/native/scan.cpp +3 -3
  65. warp/native/scan.cu +4 -4
  66. warp/native/sort.cpp +10 -10
  67. warp/native/sort.cu +40 -31
  68. warp/native/sort.h +2 -0
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +13 -13
  71. warp/native/spatial.h +366 -17
  72. warp/native/temp_buffer.h +2 -2
  73. warp/native/tile.h +471 -82
  74. warp/native/vec.h +328 -14
  75. warp/native/volume.cpp +54 -54
  76. warp/native/volume.cu +1 -1
  77. warp/native/volume.h +2 -1
  78. warp/native/volume_builder.cu +30 -37
  79. warp/native/warp.cpp +150 -149
  80. warp/native/warp.cu +377 -216
  81. warp/native/warp.h +227 -226
  82. warp/optim/linear.py +736 -271
  83. warp/render/imgui_manager.py +289 -0
  84. warp/render/render_opengl.py +99 -18
  85. warp/render/render_usd.py +1 -0
  86. warp/sim/graph_coloring.py +2 -2
  87. warp/sparse.py +558 -175
  88. warp/tests/aux_test_module_aot.py +7 -0
  89. warp/tests/cuda/test_async.py +3 -3
  90. warp/tests/cuda/test_conditional_captures.py +101 -0
  91. warp/tests/geometry/test_hash_grid.py +38 -0
  92. warp/tests/geometry/test_marching_cubes.py +233 -12
  93. warp/tests/interop/test_jax.py +608 -28
  94. warp/tests/sim/test_coloring.py +6 -6
  95. warp/tests/test_array.py +58 -5
  96. warp/tests/test_codegen.py +4 -3
  97. warp/tests/test_context.py +8 -15
  98. warp/tests/test_enum.py +136 -0
  99. warp/tests/test_examples.py +2 -2
  100. warp/tests/test_fem.py +49 -6
  101. warp/tests/test_fixedarray.py +229 -0
  102. warp/tests/test_func.py +18 -15
  103. warp/tests/test_future_annotations.py +7 -5
  104. warp/tests/test_linear_solvers.py +30 -0
  105. warp/tests/test_map.py +15 -1
  106. warp/tests/test_mat.py +1518 -378
  107. warp/tests/test_mat_assign_copy.py +178 -0
  108. warp/tests/test_mat_constructors.py +574 -0
  109. warp/tests/test_module_aot.py +287 -0
  110. warp/tests/test_print.py +69 -0
  111. warp/tests/test_quat.py +140 -34
  112. warp/tests/test_quat_assign_copy.py +145 -0
  113. warp/tests/test_reload.py +2 -1
  114. warp/tests/test_sparse.py +71 -0
  115. warp/tests/test_spatial.py +140 -34
  116. warp/tests/test_spatial_assign_copy.py +160 -0
  117. warp/tests/test_struct.py +43 -3
  118. warp/tests/test_tuple.py +96 -0
  119. warp/tests/test_types.py +61 -20
  120. warp/tests/test_vec.py +179 -34
  121. warp/tests/test_vec_assign_copy.py +143 -0
  122. warp/tests/tile/test_tile.py +245 -18
  123. warp/tests/tile/test_tile_cholesky.py +605 -0
  124. warp/tests/tile/test_tile_load.py +169 -0
  125. warp/tests/tile/test_tile_mathdx.py +2 -558
  126. warp/tests/tile/test_tile_matmul.py +1 -1
  127. warp/tests/tile/test_tile_mlp.py +1 -1
  128. warp/tests/tile/test_tile_shared_memory.py +5 -5
  129. warp/tests/unittest_suites.py +6 -0
  130. warp/tests/walkthrough_debug.py +1 -1
  131. warp/thirdparty/unittest_parallel.py +108 -9
  132. warp/types.py +571 -267
  133. warp/utils.py +68 -86
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
  135. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
  136. warp/native/marching.cpp +0 -19
  137. warp/native/marching.cu +0 -514
  138. warp/native/marching.h +0 -19
  139. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
  140. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
  141. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/spatial.h CHANGED
@@ -34,7 +34,7 @@ CUDA_CALLABLE inline Type spatial_dot(const spatial_vector_t<Type>& a, const spa
 template<typename Type>
 CUDA_CALLABLE inline vec_t<3,Type> &w_vec( spatial_vector_t<Type>& a )
 {
-    return *(vec_t<3,Type>*)(&a);
+    return *reinterpret_cast<vec_t<3,Type>*>(&a);
 }
 
 template<typename Type>
@@ -46,14 +46,14 @@ CUDA_CALLABLE inline vec_t<3,Type> &v_vec( spatial_vector_t<Type>& a )
 template<typename Type>
 CUDA_CALLABLE inline const vec_t<3,Type> &w_vec( const spatial_vector_t<Type>& a )
 {
-    spatial_vector_t<Type> &non_const_vec = *(spatial_vector_t<Type>*)(const_cast<Type*>(&a.c[0]));
+    spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
     return w_vec(non_const_vec);
 }
 
 template<typename Type>
 CUDA_CALLABLE inline const vec_t<3,Type> &v_vec( const spatial_vector_t<Type>& a )
 {
-    spatial_vector_t<Type> &non_const_vec = *(spatial_vector_t<Type>*)(const_cast<Type*>(&a.c[0]));
+    spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
     return v_vec(non_const_vec);
 }
 
@@ -408,27 +408,64 @@ template<typename Type>
 inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
-
+
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return t[idx];
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const transform_t<Type> & t, slice_t slice)
+{
+    vec_t<SliceLength, Type> ret;
+
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        ret[ii] = t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return &t[idx];
 }
 
@@ -436,13 +473,18 @@ template<typename Type>
 inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
-        printf("transformation store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return &((*t)[idx]);
 }
 
@@ -452,6 +494,34 @@ inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, trans
     adj_t[idx] += adj_ret;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const transform_t<Type>& t, slice_t slice,
+    transform_t<Type>& adj_t, slice_t& adj_slice,
+    const vec_t<SliceLength, Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_t[i] += adj_ret[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
@@ -470,120 +540,325 @@ template<typename Type>
 inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] += value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] += a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_t[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] -= value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] -= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
    {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value -= adj_t[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] -= adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] = value;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] = a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_t[idx];
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 
 template<typename Type>
 inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     transform_t<Type> ret(t);
     ret[idx] = value;
     return ret;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    transform_t<Type> ret(t);
+    assign_inplace<SliceLength>(ret, slice, a);
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_ret[idx];
     for(unsigned i=0; i < 7; ++i)
     {
@@ -592,6 +867,42 @@ inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type va
     }
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+    const transform_t<Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < 7; ++i)
+    {
+        bool in_slice = is_reversed
+            ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+            : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+        if (!in_slice)
+        {
+            adj_t[i] += adj_ret[i];
+        }
+        else
+        {
+            adj_a[ii] += adj_ret[i];
+            ++ii;
+        }
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 // adjoint methods
 template<typename Type>
 CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
@@ -600,6 +911,25 @@ CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<
     adj_add(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
 }
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_add(
+    const transform_t<Type>& a, Type b,
+    transform_t<Type>& adj_a, Type& adj_b,
+    const transform_t<Type>& adj_ret
+)
+{
+    adj_a += adj_ret;
+
+    adj_b += adj_ret.p[0];
+    adj_b += adj_ret.p[1];
+    adj_b += adj_ret.p[2];
+
+    adj_b += adj_ret.q[0];
+    adj_b += adj_ret.q[1];
+    adj_b += adj_ret.q[2];
+    adj_b += adj_ret.q[3];
+}
+
 template<typename Type>
 CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
 {
@@ -607,6 +937,25 @@ CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<
     adj_sub(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
 }
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_sub(
+    const transform_t<Type>& a, Type b,
+    transform_t<Type>& adj_a, Type& adj_b,
+    const transform_t<Type>& adj_ret
+)
+{
+    adj_a -= adj_ret;
+
+    adj_b -= adj_ret.p[0];
+    adj_b -= adj_ret.p[1];
+    adj_b -= adj_ret.p[2];
+
+    adj_b -= adj_ret.q[0];
+    adj_b -= adj_ret.q[1];
+    adj_b -= adj_ret.q[2];
+    adj_b -= adj_ret.q[3];
+}
+
 template<typename Type>
 CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, Type s, transform_t<Type>& adj_a, Type& adj_s, const transform_t<Type>& adj_ret)
 {
@@ -942,4 +1291,4 @@ using spatial_matrixh = spatial_matrix_t<half>;
 using spatial_matrixf = spatial_matrix_t<float>;
 using spatial_matrixd = spatial_matrix_t<double>;
 
-} // namespace wp
+} // namespace wp
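
Note on the spatial.h hunks above: scalar accessors now accept negative indices, normalized by adding 7 (so t[-1] resolves to component 6, the quaternion's w), and the new slice overloads all share a single iteration pattern over the transform's 7 components (p.x, p.y, p.z, q.x, q.y, q.z, q.w) with Python-style start/stop/step semantics. Below is a minimal standalone C++ sketch of that iteration pattern only; slice_stub and slice_stub_length are hypothetical stand-ins for Warp's actual slice_t and slice_get_length, whose definitions are not part of this diff.

// Minimal standalone sketch of the slice traversal used by the transform
// overloads above. slice_stub mimics Python's range semantics: start is the
// first index visited, stop is an exclusive bound, step may be negative.
#include <cassert>
#include <cstdio>

struct slice_stub
{
    int start; // first index visited
    int stop;  // exclusive bound; -1 lets a reversed slice reach index 0
    int step;  // non-zero; negative means reversed traversal
};

int slice_stub_length(const slice_stub& s)
{
    // Number of indices visited, mirroring len(range(start, stop, step)).
    int span = s.step < 0 ? s.start - s.stop : s.stop - s.start;
    int mag = s.step < 0 ? -s.step : s.step;
    return span <= 0 ? 0 : (span + mag - 1) / mag;
}

int main()
{
    // The 7 components of an identity transform: translation then quaternion.
    float t[7] = {1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f, 1.0f};

    // t[6:2:-2] in Python notation: visits indices 6 and 4.
    slice_stub s{6, 2, -2};
    assert(slice_stub_length(s) == 2);

    bool is_reversed = s.step < 0;
    int ii = 0;
    for (int i = s.start; is_reversed ? (i > s.stop) : (i < s.stop); i += s.step)
    {
        printf("out[%d] = t[%d] = %f\n", ii, i, t[i]);
        ++ii;
    }
    assert(ii == slice_stub_length(s));
    return 0;
}

The reversed case compares against stop with >, which is why the asserts in the diff allow stop to go down to -1: that is the only way a reversed slice can include index 0.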
warp/native/temp_buffer.h CHANGED
@@ -26,13 +26,13 @@ template <typename T = char> struct ScopedTemporary
 {
 
     ScopedTemporary(void *context, size_t size)
-        : m_context(context), m_buffer(static_cast<T*>(alloc_device(m_context, size * sizeof(T))))
+        : m_context(context), m_buffer(static_cast<T*>(wp_alloc_device(m_context, size * sizeof(T))))
     {
     }
 
     ~ScopedTemporary()
     {
-        free_device(m_context, m_buffer);
+        wp_free_device(m_context, m_buffer);
    }
 
     T *buffer() const
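
Note on the temp_buffer.h hunk: the only change is the wp_ prefix added to the native allocator entry points (alloc_device → wp_alloc_device, free_device → wp_free_device); the RAII behavior of ScopedTemporary is unchanged. A hypothetical usage sketch, assuming a valid device context pointer ctx and eliding the actual kernel launch:

// Hypothetical usage of the ScopedTemporary helper patched above.
// #include "temp_buffer.h"   // the header shown in this hunk
void run_with_scratch(void* ctx)
{
    // Constructor calls wp_alloc_device(ctx, 256 * sizeof(float)).
    ScopedTemporary<float> scratch(ctx, 256);
    float* d_ptr = scratch.buffer();
    // ... launch device work that reads/writes d_ptr ...
}   // Destructor calls wp_free_device(ctx, d_ptr) when scratch leaves scope.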