warp-lang 1.8.0-py3-none-manylinux_2_34_aarch64.whl → 1.9.0-py3-none-manylinux_2_34_aarch64.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (153)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +48 -63
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +327 -209
  9. warp/config.py +1 -1
  10. warp/context.py +1363 -800
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_callable.py +34 -4
  18. warp/examples/interop/example_jax_kernel.py +27 -1
  19. warp/fabric.py +1 -1
  20. warp/fem/cache.py +27 -19
  21. warp/fem/domain.py +2 -2
  22. warp/fem/field/nodal_field.py +2 -2
  23. warp/fem/field/virtual.py +266 -166
  24. warp/fem/geometry/geometry.py +5 -5
  25. warp/fem/integrate.py +200 -91
  26. warp/fem/space/restriction.py +4 -0
  27. warp/fem/space/shape/tet_shape_function.py +3 -10
  28. warp/jax_experimental/custom_call.py +1 -1
  29. warp/jax_experimental/ffi.py +203 -54
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +103 -8
  32. warp/native/builtin.h +90 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +13 -3
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +42 -11
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +4 -4
  48. warp/native/mat.h +1913 -119
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +5 -3
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +337 -16
  59. warp/native/rand.h +7 -7
  60. warp/native/range.h +7 -1
  61. warp/native/reduce.cpp +10 -10
  62. warp/native/reduce.cu +13 -14
  63. warp/native/runlength_encode.cpp +2 -2
  64. warp/native/runlength_encode.cu +5 -5
  65. warp/native/scan.cpp +3 -3
  66. warp/native/scan.cu +4 -4
  67. warp/native/sort.cpp +10 -10
  68. warp/native/sort.cu +22 -22
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +14 -14
  71. warp/native/spatial.h +366 -17
  72. warp/native/svd.h +23 -8
  73. warp/native/temp_buffer.h +2 -2
  74. warp/native/tile.h +303 -70
  75. warp/native/tile_radix_sort.h +5 -1
  76. warp/native/tile_reduce.h +16 -25
  77. warp/native/tuple.h +2 -2
  78. warp/native/vec.h +385 -18
  79. warp/native/volume.cpp +54 -54
  80. warp/native/volume.cu +1 -1
  81. warp/native/volume.h +2 -1
  82. warp/native/volume_builder.cu +30 -37
  83. warp/native/warp.cpp +150 -149
  84. warp/native/warp.cu +337 -193
  85. warp/native/warp.h +227 -226
  86. warp/optim/linear.py +736 -271
  87. warp/render/imgui_manager.py +289 -0
  88. warp/render/render_opengl.py +137 -57
  89. warp/render/render_usd.py +0 -1
  90. warp/sim/collide.py +1 -2
  91. warp/sim/graph_coloring.py +2 -2
  92. warp/sim/integrator_vbd.py +10 -2
  93. warp/sparse.py +559 -176
  94. warp/tape.py +2 -0
  95. warp/tests/aux_test_module_aot.py +7 -0
  96. warp/tests/cuda/test_async.py +3 -3
  97. warp/tests/cuda/test_conditional_captures.py +101 -0
  98. warp/tests/geometry/test_marching_cubes.py +233 -12
  99. warp/tests/sim/test_cloth.py +89 -6
  100. warp/tests/sim/test_coloring.py +82 -7
  101. warp/tests/test_array.py +56 -5
  102. warp/tests/test_assert.py +53 -0
  103. warp/tests/test_atomic_cas.py +127 -114
  104. warp/tests/test_codegen.py +3 -2
  105. warp/tests/test_context.py +8 -15
  106. warp/tests/test_enum.py +136 -0
  107. warp/tests/test_examples.py +2 -2
  108. warp/tests/test_fem.py +45 -2
  109. warp/tests/test_fixedarray.py +229 -0
  110. warp/tests/test_func.py +18 -15
  111. warp/tests/test_future_annotations.py +7 -5
  112. warp/tests/test_linear_solvers.py +30 -0
  113. warp/tests/test_map.py +1 -1
  114. warp/tests/test_mat.py +1540 -378
  115. warp/tests/test_mat_assign_copy.py +178 -0
  116. warp/tests/test_mat_constructors.py +574 -0
  117. warp/tests/test_module_aot.py +287 -0
  118. warp/tests/test_print.py +69 -0
  119. warp/tests/test_quat.py +162 -34
  120. warp/tests/test_quat_assign_copy.py +145 -0
  121. warp/tests/test_reload.py +2 -1
  122. warp/tests/test_sparse.py +103 -0
  123. warp/tests/test_spatial.py +140 -34
  124. warp/tests/test_spatial_assign_copy.py +160 -0
  125. warp/tests/test_static.py +48 -0
  126. warp/tests/test_struct.py +43 -3
  127. warp/tests/test_tape.py +38 -0
  128. warp/tests/test_types.py +0 -20
  129. warp/tests/test_vec.py +216 -441
  130. warp/tests/test_vec_assign_copy.py +143 -0
  131. warp/tests/test_vec_constructors.py +325 -0
  132. warp/tests/tile/test_tile.py +206 -152
  133. warp/tests/tile/test_tile_cholesky.py +605 -0
  134. warp/tests/tile/test_tile_load.py +169 -0
  135. warp/tests/tile/test_tile_mathdx.py +2 -558
  136. warp/tests/tile/test_tile_matmul.py +179 -0
  137. warp/tests/tile/test_tile_mlp.py +1 -1
  138. warp/tests/tile/test_tile_reduce.py +100 -11
  139. warp/tests/tile/test_tile_shared_memory.py +16 -16
  140. warp/tests/tile/test_tile_sort.py +59 -55
  141. warp/tests/unittest_suites.py +16 -0
  142. warp/tests/walkthrough_debug.py +1 -1
  143. warp/thirdparty/unittest_parallel.py +108 -9
  144. warp/types.py +554 -264
  145. warp/utils.py +68 -86
  146. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  147. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
  148. warp/native/marching.cpp +0 -19
  149. warp/native/marching.cu +0 -514
  150. warp/native/marching.h +0 -19
  151. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  152. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  153. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -25,7 +25,7 @@ namespace wp
 template<unsigned Length, typename Type>
 struct vec_t
 {
-    Type c[Length];
+    Type c[Length < 1 ? 1 : Length];
 
     inline CUDA_CALLABLE vec_t()
         : c()
@@ -343,6 +343,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> add(vec_t<Length, Type> a, vec_t<Length
     return ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> add(Type a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = a + b[i];
+    }
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE vec_t<2, Type> add(vec_t<2, Type> a, vec_t<2, Type> b)
 {
@@ -367,6 +378,18 @@ inline CUDA_CALLABLE vec_t<Length, Type> sub(vec_t<Length, Type> a, vec_t<Length
     return ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> sub(Type a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = Type(a - b[i]);
+    }
+
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE vec_t<2, Type> sub(vec_t<2, Type> a, vec_t<2, Type> b)
 {
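
The two overloads above give vectors scalar-on-the-left arithmetic, so expressions like `2.0 + v` and `2.0 - v` no longer need the scalar broadcast by hand. A minimal sketch of the kernel-side usage this presumably enables (illustrative, not taken from the package; it assumes Warp 1.9 lowers these expressions to the new add/sub overloads):

    import warp as wp

    @wp.kernel
    def scalar_vec_arithmetic(out: wp.array(dtype=wp.vec3)):
        tid = wp.tid()
        v = wp.vec3(1.0, 2.0, 3.0)
        a = 2.0 + v  # add(Type, vec_t): (3.0, 4.0, 5.0)
        b = 2.0 - v  # sub(Type, vec_t): (1.0, 0.0, -1.0)
        out[tid] = a + b

    wp.init()
    out = wp.zeros(1, dtype=wp.vec3)
    wp.launch(scalar_vec_arithmetic, dim=1, outputs=[out])
    print(out.numpy())  # expected [[4. 4. 4.]]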
@@ -440,27 +463,64 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return a[idx];
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const vec_t<Length, Type> & a, slice_t slice)
+{
+    vec_t<SliceLength, Type> ret;
+
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        ret[ii] = a[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+    return ret;
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return &v[idx];
 }
 
@@ -468,13 +528,18 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return &((*v)[idx]);
 }
 
@@ -498,120 +563,325 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] += value;
 }
 
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] += a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_add_inplace(vec_t<Length, Type>& v, int idx, Type value,
                                           vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_v[idx];
 }
 
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] -= value;
 }
 
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] -= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_sub_inplace(vec_t<Length, Type>& v, int idx, Type value,
                                           vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value -= adj_v[idx];
 }
 
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] -= adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] = value;
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] = a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_assign_inplace(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
    {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_v[idx];
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     vec_t<Length, Type> ret(v);
     ret[idx] = value;
     return ret;
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    vec_t<Length, Type> ret(v);
+    assign_inplace<SliceLength>(ret, slice, a);
+    return ret;
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_ret[idx];
     for(unsigned i=0; i < Length; ++i)
     {
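
All three slice overloads above (assign_inplace, add_inplace, sub_inplace) walk the target with i while ii tracks the matching component of the right-hand side, so strided and reversed slices still consume the RHS in order. In kernel terms this presumably backs slice assignment and in-place slice updates on vectors; a hedged, illustrative sketch:

    import warp as wp

    @wp.kernel
    def vec_slice_updates(out: wp.array(dtype=wp.vec4)):
        tid = wp.tid()
        v = wp.vec4(0.0, 0.0, 0.0, 0.0)
        v[0:2] = wp.vec2(1.0, 2.0)   # assign_inplace -> (1.0, 2.0, 0.0, 0.0)
        v[2:4] += wp.vec2(3.0, 4.0)  # add_inplace    -> (1.0, 2.0, 3.0, 4.0)
        v[::2] -= wp.vec2(0.5, 0.5)  # sub_inplace, step 2 -> (0.5, 2.0, 2.5, 4.0)
        out[tid] = v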
@@ -620,6 +890,40 @@ inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type
     }
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+    const vec_t<Length, Type>& adj_ret)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Length; ++i)
+    {
+        bool in_slice = is_reversed
+            ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+            : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+        if (!in_slice)
+        {
+            adj_v[i] += adj_ret[i];
+        }
+        else
+        {
+            adj_a[ii] += adj_ret[i];
+            ++ii;
+        }
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)
 {
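
Unlike the in-place adjoints, adj_assign_copy must split the adjoint of the returned copy between the untouched source components (routed to adj_v) and the overwritten ones (routed to adj_a), so it tests slice membership per component instead of walking the slice. The membership test restated in plain Python to make the modulo logic concrete (a sketch, not package code):

    def in_slice(i, start, stop, step):
        # mirrors the in_slice test in adj_assign_copy
        if step < 0:
            return stop < i <= start and (start - i) % (-step) == 0
        return start <= i < stop and (i - start) % step == 0

    # the reversed slice 3:-1:-2 over a length-4 vector covers indices 3 and 1
    assert [in_slice(i, 3, -1, -2) for i in range(4)] == [False, True, False, True]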
@@ -969,11 +1273,11 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
 {
 
-    adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
-
-    for( unsigned i=0; i < Length; ++i )
+    for (unsigned i=0; i < Length; ++i)
     {
-        adj_a[i] += s / adj_ret[i];
+        Type inv = Type(1) / a[i];
+        adj_a[i] -= s * adj_ret[i] * inv * inv;
+        adj_s += adj_ret[i] * inv;
     }
 
 #if FP_CHECK
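
This hunk fixes the adjoint of scalar-by-vector division: for ret_i = s / a_i the correct partials are d(ret_i)/d(a_i) = -s / a_i^2 and d(ret_i)/d(s) = 1 / a_i, which is what the new loop accumulates. The removed lines applied the vector-by-scalar formula to adj_s and divided by the adjoint in adj_a. A quick gradient check through the usual wp.Tape workflow (hedged: it assumes `s / v` in a kernel lowers to this adjoint):

    import warp as wp

    @wp.kernel
    def scalar_div_vec(s: wp.array(dtype=float),
                       v: wp.array(dtype=wp.vec3),
                       out: wp.array(dtype=wp.vec3)):
        tid = wp.tid()
        out[tid] = s[tid] / v[tid]

    wp.init()
    s = wp.array([2.0], dtype=float, requires_grad=True)
    v = wp.array([wp.vec3(1.0, 2.0, 4.0)], dtype=wp.vec3, requires_grad=True)
    out = wp.zeros(1, dtype=wp.vec3, requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch(scalar_div_vec, dim=1, inputs=[s, v], outputs=[out])
    tape.backward(grads={out: wp.array([wp.vec3(1.0, 1.0, 1.0)], dtype=wp.vec3)})

    print(v.grad.numpy())  # -s / v**2 -> [[-2.0, -0.5, -0.125]]
    print(s.grad.numpy())  # sum(1 / v) = 1.0 + 0.5 + 0.25 -> [1.75]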
@@ -999,6 +1303,21 @@ inline CUDA_CALLABLE void adj_add(vec_t<Length, Type> a, vec_t<Length, Type> b,
     adj_b += adj_ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_add(
+    Type a, vec_t<Length, Type> b,
+    Type& adj_a, vec_t<Length, Type>& adj_b,
+    const vec_t<Length, Type>& adj_ret
+)
+{
+    for (unsigned i = 0; i < Length; ++i)
+    {
+        adj_a += adj_ret.c[i];
+    }
+
+    adj_b += adj_ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_add(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
 {
@@ -1026,6 +1345,21 @@ inline CUDA_CALLABLE void adj_sub(vec_t<Length, Type> a, vec_t<Length, Type> b,
     adj_b -= adj_ret;
 }
 
+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_sub(
+    Type a, vec_t<Length, Type> b,
+    Type& adj_a, vec_t<Length, Type>& adj_b,
+    const vec_t<Length, Type>& adj_ret
+)
+{
+    for (unsigned i = 0; i < Length; ++i)
+    {
+        adj_a += adj_ret.c[i];
+    }
+
+    adj_b -= adj_ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_sub(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
 {
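
In both new adjoints the scalar operand was broadcast across every component in the forward pass, so the reverse pass reduces: adj_a accumulates the sum of all components of adj_ret, while adj_b receives adj_ret with the sign matching add or sub. A tiny numeric restatement of that reduction (plain Python):

    adj_ret = [0.5, -1.0, 2.0]         # incoming adjoint of a vec3 result
    adj_a = sum(adj_ret)               # scalar operand sums all lanes -> 1.5
    adj_b_add = adj_ret                # adj_add: adj_b += adj_ret
    adj_b_sub = [-g for g in adj_ret]  # adj_sub: adj_b -= adj_ret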
@@ -1106,16 +1440,49 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
-        printf("Tvec2<Scalar> index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_a[idx] += adj_ret;
 }
 
+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const vec_t<Length, Type>& a, slice_t slice,
+    vec_t<Length, Type>& adj_a, slice_t& adj_slice,
+    const vec_t<SliceLength, Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[i] += adj_ret[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
 {
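
The slice overload of adj_extract mirrors the forward walk: the adjoint of each extracted component is scattered back to the source position it was read from, leaving unsliced lanes at zero. A short sketch of gradients flowing through a slice read, under the same assumptions as the earlier examples:

    import warp as wp

    @wp.kernel
    def slice_read_loss(v: wp.array(dtype=wp.vec4), loss: wp.array(dtype=float)):
        tid = wp.tid()
        u = v[tid]
        head = u[0:2]  # extract(a, slice) -> wp.vec2
        wp.atomic_add(loss, 0, head[0] + head[1])

    wp.init()
    v = wp.array([wp.vec4(1.0, 2.0, 3.0, 4.0)], dtype=wp.vec4, requires_grad=True)
    loss = wp.zeros(1, dtype=float, requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch(slice_read_loss, dim=1, inputs=[v], outputs=[loss])
    tape.backward(loss=loss)

    print(v.grad.numpy())  # expected [[1. 1. 0. 0.]]: only the sliced lanes receive gradient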