warp-lang 1.8.0__py3-none-macosx_10_13_universal2.whl → 1.9.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release has been flagged as potentially problematic; see the source registry page for details.

Files changed (153)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +48 -63
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +327 -209
  9. warp/config.py +1 -1
  10. warp/context.py +1363 -800
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_callable.py +34 -4
  18. warp/examples/interop/example_jax_kernel.py +27 -1
  19. warp/fabric.py +1 -1
  20. warp/fem/cache.py +27 -19
  21. warp/fem/domain.py +2 -2
  22. warp/fem/field/nodal_field.py +2 -2
  23. warp/fem/field/virtual.py +266 -166
  24. warp/fem/geometry/geometry.py +5 -5
  25. warp/fem/integrate.py +200 -91
  26. warp/fem/space/restriction.py +4 -0
  27. warp/fem/space/shape/tet_shape_function.py +3 -10
  28. warp/jax_experimental/custom_call.py +1 -1
  29. warp/jax_experimental/ffi.py +203 -54
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +103 -8
  32. warp/native/builtin.h +90 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +13 -3
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +42 -11
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +4 -4
  48. warp/native/mat.h +1913 -119
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +5 -3
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +337 -16
  59. warp/native/rand.h +7 -7
  60. warp/native/range.h +7 -1
  61. warp/native/reduce.cpp +10 -10
  62. warp/native/reduce.cu +13 -14
  63. warp/native/runlength_encode.cpp +2 -2
  64. warp/native/runlength_encode.cu +5 -5
  65. warp/native/scan.cpp +3 -3
  66. warp/native/scan.cu +4 -4
  67. warp/native/sort.cpp +10 -10
  68. warp/native/sort.cu +22 -22
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +14 -14
  71. warp/native/spatial.h +366 -17
  72. warp/native/svd.h +23 -8
  73. warp/native/temp_buffer.h +2 -2
  74. warp/native/tile.h +303 -70
  75. warp/native/tile_radix_sort.h +5 -1
  76. warp/native/tile_reduce.h +16 -25
  77. warp/native/tuple.h +2 -2
  78. warp/native/vec.h +385 -18
  79. warp/native/volume.cpp +54 -54
  80. warp/native/volume.cu +1 -1
  81. warp/native/volume.h +2 -1
  82. warp/native/volume_builder.cu +30 -37
  83. warp/native/warp.cpp +150 -149
  84. warp/native/warp.cu +337 -193
  85. warp/native/warp.h +227 -226
  86. warp/optim/linear.py +736 -271
  87. warp/render/imgui_manager.py +289 -0
  88. warp/render/render_opengl.py +137 -57
  89. warp/render/render_usd.py +0 -1
  90. warp/sim/collide.py +1 -2
  91. warp/sim/graph_coloring.py +2 -2
  92. warp/sim/integrator_vbd.py +10 -2
  93. warp/sparse.py +559 -176
  94. warp/tape.py +2 -0
  95. warp/tests/aux_test_module_aot.py +7 -0
  96. warp/tests/cuda/test_async.py +3 -3
  97. warp/tests/cuda/test_conditional_captures.py +101 -0
  98. warp/tests/geometry/test_marching_cubes.py +233 -12
  99. warp/tests/sim/test_cloth.py +89 -6
  100. warp/tests/sim/test_coloring.py +82 -7
  101. warp/tests/test_array.py +56 -5
  102. warp/tests/test_assert.py +53 -0
  103. warp/tests/test_atomic_cas.py +127 -114
  104. warp/tests/test_codegen.py +3 -2
  105. warp/tests/test_context.py +8 -15
  106. warp/tests/test_enum.py +136 -0
  107. warp/tests/test_examples.py +2 -2
  108. warp/tests/test_fem.py +45 -2
  109. warp/tests/test_fixedarray.py +229 -0
  110. warp/tests/test_func.py +18 -15
  111. warp/tests/test_future_annotations.py +7 -5
  112. warp/tests/test_linear_solvers.py +30 -0
  113. warp/tests/test_map.py +1 -1
  114. warp/tests/test_mat.py +1540 -378
  115. warp/tests/test_mat_assign_copy.py +178 -0
  116. warp/tests/test_mat_constructors.py +574 -0
  117. warp/tests/test_module_aot.py +287 -0
  118. warp/tests/test_print.py +69 -0
  119. warp/tests/test_quat.py +162 -34
  120. warp/tests/test_quat_assign_copy.py +145 -0
  121. warp/tests/test_reload.py +2 -1
  122. warp/tests/test_sparse.py +103 -0
  123. warp/tests/test_spatial.py +140 -34
  124. warp/tests/test_spatial_assign_copy.py +160 -0
  125. warp/tests/test_static.py +48 -0
  126. warp/tests/test_struct.py +43 -3
  127. warp/tests/test_tape.py +38 -0
  128. warp/tests/test_types.py +0 -20
  129. warp/tests/test_vec.py +216 -441
  130. warp/tests/test_vec_assign_copy.py +143 -0
  131. warp/tests/test_vec_constructors.py +325 -0
  132. warp/tests/tile/test_tile.py +206 -152
  133. warp/tests/tile/test_tile_cholesky.py +605 -0
  134. warp/tests/tile/test_tile_load.py +169 -0
  135. warp/tests/tile/test_tile_mathdx.py +2 -558
  136. warp/tests/tile/test_tile_matmul.py +179 -0
  137. warp/tests/tile/test_tile_mlp.py +1 -1
  138. warp/tests/tile/test_tile_reduce.py +100 -11
  139. warp/tests/tile/test_tile_shared_memory.py +16 -16
  140. warp/tests/tile/test_tile_sort.py +59 -55
  141. warp/tests/unittest_suites.py +16 -0
  142. warp/tests/walkthrough_debug.py +1 -1
  143. warp/thirdparty/unittest_parallel.py +108 -9
  144. warp/types.py +554 -264
  145. warp/utils.py +68 -86
  146. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  147. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
  148. warp/native/marching.cpp +0 -19
  149. warp/native/marching.cu +0 -514
  150. warp/native/marching.h +0 -19
  151. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  152. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  153. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/quat.h CHANGED
@@ -459,14 +459,19 @@ inline CUDA_CALLABLE quat_t<Type> quat_from_matrix(const mat_t<Rows,Cols,Type>&
 template<typename Type>
 inline CUDA_CALLABLE Type extract(const quat_t<Type>& a, int idx)
 {
-#if FP_CHECK
-    if (idx < 0 || idx > 3)
+#ifndef NDEBUG
+    if (idx < -4 || idx >= 4)
     {
         printf("quat_t index %d out of bounds at %s %d", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     /*
     * Because quat data is not stored in an array, we index the quaternion by checking all possible idx values.
     * (&a.x)[idx] would be the preferred access strategy, but this results in undefined behavior in the clang compiler
@@ -478,17 +483,48 @@ inline CUDA_CALLABLE Type extract(const quat_t<Type>& a, int idx)
     else {return a.w;}
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const quat_t<Type> & a, slice_t slice)
+{
+    vec_t<SliceLength, Type> ret;
+
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int idx = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        ret[idx] = a[i];
+        ++idx;
+    }
+
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE Type* index(quat_t<Type>& q, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     return &q[idx];
 }
 
@@ -496,13 +532,18 @@ template<typename Type>
 inline CUDA_CALLABLE Type* indexref(quat_t<Type>* q, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     return &((*q)[idx]);
 }
 
@@ -526,120 +567,328 @@ template<typename Type>
 inline CUDA_CALLABLE void add_inplace(quat_t<Type>& q, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     q[idx] += value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void add_inplace(quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        q[i] += a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_add_inplace(quat_t<Type>& q, int idx, Type value,
                                           quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     adj_value += adj_q[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(
+    const quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a,
+    quat_t<Type>& adj_q, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_q[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void sub_inplace(quat_t<Type>& q, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     q[idx] -= value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void sub_inplace(quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        q[i] -= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_sub_inplace(quat_t<Type>& q, int idx, Type value,
                                           quat_t<Type>& adj_q, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
    {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     adj_value -= adj_q[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(
+    const quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a,
+    quat_t<Type>& adj_q, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] -= adj_q[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void assign_inplace(quat_t<Type>& q, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     q[idx] = value;
 }
 
+
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void assign_inplace(quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        q[i] = a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_inplace(quat_t<Type>& q, int idx, Type value, quat_t<Type>& adj_q, int& adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     adj_value += adj_q[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    const quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a,
+    quat_t<Type>& adj_q, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_q[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE quat_t<Type> assign_copy(quat_t<Type>& q, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     quat_t<Type> ret(q);
     ret[idx] = value;
     return ret;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE quat_t<Type> assign_copy(quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    quat_t<Type> ret(q);
+    assign_inplace<SliceLength>(ret, slice, a);
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_copy(quat_t<Type>& q, int idx, Type value, quat_t<Type>& adj_q, int& adj_idx, Type& adj_value, const quat_t<Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx > 3)
+    if (idx < -4 || idx >= 4)
     {
         printf("quat index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     adj_value += adj_ret[idx];
     for(unsigned i=0; i < 4; ++i)
     {
@@ -648,6 +897,41 @@ inline CUDA_CALLABLE void adj_assign_copy(quat_t<Type>& q, int idx, Type value,
     }
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    quat_t<Type>& q, slice_t slice, const vec_t<SliceLength, Type> &a,
+    quat_t<Type>& adj_q, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+    const quat_t<Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < 4; ++i)
+    {
+        bool in_slice = is_reversed
+            ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+            : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+        if (!in_slice)
+        {
+            adj_q[i] += adj_ret[i];
+        }
+        else
+        {
+            adj_a[ii] += adj_ret[i];
+            ++ii;
+        }
+    }
+
+    assert(ii == SliceLength);
+}
+
 
 template<typename Type>
 CUDA_CALLABLE inline quat_t<Type> lerp(const quat_t<Type>& a, const quat_t<Type>& b, Type t)
@@ -666,14 +950,19 @@ CUDA_CALLABLE inline void adj_lerp(const quat_t<Type>& a, const quat_t<Type>& b,
 template<typename Type>
 inline CUDA_CALLABLE void adj_extract(const quat_t<Type>& a, int idx, quat_t<Type>& adj_a, int & adj_idx, Type & adj_ret)
 {
-#if FP_CHECK
-    if (idx < 0 || idx > 3)
+#ifndef NDEBUG
+    if (idx < -4 || idx >= 4)
     {
         printf("quat_t index %d out of bounds at %s %d", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 4;
+    }
+
     // See wp::extract(const quat_t<Type>& a, int idx) note
     if (idx == 0) {adj_a.x += adj_ret;}
     else if (idx == 1) {adj_a.y += adj_ret;}
@@ -681,6 +970,34 @@ inline CUDA_CALLABLE void adj_extract(const quat_t<Type>& a, int idx, quat_t<Typ
     else {adj_a.w += adj_ret;}
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const quat_t<Type>& a, slice_t slice,
+    quat_t<Type>& adj_a, slice_t& adj_slice,
+    const vec_t<SliceLength, Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 4);
+    assert(slice.stop >= -1 && slice.stop <= 4);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[i] += adj_ret[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 
 // backward methods
 template<typename Type>
@@ -904,8 +1221,12 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
 template<typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
 {
-    adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
-    adj_a += s / adj_ret;
+    for (unsigned i=0; i < 4; ++i)
+    {
+        Type inv = Type(1) / a[i];
+        adj_a[i] -= s * adj_ret[i] * inv * inv;
+        adj_s += adj_ret[i] * inv;
+    }
 }
 
 template<typename Type>
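The quat.h changes above do two things: negative component indices are now accepted (wrapped by adding 4), and slice-based reads and in-place updates are supported through new overloads that take a slice_t and return or consume a fixed-length vec_t. Below is a minimal, hypothetical sketch of how this might surface in Warp kernel code; the Python-level slicing syntax on quaternions is an assumption inferred from the new native extract/assign_inplace overloads and the expanded quaternion tests, not something shown directly in this diff.

```python
import warp as wp


@wp.kernel
def quat_index_demo(out: wp.array(dtype=wp.vec3f)):
    q = wp.quatf(1.0, 2.0, 3.0, 4.0)

    # Negative indexing wraps around, so q[-1] reads the w component (4.0).
    w = q[-1]

    # Slicing a quaternion is expected to yield a fixed-length vector,
    # mirroring the new extract(quat_t, slice_t) -> vec_t overload above.
    xyz = q[:3]

    out[0] = xyz * w


# Hypothetical usage; assumes warp-lang >= 1.9.0.
wp.init()
out = wp.zeros(1, dtype=wp.vec3f)
wp.launch(quat_index_demo, dim=1, inputs=[out])
print(out.numpy())
```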
warp/native/rand.h CHANGED
@@ -71,14 +71,14 @@ inline CUDA_CALLABLE float randf(uint32& state, float min, float max) { return (
 // Box-Muller method
 inline CUDA_CALLABLE float randn(uint32& state) { return sqrt(-2.f * log(randf(state) + RANDN_EPSILON)) * cos(2.f * M_PI_F * randf(state)); }
 
-inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, float adj_ret) {}
-inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, float adj_ret) {}
+inline CUDA_CALLABLE void adj_rand_init(int seed, int& adj_seed, uint32 adj_ret) {}
+inline CUDA_CALLABLE void adj_rand_init(int seed, int offset, int& adj_seed, int& adj_offset, uint32 adj_ret) {}
 
-inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, float adj_ret) {}
-inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, float adj_ret) {}
+inline CUDA_CALLABLE void adj_randi(uint32& state, uint32& adj_state, int adj_ret) {}
+inline CUDA_CALLABLE void adj_randi(uint32& state, int min, int max, uint32& adj_state, int& adj_min, int& adj_max, int adj_ret) {}
 
-inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state, float adj_ret) {}
-inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max, float adj_ret) {}
+inline CUDA_CALLABLE void adj_randu(uint32& state, uint32& adj_state, uint32 adj_ret) {}
+inline CUDA_CALLABLE void adj_randu(uint32& state, uint32 min, uint32 max, uint32& adj_state, uint32& adj_min, uint32& adj_max, uint32 adj_ret) {}
 
 inline CUDA_CALLABLE void adj_randf(uint32& state, uint32& adj_state, float adj_ret) {}
 inline CUDA_CALLABLE void adj_randf(uint32& state, float min, float max, uint32& adj_state, float& adj_min, float& adj_max, float adj_ret) {}
@@ -195,7 +195,7 @@ inline CUDA_CALLABLE void adj_sample_unit_hemisphere_surface(uint32& state, uint
 inline CUDA_CALLABLE void adj_sample_unit_hemisphere(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_square(uint32& state, uint32& adj_state, const vec2& adj_ret) {}
 inline CUDA_CALLABLE void adj_sample_unit_cube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
-inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec3& adj_ret) {}
+inline CUDA_CALLABLE void adj_sample_unit_hypercube(uint32& state, uint32& adj_state, const vec4& adj_ret) {}
 
 /*
 * log-gamma function to support some of these distributions. The
warp/native/range.h CHANGED
@@ -115,7 +115,13 @@ CUDA_CALLABLE inline range_t iter_reverse(const range_t& r)
     // generates a reverse range, equivalent to reversed(range())
     range_t rev;
 
-    if (r.step > 0)
+    if (r.step == 0)
+    {
+        // degenerate case where step == 0, return empty range
+        rev.start = r.start;
+        rev.end = r.start;
+    }
+    else if (r.step > 0)
     {
         rev.start = r.start + int((r.end - r.start - 1) / r.step) * r.step;
     }
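The iter_reverse change above makes a zero step produce an empty reversed range instead of falling through to the positive/negative step branches. As a rough illustration (a hypothetical sketch, assuming reversed(range(...)) with a runtime step value inside a Warp kernel), a step of 0 now simply yields no iterations:

```python
import warp as wp


@wp.kernel
def count_reversed(step: int, out: wp.array(dtype=int)):
    n = int(0)
    # With step == 0 the reversed range is treated as empty (see iter_reverse
    # above), so this loop body is expected to never execute.
    for i in reversed(range(0, 4, step)):
        n += 1
    out[0] = n


wp.init()
out = wp.zeros(1, dtype=int)
wp.launch(count_reversed, dim=1, inputs=[0, out])
print(out.numpy())  # expected: [0]
```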
warp/native/reduce.cpp CHANGED
@@ -119,7 +119,7 @@ template <typename T> void array_sum_host(const T *ptr_a, T *ptr_out, int count,
         accumulate_func(ptr_a + i * stride, ptr_out, type_length);
 }
 
-void array_inner_float_host(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+void wp_array_inner_float_host(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
                             int type_length)
 {
     const float *ptr_a = (const float *)(a);
@@ -129,7 +129,7 @@ void array_inner_float_host(uint64_t a, uint64_t b, uint64_t out, int count, int
     array_inner_host(ptr_a, ptr_b, ptr_out, count, byte_stride_a, byte_stride_b, type_length);
 }
 
-void array_inner_double_host(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+void wp_array_inner_double_host(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
                              int type_length)
 {
     const double *ptr_a = (const double *)(a);
@@ -139,14 +139,14 @@ void array_inner_double_host(uint64_t a, uint64_t b, uint64_t out, int count, in
     array_inner_host(ptr_a, ptr_b, ptr_out, count, byte_stride_a, byte_stride_b, type_length);
 }
 
-void array_sum_float_host(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
+void wp_array_sum_float_host(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
 {
     const float *ptr_a = (const float *)(a);
     float *ptr_out = (float *)(out);
     array_sum_host(ptr_a, ptr_out, count, byte_stride_a, type_length);
 }
 
-void array_sum_double_host(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
+void wp_array_sum_double_host(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
 {
     const double *ptr_a = (const double *)(a);
     double *ptr_out = (double *)(out);
@@ -154,21 +154,21 @@ void array_sum_double_host(uint64_t a, uint64_t out, int count, int byte_stride_
 }
 
 #if !WP_ENABLE_CUDA
-void array_inner_float_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
-                              int type_length)
+void wp_array_inner_float_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+                                 int type_length)
 {
 }
 
-void array_inner_double_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
-                               int type_length)
+void wp_array_inner_double_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+                                  int type_length)
 {
 }
 
-void array_sum_float_device(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
+void wp_array_sum_float_device(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
 {
 }
 
-void array_sum_double_device(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
+void wp_array_sum_double_device(uint64_t a, uint64_t out, int count, int byte_stride_a, int type_length)
 {
 }
 #endif