warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in that registry.

Potentially problematic release: this version of warp-lang has been flagged as possibly problematic.

Files changed (193)
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/warp-clang.dll +0 -0
  5. warp/bin/warp.dll +0 -0
  6. warp/build.py +241 -252
  7. warp/build_dll.py +130 -26
  8. warp/builtins.py +1907 -384
  9. warp/codegen.py +272 -104
  10. warp/config.py +12 -1
  11. warp/constants.py +1 -1
  12. warp/context.py +770 -238
  13. warp/dlpack.py +1 -1
  14. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  15. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  16. warp/examples/core/example_sample_mesh.py +1 -1
  17. warp/examples/core/example_spin_lock.py +93 -0
  18. warp/examples/core/example_work_queue.py +118 -0
  19. warp/examples/fem/example_adaptive_grid.py +5 -5
  20. warp/examples/fem/example_apic_fluid.py +1 -1
  21. warp/examples/fem/example_burgers.py +1 -1
  22. warp/examples/fem/example_convection_diffusion.py +9 -6
  23. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  24. warp/examples/fem/example_deformed_geometry.py +1 -1
  25. warp/examples/fem/example_diffusion.py +2 -2
  26. warp/examples/fem/example_diffusion_3d.py +1 -1
  27. warp/examples/fem/example_distortion_energy.py +1 -1
  28. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  29. warp/examples/fem/example_magnetostatics.py +5 -3
  30. warp/examples/fem/example_mixed_elasticity.py +5 -3
  31. warp/examples/fem/example_navier_stokes.py +11 -9
  32. warp/examples/fem/example_nonconforming_contact.py +5 -3
  33. warp/examples/fem/example_streamlines.py +8 -3
  34. warp/examples/fem/utils.py +9 -8
  35. warp/examples/interop/example_jax_callable.py +34 -4
  36. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  37. warp/examples/interop/example_jax_kernel.py +27 -1
  38. warp/examples/optim/example_drone.py +1 -1
  39. warp/examples/sim/example_cloth.py +1 -1
  40. warp/examples/sim/example_cloth_self_contact.py +48 -54
  41. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  42. warp/examples/tile/example_tile_cholesky.py +2 -1
  43. warp/examples/tile/example_tile_convolution.py +1 -1
  44. warp/examples/tile/example_tile_filtering.py +1 -1
  45. warp/examples/tile/example_tile_matmul.py +1 -1
  46. warp/examples/tile/example_tile_mlp.py +2 -0
  47. warp/fabric.py +7 -7
  48. warp/fem/__init__.py +5 -0
  49. warp/fem/adaptivity.py +1 -1
  50. warp/fem/cache.py +152 -63
  51. warp/fem/dirichlet.py +2 -2
  52. warp/fem/domain.py +136 -6
  53. warp/fem/field/field.py +141 -99
  54. warp/fem/field/nodal_field.py +85 -39
  55. warp/fem/field/virtual.py +99 -52
  56. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  57. warp/fem/geometry/closest_point.py +13 -0
  58. warp/fem/geometry/deformed_geometry.py +102 -40
  59. warp/fem/geometry/element.py +56 -2
  60. warp/fem/geometry/geometry.py +323 -22
  61. warp/fem/geometry/grid_2d.py +157 -62
  62. warp/fem/geometry/grid_3d.py +116 -20
  63. warp/fem/geometry/hexmesh.py +86 -20
  64. warp/fem/geometry/nanogrid.py +166 -86
  65. warp/fem/geometry/partition.py +59 -25
  66. warp/fem/geometry/quadmesh.py +86 -135
  67. warp/fem/geometry/tetmesh.py +47 -119
  68. warp/fem/geometry/trimesh.py +77 -270
  69. warp/fem/integrate.py +181 -95
  70. warp/fem/linalg.py +25 -58
  71. warp/fem/operator.py +124 -27
  72. warp/fem/quadrature/pic_quadrature.py +36 -14
  73. warp/fem/quadrature/quadrature.py +40 -16
  74. warp/fem/space/__init__.py +1 -1
  75. warp/fem/space/basis_function_space.py +66 -46
  76. warp/fem/space/basis_space.py +17 -4
  77. warp/fem/space/dof_mapper.py +1 -1
  78. warp/fem/space/function_space.py +2 -2
  79. warp/fem/space/grid_2d_function_space.py +4 -1
  80. warp/fem/space/hexmesh_function_space.py +4 -2
  81. warp/fem/space/nanogrid_function_space.py +3 -1
  82. warp/fem/space/partition.py +11 -2
  83. warp/fem/space/quadmesh_function_space.py +4 -1
  84. warp/fem/space/restriction.py +5 -2
  85. warp/fem/space/shape/__init__.py +10 -8
  86. warp/fem/space/tetmesh_function_space.py +4 -1
  87. warp/fem/space/topology.py +52 -21
  88. warp/fem/space/trimesh_function_space.py +4 -1
  89. warp/fem/utils.py +53 -8
  90. warp/jax.py +1 -2
  91. warp/jax_experimental/ffi.py +210 -67
  92. warp/jax_experimental/xla_ffi.py +37 -24
  93. warp/math.py +171 -1
  94. warp/native/array.h +103 -4
  95. warp/native/builtin.h +182 -35
  96. warp/native/coloring.cpp +6 -2
  97. warp/native/cuda_util.cpp +1 -1
  98. warp/native/exports.h +118 -63
  99. warp/native/intersect.h +5 -5
  100. warp/native/mat.h +8 -13
  101. warp/native/mathdx.cpp +11 -5
  102. warp/native/matnn.h +1 -123
  103. warp/native/mesh.h +1 -1
  104. warp/native/quat.h +34 -6
  105. warp/native/rand.h +7 -7
  106. warp/native/sparse.cpp +121 -258
  107. warp/native/sparse.cu +181 -274
  108. warp/native/spatial.h +305 -17
  109. warp/native/svd.h +23 -8
  110. warp/native/tile.h +603 -73
  111. warp/native/tile_radix_sort.h +1112 -0
  112. warp/native/tile_reduce.h +239 -13
  113. warp/native/tile_scan.h +240 -0
  114. warp/native/tuple.h +189 -0
  115. warp/native/vec.h +10 -20
  116. warp/native/warp.cpp +36 -4
  117. warp/native/warp.cu +588 -52
  118. warp/native/warp.h +47 -74
  119. warp/optim/linear.py +5 -1
  120. warp/paddle.py +7 -8
  121. warp/py.typed +0 -0
  122. warp/render/render_opengl.py +110 -80
  123. warp/render/render_usd.py +124 -62
  124. warp/sim/__init__.py +9 -0
  125. warp/sim/collide.py +253 -80
  126. warp/sim/graph_coloring.py +8 -1
  127. warp/sim/import_mjcf.py +4 -3
  128. warp/sim/import_usd.py +11 -7
  129. warp/sim/integrator.py +5 -2
  130. warp/sim/integrator_euler.py +1 -1
  131. warp/sim/integrator_featherstone.py +1 -1
  132. warp/sim/integrator_vbd.py +761 -322
  133. warp/sim/integrator_xpbd.py +1 -1
  134. warp/sim/model.py +265 -260
  135. warp/sim/utils.py +10 -7
  136. warp/sparse.py +303 -166
  137. warp/tape.py +54 -51
  138. warp/tests/cuda/test_conditional_captures.py +1046 -0
  139. warp/tests/cuda/test_streams.py +1 -1
  140. warp/tests/geometry/test_volume.py +2 -2
  141. warp/tests/interop/test_dlpack.py +9 -9
  142. warp/tests/interop/test_jax.py +0 -1
  143. warp/tests/run_coverage_serial.py +1 -1
  144. warp/tests/sim/disabled_kinematics.py +2 -2
  145. warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
  146. warp/tests/sim/test_collision.py +159 -51
  147. warp/tests/sim/test_coloring.py +91 -2
  148. warp/tests/test_array.py +254 -2
  149. warp/tests/test_array_reduce.py +2 -2
  150. warp/tests/test_assert.py +53 -0
  151. warp/tests/test_atomic_cas.py +312 -0
  152. warp/tests/test_codegen.py +142 -19
  153. warp/tests/test_conditional.py +47 -1
  154. warp/tests/test_ctypes.py +0 -20
  155. warp/tests/test_devices.py +8 -0
  156. warp/tests/test_fabricarray.py +4 -2
  157. warp/tests/test_fem.py +58 -25
  158. warp/tests/test_func.py +42 -1
  159. warp/tests/test_grad.py +1 -1
  160. warp/tests/test_lerp.py +1 -3
  161. warp/tests/test_map.py +481 -0
  162. warp/tests/test_mat.py +23 -24
  163. warp/tests/test_quat.py +28 -15
  164. warp/tests/test_rounding.py +10 -38
  165. warp/tests/test_runlength_encode.py +7 -7
  166. warp/tests/test_smoothstep.py +1 -1
  167. warp/tests/test_sparse.py +83 -2
  168. warp/tests/test_spatial.py +507 -1
  169. warp/tests/test_static.py +48 -0
  170. warp/tests/test_struct.py +2 -2
  171. warp/tests/test_tape.py +38 -0
  172. warp/tests/test_tuple.py +265 -0
  173. warp/tests/test_types.py +2 -2
  174. warp/tests/test_utils.py +24 -18
  175. warp/tests/test_vec.py +38 -408
  176. warp/tests/test_vec_constructors.py +325 -0
  177. warp/tests/tile/test_tile.py +438 -131
  178. warp/tests/tile/test_tile_mathdx.py +518 -14
  179. warp/tests/tile/test_tile_matmul.py +179 -0
  180. warp/tests/tile/test_tile_reduce.py +307 -5
  181. warp/tests/tile/test_tile_shared_memory.py +136 -7
  182. warp/tests/tile/test_tile_sort.py +121 -0
  183. warp/tests/unittest_suites.py +14 -6
  184. warp/types.py +462 -308
  185. warp/utils.py +647 -86
  186. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
  187. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +190 -176
  188. warp/stubs.py +0 -3381
  189. warp/tests/sim/test_xpbd.py +0 -399
  190. warp/tests/test_mlp.py +0 -282
  191. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
  192. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
  193. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/spatial.h CHANGED
@@ -136,6 +136,13 @@ struct transform_t
     CUDA_CALLABLE inline transform_t(vec_t<3,Type> p=vec_t<3,Type>(), quat_t<Type> q=quat_t<Type>()) : p(p), q(q) {}
     CUDA_CALLABLE inline transform_t(Type) {}  // helps uniform initialization
 
+    template<typename OtherType>
+    inline explicit CUDA_CALLABLE transform_t(const transform_t<OtherType>& other)
+    {
+        p = other.p;
+        q = other.q;
+    }
+
     CUDA_CALLABLE inline transform_t(const initializer_array<7, Type> &l)
     {
         p = vec_t<3,Type>(l[0], l[1], l[2]);
@@ -163,6 +170,35 @@ CUDA_CALLABLE inline transform_t<Type> transform_identity()
     return transform_t<Type>(vec_t<3,Type>(), quat_identity<Type>());
 }
 
+template<typename Type>
+inline CUDA_CALLABLE transform_t<Type> operator - (const transform_t<Type>& x)
+{
+    transform_t<Type> ret;
+
+    ret.p = -x.p;
+    ret.q = -x.q;
+
+    return ret;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline transform_t<Type> pos(const transform_t<Type>& x)
+{
+    return x;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline transform_t<Type> neg(const transform_t<Type>& x)
+{
+    return -x;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_neg(const transform_t<Type>& x, transform_t<Type>& adj_x, const transform_t<Type>& adj_ret)
+{
+    adj_x -= adj_ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE bool operator==(const transform_t<Type>& a, const transform_t<Type>& b)
 {
@@ -188,6 +224,96 @@ CUDA_CALLABLE inline quat_t<Type> transform_get_rotation(const transform_t<Type>
     return t.q;
 }
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_get_translation(const transform_t<Type>& t, transform_t<Type>& adj_t, const vec_t<3,Type>& adj_ret)
+{
+    adj_t.p += adj_ret;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_get_rotation(const transform_t<Type>& t, transform_t<Type>& adj_t, const quat_t<Type>& adj_ret)
+{
+    adj_t.q += adj_ret;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p)
+{
+    t.p = p;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q)
+{
+    t.q = q;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline transform_t<Type> transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p)
+{
+    transform_t<Type> ret(t);
+    ret.p = p;
+    return ret;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline transform_t<Type> transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q)
+{
+    transform_t<Type> ret(t);
+    ret.q = q;
+    return ret;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p, const transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+{
+    adj_p += adj_t.p;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q, const transform_t<Type>& adj_t, quat_t<Type>& adj_q)
+{
+    adj_q += adj_t.q;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p, const transform_t<Type>& adj_ret)
+{
+    adj_p += adj_ret.p;
+    adj_t.q += adj_ret.q;
+}
+
+template<typename Type>
+CUDA_CALLABLE inline void adj_transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q, transform_t<Type>& adj_t, quat_t<Type>& adj_q, const transform_t<Type>& adj_ret)
+{
+    adj_q += adj_ret.q;
+    adj_t.p += adj_ret.p;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+{
+    t.p += p;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+{
+    t.p -= p;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+{
+    adj_p += adj_t.p;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+{
+    adj_p -= adj_t.p;
+}
+
 template<typename Type>
 CUDA_CALLABLE inline transform_t<Type> transform_multiply(const transform_t<Type>& a, const transform_t<Type>& b)
 {
@@ -271,7 +397,6 @@ CUDA_CALLABLE inline transform_t<Type> operator*(Type s, const transform_t<Type>
     return mul(a, s);
 }
 
-
 template<typename Type>
 inline CUDA_CALLABLE Type tensordot(const transform_t<Type>& a, const transform_t<Type>& b)
 {
@@ -280,17 +405,192 @@ inline CUDA_CALLABLE Type tensordot(const transform_t<Type>& a, const transform_
 }
 
 template<typename Type>
-inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int i)
+inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
 {
-    return t[i];
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    return t[idx];
 }
 
 template<typename Type>
-inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int i, transform_t<Type>& adj_t, int& adj_i, Type adj_ret)
+inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
 {
-    adj_t[i] += adj_ret;
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    return &t[idx];
 }
 
+template<typename Type>
+inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    return &((*t)[idx]);
+}
+
+template<typename Type>
+inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, transform_t<Type>& adj_t, int& adj_idx, Type adj_ret)
+{
+    adj_t[idx] += adj_ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
+                                    transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+{
+    // nop
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_indexref(transform_t<Type>* t, int idx,
+                                       transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+{
+    // nop
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    t[idx] += value;
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
+                                          transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_t[idx];
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    t[idx] -= value;
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
+                                          transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value -= adj_t[idx];
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    t[idx] = value;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_t[idx];
+}
+
+
+template<typename Type>
+inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    transform_t<Type> ret(t);
+    ret[idx] = value;
+    return ret;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (idx < 0 || idx >= 7)
+    {
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_ret[idx];
+    for (unsigned i = 0; i < 7; ++i)
+    {
+        if (i != idx)
+            adj_t[i] += adj_ret[i];
+    }
+}
 
 // adjoint methods
 template<typename Type>
@@ -343,18 +643,6 @@ CUDA_CALLABLE inline void adj_transform_t(const vec_t<3,Type>& p, const quat_t<T
     adj_q += adj_ret.q;
 }
 
-template<typename Type>
-CUDA_CALLABLE inline void adj_transform_get_translation(const transform_t<Type>& t, transform_t<Type>& adj_t, const vec_t<3,Type>& adj_ret)
-{
-    adj_t.p += adj_ret;
-}
-
-template<typename Type>
-CUDA_CALLABLE inline void adj_transform_get_rotation(const transform_t<Type>& t, transform_t<Type>& adj_t, const quat_t<Type>& adj_ret)
-{
-    adj_t.q += adj_ret;
-}
-
 template<typename Type>
 CUDA_CALLABLE inline void adj_transform_inverse(const transform_t<Type>& t, transform_t<Type>& adj_t, const transform_t<Type>& adj_ret)
 {
warp/native/svd.h CHANGED
@@ -50,12 +50,14 @@ namespace wp
 
 template<typename Type>
 struct _svd_config {
+    static constexpr float SVD_EPSILON = 1.e-6f;
     static constexpr float QR_GIVENS_EPSILON = 1.e-6f;
     static constexpr int JACOBI_ITERATIONS = 4;
 };
 
 template<>
 struct _svd_config<double> {
+    static constexpr double SVD_EPSILON = 1.e-12;
     static constexpr double QR_GIVENS_EPSILON = 1.e-12;
     static constexpr int JACOBI_ITERATIONS = 8;
 };
@@ -528,13 +530,15 @@ inline CUDA_CALLABLE void adj_svd3(const mat_t<3,3,Type>& A,
                                    const mat_t<3,3,Type>& adj_U,
                                    const vec_t<3,Type>& adj_sigma,
                                    const mat_t<3,3,Type>& adj_V) {
+    const Type epsilon = _svd_config<Type>::SVD_EPSILON;
+
     Type sx2 = sigma[0] * sigma[0];
     Type sy2 = sigma[1] * sigma[1];
     Type sz2 = sigma[2] * sigma[2];
 
-    Type F01 = Type(1) / min(sy2 - sx2, Type(-1e-6f));
-    Type F02 = Type(1) / min(sz2 - sx2, Type(-1e-6f));
-    Type F12 = Type(1) / min(sz2 - sy2, Type(-1e-6f));
+    Type F01 = Type(1) / min(sy2 - sx2, Type(-epsilon));
+    Type F02 = Type(1) / min(sz2 - sx2, Type(-epsilon));
+    Type F12 = Type(1) / min(sz2 - sy2, Type(-epsilon));
 
     mat_t<3,3,Type> F = mat_t<3,3,Type>(0, F01, F02,
                                         -F01, 0, F12,
@@ -553,8 +557,13 @@ inline CUDA_CALLABLE void adj_svd3(const mat_t<3,3,Type>& A,
 
     mat_t<3,3,Type> sigma_term = mul(U, mul(adj_sigma_mat, VT));
 
-    mat_t<3,3,Type> u_term = mul(mul(U, mul(cw_mul(F, (mul(UT, adj_U) - mul(transpose(adj_U), U))), s_mat)), VT);
-    mat_t<3,3,Type> v_term = mul(U, mul(s_mat, mul(cw_mul(F, (mul(VT, adj_V) - mul(transpose(adj_V), V))), VT)));
+    mat_t<3,3,Type> skew_u = cw_mul(F, mul(UT, adj_U) - mul(transpose(adj_U), U));
+    mat_t<3,3,Type> block_u = mul(skew_u, s_mat);
+    mat_t<3,3,Type> u_term = mul(mul(U, block_u), VT);
+
+    mat_t<3,3,Type> skew_v = cw_mul(F, mul(VT, adj_V) - mul(transpose(adj_V), V));
+    mat_t<3,3,Type> block_v = mul(skew_v, VT);
+    mat_t<3,3,Type> v_term = mul(U, mul(s_mat, block_v));
 
     adj_A = adj_A + (u_term + v_term + sigma_term);
 }
@@ -583,11 +592,13 @@ inline CUDA_CALLABLE void adj_svd2(const mat_t<2,2,Type>& A,
                                    const mat_t<2,2,Type>& adj_U,
                                    const vec_t<2,Type>& adj_sigma,
                                    const mat_t<2,2,Type>& adj_V) {
+    const Type epsilon = _svd_config<Type>::SVD_EPSILON;
+
     Type s1_squared = sigma[0] * sigma[0];
     Type s2_squared = sigma[1] * sigma[1];
 
     // Compute inverse of (s1^2 - s2^2) if possible, use small epsilon to prevent division by zero
-    Type F01 = Type(1) / min(s2_squared - s1_squared, Type(-1e-6f));
+    Type F01 = Type(1) / min(s2_squared - s1_squared, Type(-epsilon));
 
     // Construct the matrix F for the adjoint
     mat_t<2,2,Type> F = mat_t<2,2,Type>(0.0, F01,
@@ -609,10 +620,14 @@ inline CUDA_CALLABLE void adj_svd2(const mat_t<2,2,Type>& A,
     mat_t<2,2,Type> sigma_term = mul(U, mul(adj_sigma_mat, VT));
 
     // Compute the adjoint contributions for U (left singular vectors)
-    mat_t<2,2,Type> u_term = mul(mul(U, mul(cw_mul(F, (mul(UT, adj_U) - mul(transpose(adj_U), U))), s_mat)), VT);
+    mat_t<2,2,Type> skew_u = cw_mul(F, mul(UT, adj_U) - mul(transpose(adj_U), U));
+    mat_t<2,2,Type> block_u = mul(skew_u, s_mat);
+    mat_t<2,2,Type> u_term = mul(mul(U, block_u), VT);
 
     // Compute the adjoint contributions for V (right singular vectors)
-    mat_t<2,2,Type> v_term = mul(U, mul(s_mat, mul(cw_mul(F, (mul(VT, adj_V) - mul(transpose(adj_V), V))), VT)));
+    mat_t<2,2,Type> skew_v = cw_mul(F, mul(VT, adj_V) - mul(transpose(adj_V), V));
+    mat_t<2,2,Type> block_v = mul(skew_v, VT);
+    mat_t<2,2,Type> v_term = mul(U, mul(s_mat, block_v));
 
     // Combine the terms to compute the adjoint of A
     adj_A = adj_A + (u_term + v_term + sigma_term);
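
The svd.h change above factors the u/v adjoint terms into named intermediates (skew_u/block_u, skew_v/block_v) without altering the expression, and replaces the hard-coded 1e-6 clamp with the precision-dependent _svd_config<Type>::SVD_EPSILON, which changes the double-precision clamp from 1e-6 to 1e-12 near repeated singular values. As a reading aid only, writing \bar{X} for the adjoint of X, the 3x3 branch accumulates

\[
\bar{A} \mathrel{+}= U\,\bar{\Sigma}\,V^{\mathsf T}
  + U\,\bigl[F \circ (U^{\mathsf T}\bar{U} - \bar{U}^{\mathsf T} U)\bigr]\,\Sigma\,V^{\mathsf T}
  + U\,\Sigma\,\bigl[F \circ (V^{\mathsf T}\bar{V} - \bar{V}^{\mathsf T} V)\bigr]\,V^{\mathsf T},
\qquad
F_{ij} = \frac{1}{\min(\sigma_j^{2} - \sigma_i^{2},\, -\varepsilon)}\ (i<j),\quad
F_{ji} = -F_{ij},\quad F_{ii} = 0,
\]

where \circ is the elementwise product computed by cw_mul and \varepsilon is SVD_EPSILON. The 2x2 branch evaluates the same expression with the single off-diagonal entry F_{01}.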