PyPI - warp-lang - Versions diffs - 1.6.2__py3-none-win_amd64.whl → 1.7.1__py3-none-win_amd64.whl - Mend

warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (191)

warp/__init__.py +7 -1
warp/autograd.py +12 -2
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +410 -0
warp/build_dll.py +6 -14
warp/builtins.py +463 -372
warp/codegen.py +196 -124
warp/config.py +42 -6
warp/context.py +496 -271
warp/dlpack.py +8 -6
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/benchmarks/benchmark_cloth.py +1 -1
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/distributed/example_jacobi_mpi.py +507 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +2 -2
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_magnetostatics.py +6 -6
warp/examples/fem/utils.py +9 -3
warp/examples/interop/example_jax_callable.py +116 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +205 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_matmul.py +2 -4
warp/fem/__init__.py +11 -1
warp/fem/adaptivity.py +4 -4
warp/fem/field/field.py +11 -1
warp/fem/field/nodal_field.py +56 -88
warp/fem/field/virtual.py +62 -23
warp/fem/geometry/adaptive_nanogrid.py +16 -13
warp/fem/geometry/closest_point.py +1 -1
warp/fem/geometry/deformed_geometry.py +5 -2
warp/fem/geometry/geometry.py +5 -0
warp/fem/geometry/grid_2d.py +12 -12
warp/fem/geometry/grid_3d.py +12 -15
warp/fem/geometry/hexmesh.py +5 -7
warp/fem/geometry/nanogrid.py +9 -11
warp/fem/geometry/quadmesh.py +13 -13
warp/fem/geometry/tetmesh.py +3 -4
warp/fem/geometry/trimesh.py +7 -20
warp/fem/integrate.py +262 -93
warp/fem/linalg.py +5 -5
warp/fem/quadrature/pic_quadrature.py +37 -22
warp/fem/quadrature/quadrature.py +194 -25
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +4 -2
warp/fem/space/basis_space.py +25 -18
warp/fem/space/hexmesh_function_space.py +2 -2
warp/fem/space/partition.py +6 -2
warp/fem/space/quadmesh_function_space.py +8 -8
warp/fem/space/shape/cube_shape_function.py +23 -23
warp/fem/space/shape/square_shape_function.py +12 -12
warp/fem/space/shape/triangle_shape_function.py +1 -1
warp/fem/space/tetmesh_function_space.py +3 -3
warp/fem/space/trimesh_function_space.py +2 -2
warp/fem/utils.py +12 -6
warp/jax.py +14 -1
warp/jax_experimental/__init__.py +16 -0
warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -29
warp/jax_experimental/ffi.py +702 -0
warp/jax_experimental/xla_ffi.py +602 -0
warp/math.py +89 -0
warp/native/array.h +13 -0
warp/native/builtin.h +29 -3
warp/native/bvh.cpp +3 -1
warp/native/bvh.cu +42 -14
warp/native/bvh.h +2 -1
warp/native/clang/clang.cpp +30 -3
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/exports.h +68 -63
warp/native/intersect.h +26 -26
warp/native/intersect_adj.h +33 -33
warp/native/marching.cu +1 -1
warp/native/mat.h +513 -9
warp/native/mesh.h +10 -10
warp/native/quat.h +99 -11
warp/native/rand.h +6 -0
warp/native/sort.cpp +122 -59
warp/native/sort.cu +152 -15
warp/native/sort.h +8 -1
warp/native/sparse.cpp +43 -22
warp/native/sparse.cu +52 -17
warp/native/svd.h +116 -0
warp/native/tile.h +312 -116
warp/native/tile_reduce.h +46 -3
warp/native/vec.h +68 -7
warp/native/volume.cpp +85 -113
warp/native/volume_builder.cu +25 -10
warp/native/volume_builder.h +6 -0
warp/native/warp.cpp +5 -6
warp/native/warp.cu +100 -11
warp/native/warp.h +19 -10
warp/optim/linear.py +10 -10
warp/render/render_opengl.py +19 -17
warp/render/render_usd.py +93 -3
warp/sim/articulation.py +4 -4
warp/sim/collide.py +32 -19
warp/sim/import_mjcf.py +449 -155
warp/sim/import_urdf.py +32 -12
warp/sim/inertia.py +189 -156
warp/sim/integrator_euler.py +8 -5
warp/sim/integrator_featherstone.py +3 -10
warp/sim/integrator_vbd.py +207 -2
warp/sim/integrator_xpbd.py +8 -5
warp/sim/model.py +71 -25
warp/sim/render.py +4 -0
warp/sim/utils.py +2 -2
warp/sparse.py +642 -555
warp/stubs.py +217 -20
warp/tests/__main__.py +0 -15
warp/tests/assets/torus.usda +1 -1
warp/tests/cuda/__init__.py +0 -0
warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
warp/tests/interop/__init__.py +0 -0
warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
warp/tests/sim/__init__.py +0 -0
warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
warp/tests/{test_collision.py → sim/test_collision.py} +236 -205
warp/tests/sim/test_inertia.py +161 -0
warp/tests/{test_model.py → sim/test_model.py} +40 -0
warp/tests/{flaky_test_sim_grad.py → sim/test_sim_grad.py} +4 -0
warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
warp/tests/sim/test_vbd.py +597 -0
warp/tests/sim/test_xpbd.py +399 -0
warp/tests/test_bool.py +1 -1
warp/tests/test_codegen.py +24 -3
warp/tests/test_examples.py +40 -38
warp/tests/test_fem.py +98 -14
warp/tests/test_linear_solvers.py +0 -11
warp/tests/test_mat.py +577 -156
warp/tests/test_mat_scalar_ops.py +4 -4
warp/tests/test_overwrite.py +0 -60
warp/tests/test_quat.py +356 -151
warp/tests/test_rand.py +44 -37
warp/tests/test_sparse.py +47 -6
warp/tests/test_spatial.py +75 -0
warp/tests/test_static.py +1 -1
warp/tests/test_utils.py +84 -4
warp/tests/test_vec.py +336 -178
warp/tests/tile/__init__.py +0 -0
warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
warp/tests/{test_tile_load.py → tile/test_tile_load.py} +98 -1
warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
warp/tests/unittest_serial.py +1 -0
warp/tests/unittest_suites.py +45 -62
warp/tests/unittest_utils.py +2 -1
warp/thirdparty/unittest_parallel.py +3 -1
warp/types.py +175 -666
warp/utils.py +137 -72
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/METADATA +46 -12
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/RECORD +184 -171
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info/licenses}/LICENSE.md +0 -26
warp/examples/optim/example_walker.py +0 -317
warp/native/cutlass_gemm.cpp +0 -43
warp/native/cutlass_gemm.cu +0 -382
warp/tests/test_matmul.py +0 -511
warp/tests/test_matmul_lite.py +0 -411
warp/tests/test_vbd.py +0 -386
warp/tests/unused_test_misc.py +0 -77
/warp/tests/{test_async.py → cuda/test_async.py} +0 -0
/warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
/warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
/warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
/warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
/warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
/warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
/warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
/warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
/warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
/warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
/warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
/warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
/warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
/warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
/warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
/warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
{warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0

warp/native/mat.h CHANGED

@@ -207,6 +207,159 @@ struct mat_t
     Type data[Rows][Cols];
 };
+template<typename Type>
+inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_cols(vec_t<2, Type> c0, vec_t<2, Type> c1)
+{
+    mat_t<2, 2, Type> m;
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+    return m;
+}
+template<typename Type>
+inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_cols(vec_t<3, Type> c0, vec_t<3, Type> c1, vec_t<3, Type> c2)
+{
+    mat_t<3, 3, Type> m;
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+    m.data[2][0] = c0[2];
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+    m.data[2][1] = c1[2];
+    m.data[0][2] = c2[0];
+    m.data[1][2] = c2[1];
+    m.data[2][2] = c2[2];
+    return m;
+}
+template<typename Type>
+inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_cols(vec_t<4, Type> c0, vec_t<4, Type> c1, vec_t<4, Type> c2, vec_t<4, Type> c3)
+{
+    mat_t<4, 4, Type> m;
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+    m.data[2][0] = c0[2];
+    m.data[3][0] = c0[3];
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+    m.data[2][1] = c1[2];
+    m.data[3][1] = c1[3];
+    m.data[0][2] = c2[0];
+    m.data[1][2] = c2[1];
+    m.data[2][2] = c2[2];
+    m.data[3][2] = c2[3];
+    m.data[0][3] = c3[0];
+    m.data[1][3] = c3[1];
+    m.data[2][3] = c3[2];
+    m.data[3][3] = c3[3];
+    return m;
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_cols(const initializer_array<Cols, vec_t<Rows, Type> >& l)
+{
+    mat_t<Rows, Cols, Type> m;
+    for (unsigned j=0; j < Cols; ++j)
+    {
+        for (unsigned i=0; i < Rows; ++i)
+        {
+            m.data[i][j] = l[j][i];
+        }
+    }
+    return m;
+}
+template<typename Type>
+inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_rows(vec_t<2, Type> r0, vec_t<2, Type> r1)
+{
+    mat_t<2, 2, Type> m;
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+    return m;
+}
+template<typename Type>
+inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_rows(vec_t<3, Type> r0, vec_t<3, Type> r1, vec_t<3, Type> r2)
+{
+    mat_t<3, 3, Type> m;
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+    m.data[0][2] = r0[2];
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+    m.data[1][2] = r1[2];
+    m.data[2][0] = r2[0];
+    m.data[2][1] = r2[1];
+    m.data[2][2] = r2[2];
+    return m;
+}
+template<typename Type>
+inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_rows(vec_t<4, Type> r0, vec_t<4, Type> r1, vec_t<4, Type> r2, vec_t<4, Type> r3)
+{
+    mat_t<4, 4, Type> m;
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+    m.data[0][2] = r0[2];
+    m.data[0][3] = r0[3];
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+    m.data[1][2] = r1[2];
+    m.data[1][3] = r1[3];
+    m.data[2][0] = r2[0];
+    m.data[2][1] = r2[1];
+    m.data[2][2] = r2[2];
+    m.data[2][3] = r2[3];
+    m.data[3][0] = r3[0];
+    m.data[3][1] = r3[1];
+    m.data[3][2] = r3[2];
+    m.data[3][3] = r3[3];
+    return m;
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_rows(const initializer_array<Rows, vec_t<Cols, Type> >& l)
+{
+    mat_t<Rows, Cols, Type> m;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            m.data[i][j] = l[i][j];
+        }
+    }
+    return m;
+}
 template<unsigned Rows, typename Type>
 inline CUDA_CALLABLE mat_t<Rows, Rows, Type> identity()
@@ -404,37 +557,241 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
     m.data[row][col] += value;
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] += value[i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
                                         mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
     adj_value += adj_m.data[row][col];
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] += adj_m.data[row][i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
     m.data[row][col] -= value;
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] -= value[i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
                                         mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
     adj_value -= adj_m.data[row][col];
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] -= adj_m.data[row][i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    m.data[row][col] = value;
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] = value[i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    adj_value += adj_m.data[row][col];
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                        mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] += adj_m.data[row][i];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -456,7 +813,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -476,7 +833,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
                                         mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
@@ -505,7 +862,7 @@ inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col,
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
                                         mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
@@ -710,7 +1067,7 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
     mat_t<Rows,ColsOut,Type> t(0);
     for (unsigned i=0; i < Rows; ++i)
     {
-        for (unsigned j=0; j < ColsOut; ++j)
+        for (unsigned j=0; j < ColsOut; ++j)
         {
             Type sum(0.0);
@@ -1573,6 +1930,128 @@ inline CUDA_CALLABLE void adj_mat_t(const vec_t<4,Type> &cmps0, const vec_t<4,Ty
     }
 }
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<2, Type>& c0, const vec_t<2, Type>& c1,
+    vec_t<2, Type>& adj_c0, vec_t<2, Type>& adj_c1,
+    const mat_t<2, 2, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 2; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+    }
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+    vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+    const mat_t<3, 3, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 3; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+        adj_c2[i] += adj_ret.data[i][2];
+    }
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+    vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+    const mat_t<4, 4, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 4; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+        adj_c2[i] += adj_ret.data[i][2];
+        adj_c3[i] += adj_ret.data[i][3];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const initializer_array<Cols, vec_t<Rows, Type> >& l,
+    const initializer_array<Cols, vec_t<Rows, Type>* >& adj_l,
+    const mat_t<Rows, Cols, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < Cols; ++j)
+    {
+        for (unsigned i=0; i < Rows; ++i)
+        {
+            (*adj_l[j])[i] += adj_ret.data[i][j];
+        }
+    }
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<2, Type>& r0, const vec_t<2, Type>& r1,
+    vec_t<2, Type>& adj_r0, vec_t<2, Type>& adj_r1,
+    const mat_t<2, 2, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 2; ++j)
+    {
+        adj_r0[j] += adj_ret.data[0][j];
+        adj_r1[j] += adj_ret.data[1][j];
+    }
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+    vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+    const mat_t<3, 3, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 3; ++j)
+    {
+        adj_c0[j] += adj_ret.data[0][j];
+        adj_c1[j] += adj_ret.data[1][j];
+        adj_c2[j] += adj_ret.data[2][j];
+    }
+}
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+    vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+    const mat_t<4, 4, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 4; ++j)
+    {
+        adj_c0[j] += adj_ret.data[0][j];
+        adj_c1[j] += adj_ret.data[1][j];
+        adj_c2[j] += adj_ret.data[2][j];
+        adj_c3[j] += adj_ret.data[3][j];
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const initializer_array<Rows, vec_t<Cols, Type> >& l,
+    const initializer_array<Rows, vec_t<Cols, Type>* >& adj_l,
+    const mat_t<Rows, Cols, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            (*adj_l[i])[j] += adj_ret.data[i][j];
+        }
+    }
+}
 template<unsigned Rows, unsigned Cols, typename Type>
 CUDA_CALLABLE inline mat_t<Rows, Cols, Type> lerp(const mat_t<Rows, Cols, Type>& a, const mat_t<Rows, Cols, Type>& b, Type t)
 {
@@ -1713,4 +2192,29 @@ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Col
 {
 }
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, const Type& tolerance)
+{
+    Type diff(0);
+    for (unsigned i = 0; i < Rows; ++i)
+    {
+        for (unsigned j = 0; j < Cols; ++j)
+        {
+            diff = max(diff, abs(actual.data[i][j] - expected.data[i][j]));
+        }
+    }
+    if (diff > tolerance)
+    {
+        printf("Error, expect_near() failed with tolerance "); print(tolerance);
+        printf("\t Expected: "); print(expected);
+        printf("\t Actual: "); print(actual);
+    }
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, Type tolerance, mat_t<Rows,Cols,Type>& adj_actual, mat_t<Rows,Cols,Type>& adj_expected, Type adj_tolerance)
+{
+    // nop
+}
 } // namespace wp

warp/native/mesh.h CHANGED

@@ -123,7 +123,7 @@ CUDA_CALLABLE inline bool mesh_query_point(uint64_t id, const vec3& point, float
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
@@ -311,7 +311,7 @@ CUDA_CALLABLE inline bool mesh_query_point_no_sign(uint64_t id, const vec3& poin
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
@@ -495,7 +495,7 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
@@ -682,7 +682,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
     float min_dist = max_dist;
@@ -926,9 +926,9 @@ CUDA_CALLABLE inline float solid_angle_iterative(uint64_t id, const vec3& p, con
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
-    int at_child[32]; // 0 for left, 1 for right, 2 for done
-    float angle[32];
+    int stack[BVH_QUERY_STACK_SIZE];
+    int at_child[BVH_QUERY_STACK_SIZE]; // 0 for left, 1 for right, 2 for done
+    float angle[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     at_child[0] = 0;
@@ -1017,7 +1017,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
@@ -1371,7 +1371,7 @@ CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const v
 {
     Mesh mesh = mesh_get(id);
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     stack[0] = *mesh.bvh.root;
     int count = 1;
@@ -1587,7 +1587,7 @@ struct mesh_query_aabb_t
     // Mesh Id
     Mesh mesh;
     // BVH traversal stack:
-    int stack[32];
+    int stack[BVH_QUERY_STACK_SIZE];
     int count;
     // inputs