PyPI - warp-lang - Versions diffs - 1.5.0__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.5.0__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (132)

warp/__init__.py +5 -0
warp/autograd.py +414 -191
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +40 -12
warp/build_dll.py +13 -6
warp/builtins.py +1124 -497
warp/codegen.py +261 -136
warp/config.py +1 -1
warp/context.py +357 -119
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_gemm.py +27 -18
warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
warp/examples/core/example_torch.py +18 -34
warp/examples/fem/example_apic_fluid.py +1 -0
warp/examples/fem/example_mixed_elasticity.py +1 -1
warp/examples/optim/example_bounce.py +1 -1
warp/examples/optim/example_cloth_throw.py +1 -1
warp/examples/optim/example_diffray.py +4 -15
warp/examples/optim/example_drone.py +1 -1
warp/examples/optim/example_softbody_properties.py +392 -0
warp/examples/optim/example_trajectory.py +1 -3
warp/examples/optim/example_walker.py +5 -0
warp/examples/sim/example_cartpole.py +0 -2
warp/examples/sim/example_cloth.py +3 -1
warp/examples/sim/example_cloth_self_contact.py +260 -0
warp/examples/sim/example_granular_collision_sdf.py +4 -5
warp/examples/sim/example_jacobian_ik.py +0 -2
warp/examples/sim/example_quadruped.py +5 -2
warp/examples/tile/example_tile_cholesky.py +79 -0
warp/examples/tile/example_tile_convolution.py +2 -2
warp/examples/tile/example_tile_fft.py +2 -2
warp/examples/tile/example_tile_filtering.py +3 -3
warp/examples/tile/example_tile_matmul.py +4 -4
warp/examples/tile/example_tile_mlp.py +12 -12
warp/examples/tile/example_tile_nbody.py +180 -0
warp/examples/tile/example_tile_walker.py +319 -0
warp/fem/geometry/geometry.py +0 -2
warp/math.py +147 -0
warp/native/array.h +12 -0
warp/native/builtin.h +0 -1
warp/native/bvh.cpp +149 -70
warp/native/bvh.cu +287 -68
warp/native/bvh.h +195 -85
warp/native/clang/clang.cpp +5 -1
warp/native/coloring.cpp +5 -1
warp/native/cuda_util.cpp +91 -53
warp/native/cuda_util.h +5 -0
warp/native/exports.h +40 -40
warp/native/intersect.h +17 -0
warp/native/mat.h +41 -0
warp/native/mathdx.cpp +19 -0
warp/native/mesh.cpp +25 -8
warp/native/mesh.cu +153 -101
warp/native/mesh.h +482 -403
warp/native/quat.h +40 -0
warp/native/solid_angle.h +7 -0
warp/native/sort.cpp +85 -0
warp/native/sort.cu +34 -0
warp/native/sort.h +3 -1
warp/native/spatial.h +11 -0
warp/native/tile.h +1187 -669
warp/native/tile_reduce.h +8 -6
warp/native/vec.h +41 -0
warp/native/warp.cpp +8 -1
warp/native/warp.cu +263 -40
warp/native/warp.h +19 -5
warp/optim/linear.py +22 -4
warp/render/render_opengl.py +130 -64
warp/sim/__init__.py +6 -1
warp/sim/collide.py +270 -26
warp/sim/import_urdf.py +8 -8
warp/sim/integrator_euler.py +25 -7
warp/sim/integrator_featherstone.py +154 -35
warp/sim/integrator_vbd.py +842 -40
warp/sim/model.py +134 -72
warp/sparse.py +1 -1
warp/stubs.py +265 -132
warp/tape.py +28 -30
warp/tests/aux_test_module_unload.py +15 -0
warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
warp/tests/test_array.py +74 -0
warp/tests/test_assert.py +242 -0
warp/tests/test_codegen.py +14 -61
warp/tests/test_collision.py +2 -2
warp/tests/test_coloring.py +12 -2
warp/tests/test_examples.py +12 -1
warp/tests/test_func.py +21 -4
warp/tests/test_grad_debug.py +87 -2
warp/tests/test_hash_grid.py +1 -1
warp/tests/test_ipc.py +116 -0
warp/tests/test_lerp.py +13 -87
warp/tests/test_mat.py +138 -167
warp/tests/test_math.py +47 -1
warp/tests/test_matmul.py +17 -16
warp/tests/test_matmul_lite.py +10 -15
warp/tests/test_mesh.py +84 -60
warp/tests/test_mesh_query_aabb.py +165 -0
warp/tests/test_mesh_query_point.py +328 -286
warp/tests/test_mesh_query_ray.py +134 -121
warp/tests/test_mlp.py +2 -2
warp/tests/test_operators.py +43 -0
warp/tests/test_overwrite.py +47 -2
warp/tests/test_quat.py +77 -0
warp/tests/test_reload.py +29 -0
warp/tests/test_sim_grad_bounce_linear.py +204 -0
warp/tests/test_smoothstep.py +17 -83
warp/tests/test_static.py +19 -3
warp/tests/test_tape.py +25 -0
warp/tests/test_tile.py +178 -191
warp/tests/test_tile_load.py +356 -0
warp/tests/test_tile_mathdx.py +61 -8
warp/tests/test_tile_mlp.py +17 -17
warp/tests/test_tile_reduce.py +24 -18
warp/tests/test_tile_shared_memory.py +66 -17
warp/tests/test_tile_view.py +165 -0
warp/tests/test_torch.py +35 -0
warp/tests/test_utils.py +36 -24
warp/tests/test_vec.py +110 -0
warp/tests/unittest_suites.py +29 -4
warp/tests/unittest_utils.py +30 -13
warp/thirdparty/unittest_parallel.py +2 -2
warp/types.py +411 -101
warp/utils.py +10 -7
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/METADATA +92 -69
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/RECORD +130 -119
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
warp/examples/benchmarks/benchmark_tile.py +0 -179
warp/native/tile_gemm.h +0 -341
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0

warp/sim/integrator_featherstone.py CHANGED

@@ -1162,31 +1162,97 @@ def create_inertia_matrix_kernel(num_joints, num_dofs):
     ):
         articulation = wp.tid()
-        J = wp.tile_load(J_arr[articulation], 0, 0, m=wp.static(6 * num_joints), n=num_dofs)
-        P = wp.tile_zeros(m=wp.static(6 * num_joints), n=num_dofs, dtype=float)
+        J = wp.tile_load(J_arr[articulation], shape=(wp.static(6 * num_joints), num_dofs))
+        P = wp.tile_zeros(shape=(wp.static(6 * num_joints), num_dofs), dtype=float)
         # compute P = M*J where M is a 6x6 block diagonal mass matrix
         for i in range(int(num_joints)):
             # 6x6 block matrices are on the diagonal
-            M_body = wp.tile_load(M_arr[articulation], i, i, m=6, n=6)
+            M_body = wp.tile_load(M_arr[articulation], shape=(6, 6), offset=(i * 6, i * 6))
             # load a 6xN row from the Jacobian
-            J_body = wp.tile_view(J, i * 6, 0, m=6, n=num_dofs)
+            J_body = wp.tile_view(J, offset=(i * 6, 0), shape=(6, num_dofs))
             # compute weighted row
             P_body = wp.tile_matmul(M_body, J_body)
             # assign to the P slice
-            wp.tile_assign(P, i * 6, 0, P_body)
+            wp.tile_assign(
+                P,
+                P_body,
+                offset=(
+                    i * 6,
+                    0,
+                ),
+            )
         # compute H = J^T*P
         H = wp.tile_matmul(wp.tile_transpose(J), P)
-        wp.tile_store(H_arr[articulation], 0, 0, H)
+        wp.tile_store(H_arr[articulation], H)
     return eval_dense_gemm_tile
+def create_batched_cholesky_kernel(num_dofs):
+    assert num_dofs == 18
+    @wp.kernel
+    def eval_tiled_dense_cholesky_batched(
+        A: wp.array3d(dtype=float), R: wp.array2d(dtype=float), L: wp.array3d(dtype=float)
+    ):
+        articulation = wp.tid()
+        a = wp.tile_load(A[articulation], shape=(num_dofs, num_dofs), storage="shared")
+        r = wp.tile_load(R[articulation], shape=num_dofs, storage="shared")
+        wp.tile_diag_add(a, r)
+        wp.tile_cholesky(a)
+        wp.tile_store(L[articulation], wp.tile_transpose(a))
+    return eval_tiled_dense_cholesky_batched
+def create_inertia_matrix_cholesky_kernel(num_joints, num_dofs):
+    @wp.kernel
+    def eval_dense_gemm_and_cholesky_tile(
+        J_arr: wp.array3d(dtype=float),
+        M_arr: wp.array3d(dtype=float),
+        R_arr: wp.array2d(dtype=float),
+        H_arr: wp.array3d(dtype=float),
+        L_arr: wp.array3d(dtype=float),
+    ):
+        articulation = wp.tid()
+        J = wp.tile_load(J_arr[articulation], shape=(wp.static(6 * num_joints), num_dofs))
+        P = wp.tile_zeros(shape=(wp.static(6 * num_joints), num_dofs), dtype=float)
+        # compute P = M*J where M is a 6x6 block diagonal mass matrix
+        for i in range(int(num_joints)):
+            # 6x6 block matrices are on the diagonal
+            M_body = wp.tile_load(M_arr[articulation], shape=(6, 6), offset=(i * 6, i * 6))
+            # load a 6xN row from the Jacobian
+            J_body = wp.tile_view(J, offset=(i * 6, 0), shape=(6, num_dofs))
+            # compute weighted row
+            P_body = wp.tile_matmul(M_body, J_body)
+            # assign to the P slice
+            wp.tile_assign(P, P_body, offset=(i * 6, 0))
+        # compute H = J^T*P
+        H = wp.tile_matmul(wp.tile_transpose(J), P)
+        wp.tile_store(H_arr[articulation], H)
+        # cholesky L L^T = (H + diag(R))
+        R = wp.tile_load(R_arr[articulation], shape=num_dofs, storage="shared")
+        H_R = wp.tile_diag_add(H, R)
+        L = wp.tile_cholesky(H_R)
+        wp.tile_store(L_arr[articulation], L)
+    return eval_dense_gemm_and_cholesky_tile
 @wp.kernel
 def eval_dense_gemm_batched(
     m: wp.array(dtype=int),
@@ -1458,16 +1524,28 @@ class FeatherstoneIntegrator(Integrator):
     """
-    def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1, use_tile_gemm=False):
+    def __init__(
+        self,
+        model,
+        angular_damping=0.05,
+        update_mass_matrix_every=1,
+        friction_smoothing=1.0,
+        use_tile_gemm=False,
+        fuse_cholesky=True,
+    ):
         """
         Args:
             model (Model): the model to be simulated.
             angular_damping (float, optional): Angular damping factor. Defaults to 0.05.
             update_mass_matrix_every (int, optional): How often to update the mass matrix (every n-th time the :meth:`simulate` function gets called). Defaults to 1.
+            friction_smoothing (float, optional): The delta value for the Huber norm (see :func:`warp.math.norm_huber`) used for the friction velocity normalization. Defaults to 1.0.
         """
         self.angular_damping = angular_damping
         self.update_mass_matrix_every = update_mass_matrix_every
+        self.friction_smoothing = friction_smoothing
         self.use_tile_gemm = use_tile_gemm
+        self.fuse_cholesky = fuse_cholesky
         self._step = 0
         self.compute_articulation_indices(model)
@@ -1475,7 +1553,14 @@ class FeatherstoneIntegrator(Integrator):
         if self.use_tile_gemm:
             # create a custom kernel to evaluate the system matrix for this type
-            self.eval_inertia_matrix_kernel = create_inertia_matrix_kernel(int(self.joint_count), int(self.dof_count))
+            if self.fuse_cholesky:
+                self.eval_inertia_matrix_cholesky_kernel = create_inertia_matrix_cholesky_kernel(
+                    int(self.joint_count), int(self.dof_count)
+                )
+            else:
+                self.eval_inertia_matrix_kernel = create_inertia_matrix_kernel(
+                    int(self.joint_count), int(self.dof_count)
+                )
             # ensure matrix is reloaded since otherwise an unload can happen during graph capture
             # todo: should not be necessary?
@@ -1758,6 +1843,7 @@ class FeatherstoneIntegrator(Integrator):
                             model.rigid_contact_shape0,
                             model.rigid_contact_shape1,
                             True,
+                            self.friction_smoothing,
                         ],
                         outputs=[body_f],
                         device=model.device,
@@ -1842,23 +1928,56 @@ class FeatherstoneIntegrator(Integrator):
                             # reshape arrays
                             M_tiled = self.M.reshape((-1, 6 * self.joint_count, 6 * self.joint_count))
                             J_tiled = self.J.reshape((-1, 6 * self.joint_count, self.dof_count))
+                            R_tiled = model.joint_armature.reshape((-1, self.dof_count))
                             H_tiled = self.H.reshape((-1, self.dof_count, self.dof_count))
-                            wp.launch_tiled(
-                                self.eval_inertia_matrix_kernel,
-                                dim=model.articulation_count,
-                                inputs=[J_tiled, M_tiled],
-                                outputs=[H_tiled],
-                                device=model.device,
-                                block_dim=256,
-                            )
-                            # J = J_tiled.numpy()[0]
-                            # M = M_tiled.numpy()[0]
-                            # H = J.T@M@J
+                            L_tiled = self.L.reshape((-1, self.dof_count, self.dof_count))
+                            assert H_tiled.shape == (model.articulation_count, 18, 18)
+                            assert L_tiled.shape == (model.articulation_count, 18, 18)
+                            assert R_tiled.shape == (model.articulation_count, 18)
+                            if self.fuse_cholesky:
+                                wp.launch_tiled(
+                                    self.eval_inertia_matrix_cholesky_kernel,
+                                    dim=model.articulation_count,
+                                    inputs=[J_tiled, M_tiled, R_tiled],
+                                    outputs=[H_tiled, L_tiled],
+                                    device=model.device,
+                                    block_dim=64,
+                                )
+                            else:
+                                wp.launch_tiled(
+                                    self.eval_inertia_matrix_kernel,
+                                    dim=model.articulation_count,
+                                    inputs=[J_tiled, M_tiled],
+                                    outputs=[H_tiled],
+                                    device=model.device,
+                                    block_dim=256,
+                                )
+                                wp.launch(
+                                    eval_dense_cholesky_batched,
+                                    dim=model.articulation_count,
+                                    inputs=[
+                                        self.articulation_H_start,
+                                        self.articulation_H_rows,
+                                        self.H,
+                                        model.joint_armature,
+                                    ],
+                                    outputs=[self.L],
+                                    device=model.device,
+                                )
                             # import numpy as np
-                            # np.testing.assert_allclose(H, H_tiled.numpy()[0])
+                            # J = J_tiled.numpy()
+                            # M = M_tiled.numpy()
+                            # R = R_tiled.numpy()
+                            # for i in range(model.articulation_count):
+                            #     r = R[i,:,0]
+                            #     H = J[i].T @ M[i] @ J[i]
+                            #     L = np.linalg.cholesky(H + np.diag(r))
+                            #     np.testing.assert_allclose(H, H_tiled.numpy()[i], rtol=1e-2, atol=1e-2)
+                            #     np.testing.assert_allclose(L, L_tiled.numpy()[i], rtol=1e-1, atol=1e-1)
                         else:
                             # form P = M*J
@@ -1904,19 +2023,19 @@ class FeatherstoneIntegrator(Integrator):
                                 device=model.device,
                             )
-                        # compute decomposition
-                        wp.launch(
-                            eval_dense_cholesky_batched,
-                            dim=model.articulation_count,
-                            inputs=[
-                                self.articulation_H_start,
-                                self.articulation_H_rows,
-                                self.H,
-                                model.joint_armature,
-                            ],
-                            outputs=[self.L],
-                            device=model.device,
-                        )
+                            # compute decomposition
+                            wp.launch(
+                                eval_dense_cholesky_batched,
+                                dim=model.articulation_count,
+                                inputs=[
+                                    self.articulation_H_start,
+                                    self.articulation_H_rows,
+                                    self.H,
+                                    model.joint_armature,
+                                ],
+                                outputs=[self.L],
+                                device=model.device,
+                            )
                         # print("joint_act:")
                         # print(control.joint_act.numpy())