PyPI - warp-lang - Versions diffs - 1.4.2__py3-none-win_amd64.whl → 1.5.1__py3-none-win_amd64.whl - Mend

warp-lang 1.4.2__py3-none-win_amd64.whl → 1.5.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (166) hide show

warp/__init__.py +4 -0
warp/autograd.py +43 -8
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +21 -2
warp/build_dll.py +23 -6
warp/builtins.py +1819 -7
warp/codegen.py +197 -61
warp/config.py +2 -2
warp/context.py +379 -107
warp/examples/assets/pixel.jpg +0 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
warp/examples/benchmarks/benchmark_gemm.py +121 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
warp/examples/benchmarks/benchmark_tile.py +179 -0
warp/examples/fem/example_adaptive_grid.py +37 -10
warp/examples/fem/example_apic_fluid.py +3 -2
warp/examples/fem/example_convection_diffusion_dg.py +4 -5
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion_3d.py +47 -4
warp/examples/fem/example_distortion_energy.py +220 -0
warp/examples/fem/example_magnetostatics.py +127 -85
warp/examples/fem/example_nonconforming_contact.py +5 -5
warp/examples/fem/example_stokes.py +3 -1
warp/examples/fem/example_streamlines.py +12 -19
warp/examples/fem/utils.py +38 -15
warp/examples/sim/example_cloth.py +4 -25
warp/examples/sim/example_quadruped.py +2 -1
warp/examples/tile/example_tile_convolution.py +58 -0
warp/examples/tile/example_tile_fft.py +47 -0
warp/examples/tile/example_tile_filtering.py +105 -0
warp/examples/tile/example_tile_matmul.py +79 -0
warp/examples/tile/example_tile_mlp.py +375 -0
warp/fem/__init__.py +8 -0
warp/fem/cache.py +16 -12
warp/fem/dirichlet.py +1 -1
warp/fem/domain.py +44 -1
warp/fem/field/__init__.py +1 -2
warp/fem/field/field.py +31 -19
warp/fem/field/nodal_field.py +101 -49
warp/fem/field/virtual.py +794 -0
warp/fem/geometry/__init__.py +2 -2
warp/fem/geometry/deformed_geometry.py +3 -105
warp/fem/geometry/element.py +13 -0
warp/fem/geometry/geometry.py +165 -7
warp/fem/geometry/grid_2d.py +3 -6
warp/fem/geometry/grid_3d.py +31 -28
warp/fem/geometry/hexmesh.py +3 -46
warp/fem/geometry/nanogrid.py +3 -2
warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
warp/fem/geometry/tetmesh.py +2 -43
warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
warp/fem/integrate.py +683 -261
warp/fem/linalg.py +404 -0
warp/fem/operator.py +101 -18
warp/fem/polynomial.py +5 -5
warp/fem/quadrature/quadrature.py +45 -21
warp/fem/space/__init__.py +45 -11
warp/fem/space/basis_function_space.py +451 -0
warp/fem/space/basis_space.py +58 -11
warp/fem/space/function_space.py +146 -5
warp/fem/space/grid_2d_function_space.py +80 -66
warp/fem/space/grid_3d_function_space.py +113 -68
warp/fem/space/hexmesh_function_space.py +96 -108
warp/fem/space/nanogrid_function_space.py +62 -110
warp/fem/space/quadmesh_function_space.py +208 -0
warp/fem/space/shape/__init__.py +45 -7
warp/fem/space/shape/cube_shape_function.py +328 -54
warp/fem/space/shape/shape_function.py +10 -1
warp/fem/space/shape/square_shape_function.py +328 -60
warp/fem/space/shape/tet_shape_function.py +269 -19
warp/fem/space/shape/triangle_shape_function.py +238 -19
warp/fem/space/tetmesh_function_space.py +69 -37
warp/fem/space/topology.py +38 -0
warp/fem/space/trimesh_function_space.py +179 -0
warp/fem/utils.py +6 -331
warp/jax_experimental.py +3 -1
warp/native/array.h +15 -0
warp/native/builtin.h +66 -26
warp/native/bvh.h +4 -0
warp/native/coloring.cpp +604 -0
warp/native/cuda_util.cpp +68 -51
warp/native/cuda_util.h +2 -1
warp/native/fabric.h +8 -0
warp/native/hashgrid.h +4 -0
warp/native/marching.cu +8 -0
warp/native/mat.h +14 -3
warp/native/mathdx.cpp +59 -0
warp/native/mesh.h +4 -0
warp/native/range.h +13 -1
warp/native/reduce.cpp +9 -1
warp/native/reduce.cu +7 -0
warp/native/runlength_encode.cpp +9 -1
warp/native/runlength_encode.cu +7 -1
warp/native/scan.cpp +8 -0
warp/native/scan.cu +8 -0
warp/native/scan.h +8 -1
warp/native/sparse.cpp +8 -0
warp/native/sparse.cu +8 -0
warp/native/temp_buffer.h +7 -0
warp/native/tile.h +1854 -0
warp/native/tile_gemm.h +341 -0
warp/native/tile_reduce.h +210 -0
warp/native/volume_builder.cu +8 -0
warp/native/volume_builder.h +8 -0
warp/native/warp.cpp +10 -2
warp/native/warp.cu +369 -15
warp/native/warp.h +12 -2
warp/optim/adam.py +39 -4
warp/paddle.py +29 -12
warp/render/render_opengl.py +140 -67
warp/sim/graph_coloring.py +292 -0
warp/sim/import_urdf.py +8 -8
warp/sim/integrator_euler.py +4 -2
warp/sim/integrator_featherstone.py +115 -44
warp/sim/integrator_vbd.py +6 -0
warp/sim/model.py +109 -32
warp/sparse.py +1 -1
warp/stubs.py +569 -4
warp/tape.py +12 -7
warp/tests/assets/pixel.npy +0 -0
warp/tests/aux_test_instancing_gc.py +18 -0
warp/tests/test_array.py +39 -0
warp/tests/test_codegen.py +81 -1
warp/tests/test_codegen_instancing.py +30 -0
warp/tests/test_collision.py +110 -0
warp/tests/test_coloring.py +251 -0
warp/tests/test_context.py +34 -0
warp/tests/test_examples.py +21 -5
warp/tests/test_fem.py +453 -113
warp/tests/test_func.py +34 -4
warp/tests/test_generics.py +52 -0
warp/tests/test_iter.py +68 -0
warp/tests/test_lerp.py +13 -87
warp/tests/test_mat_scalar_ops.py +1 -1
warp/tests/test_matmul.py +6 -9
warp/tests/test_matmul_lite.py +6 -11
warp/tests/test_mesh_query_point.py +1 -1
warp/tests/test_module_hashing.py +23 -0
warp/tests/test_overwrite.py +45 -0
warp/tests/test_paddle.py +27 -87
warp/tests/test_print.py +56 -1
warp/tests/test_smoothstep.py +17 -83
warp/tests/test_spatial.py +1 -1
warp/tests/test_static.py +3 -3
warp/tests/test_tile.py +744 -0
warp/tests/test_tile_mathdx.py +144 -0
warp/tests/test_tile_mlp.py +383 -0
warp/tests/test_tile_reduce.py +374 -0
warp/tests/test_tile_shared_memory.py +190 -0
warp/tests/test_vbd.py +12 -20
warp/tests/test_volume.py +43 -0
warp/tests/unittest_suites.py +19 -2
warp/tests/unittest_utils.py +4 -2
warp/types.py +340 -74
warp/utils.py +23 -3
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
warp/fem/field/test.py +0 -180
warp/fem/field/trial.py +0 -183
warp/fem/space/collocated_function_space.py +0 -102
warp/fem/space/quadmesh_2d_function_space.py +0 -261
warp/fem/space/trimesh_2d_function_space.py +0 -153
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
{warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0

warp/sim/graph_coloring.py ADDED Viewed

@@ -0,0 +1,292 @@
+# Copyright (c) 2024 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+from enum import Enum
+import numpy as np
+import warp as wp
+import warp.utils
+class ColoringAlgorithm(Enum):
+    """Selects the graph coloring algorithm; `color_graph` forwards the member's `.value` to the native `graph_coloring` call."""
+    # the MCS coloring algorithm (see the references listed in `color_graph`)
+    MCS = 0
+    # the degree-ordered greedy coloring algorithm
+    GREEDY = 1
+# Serial kernel (it never calls `wp.tid()`; the host launches it with dim=1) that converts triangle mesh
+# edges into graph edges. Each row of `trimesh_edge_indices` is read as (o1, o2, v1, v2): columns 2-3 are
+# always copied out as a graph edge and, when `add_bending` is set, columns 0-1 are appended as an extra
+# "diagonal" edge. `graph_edge_indices` needs room for up to 2 * trimesh_edge_indices.shape[0] rows;
+# the number of rows actually written is stored in `graph_num_edges[0]`.
+@wp.kernel
+def construct_trimesh_graph_edges_kernel(
+    trimesh_edge_indices: wp.array(dtype=int, ndim=2),
+    add_bending: bool,
+    graph_edge_indices: wp.array(dtype=int, ndim=2),
+    graph_num_edges: wp.array(dtype=int),
+):
+    num_diagonal_edges = wp.int32(0)
+    num_non_diagonal_edges = trimesh_edge_indices.shape[0]
+    for e_idx in range(trimesh_edge_indices.shape[0]):
+        # the mesh edge (v1, v2) itself always becomes graph edge number e_idx
+        v1 = trimesh_edge_indices[e_idx, 2]
+        v2 = trimesh_edge_indices[e_idx, 3]
+        graph_edge_indices[e_idx, 0] = v1
+        graph_edge_indices[e_idx, 1] = v2
+        o1 = trimesh_edge_indices[e_idx, 0]
+        o2 = trimesh_edge_indices[e_idx, 1]
+        # NOTE(review): -1 presumably marks a missing opposite vertex (boundary edge) - confirm against `sim.Model`
+        if o1 != -1 and o2 != -1 and add_bending:
+            # diagonal (o1, o2) edges are packed right after the block of regular mesh edges
+            graph_edge_indices[num_non_diagonal_edges + num_diagonal_edges, 0] = o1
+            graph_edge_indices[num_non_diagonal_edges + num_diagonal_edges, 1] = o2
+            num_diagonal_edges = num_diagonal_edges + 1
+    # report how many rows of graph_edge_indices hold valid edges
+    graph_num_edges[0] = num_diagonal_edges + num_non_diagonal_edges
+# Checks a coloring, one thread per graph edge: the two end points of every edge in `edge_indices`
+# (one (node, node) pair per row) must have different entries in `colors`.
+# NOTE(review): how a violation is reported is decided by `wp.expect_neq` - confirm against the Warp docs.
+@wp.kernel
+def validate_graph_coloring(edge_indices: wp.array(dtype=int, ndim=2), colors: wp.array(dtype=int)):
+    edge_idx = wp.tid()
+    e_v_1 = edge_indices[edge_idx, 0]
+    e_v_2 = edge_indices[edge_idx, 1]
+    wp.expect_neq(colors[e_v_1], colors[e_v_2])
+# Serial kernel (no `wp.tid()`; the host launches it with dim=1) that counts how many particles carry each color.
+# `group_sizes` is incremented in place, so the caller must pass it zero-initialized with one slot per color.
+@wp.kernel
+def count_color_group_size(
+    colors: wp.array(dtype=int),
+    group_sizes: wp.array(dtype=int),
+):
+    for particle_idx in range(colors.shape[0]):
+        particle_color = colors[particle_idx]
+        group_sizes[particle_color] = group_sizes[particle_color] + 1
+# Serial kernel (no `wp.tid()`; the host launches it with dim=1) that scatters particle indices into a
+# flat array in which the particles of each color occupy one contiguous range.
+# `group_offsets[c]` is the start of color c's range and `group_fill_count[c]` is the number of slots
+# already used in it; the fill counts are updated in place, so the caller passes them zero-initialized.
+@wp.kernel
+def fill_color_groups(
+    colors: wp.array(dtype=int),
+    group_fill_count: wp.array(dtype=int),
+    group_offsets: wp.array(dtype=int),
+    # flattened color groups
+    color_groups_flatten: wp.array(dtype=int),
+):
+    for particle_idx in range(colors.shape[0]):
+        particle_color = colors[particle_idx]
+        group_offset = group_offsets[particle_color]
+        group_idx = group_fill_count[particle_color]
+        # next free slot of this color's range
+        color_groups_flatten[group_idx + group_offset] = wp.int32(particle_idx)
+        group_fill_count[particle_color] = group_idx + 1
+def convert_to_color_groups(num_colors, particle_colors, return_wp_array=False, device="cpu"):
+    """
+    Turn a per-particle color array into one index array per color.
+    Args:
+        num_colors: The number of distinct colors used in `particle_colors`
+        particle_colors: A `wp.array` of `int` holding the color of every particle; it is read by kernels launched on the CPU
+        return_wp_array: If `True` every group is returned as a `wp.array` on `device`, otherwise as an `np.array`
+        device: The device the returned `wp.array` groups are created on; only used when `return_wp_array` is `True`
+    Returns:
+        A list of length `num_colors`; entry `c` contains the indices of the particles with color `c`.
+    """
+    cpu = "cpu"
+    # pass 1: histogram of the colors
+    sizes = wp.zeros(shape=(num_colors,), dtype=int, device=cpu)
+    wp.launch(kernel=count_color_group_size, inputs=[particle_colors, sizes], device=cpu, dim=1)
+    sizes_np = sizes.numpy()
+    # start offset of every group inside the flattened array, followed by the total size
+    offsets_np = np.concatenate([np.array([0]), np.cumsum(sizes_np)])
+    # pass 2: scatter the particle indices into their group's range
+    flat = wp.empty(shape=(sizes_np.sum(),), dtype=int, device=cpu)
+    wp.launch(
+        kernel=fill_color_groups,
+        inputs=[
+            particle_colors,
+            wp.zeros(shape=(num_colors,), dtype=int, device=cpu),
+            wp.array(offsets_np, dtype=int, device=cpu),
+            flat,
+        ],
+        device=cpu,
+        dim=1,
+    )
+    flat_np = flat.numpy()
+    slices = [flat_np[offsets_np[c] : offsets_np[c + 1]] for c in range(num_colors)]
+    if return_wp_array:
+        return [wp.array(group, dtype=int, device=device) for group in slices]
+    return slices
+def construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=False):
+    """
+    Build the edge list of the coloring graph of a trimesh, including the bending (o1, o2) connections.
+    Args:
+        trimesh_edge_indices: An `np.ndarray` or `wp.array` whose rows are read as (o1, o2, v1, v2)
+        return_wp_array: If `True` the result is returned as a CPU `wp.array`, otherwise as an `np.array`
+    Returns:
+        An array with one (node, node) row per graph edge.
+    """
+    cpu = "cpu"
+    if isinstance(trimesh_edge_indices, np.ndarray):
+        trimesh_edge_indices = wp.array(trimesh_edge_indices, dtype=int, device=cpu)
+    num_mesh_edges = trimesh_edge_indices.shape[0]
+    # worst case: every mesh edge also contributes a bending edge, hence twice the edge count
+    edge_buffer = wp.empty(shape=(num_mesh_edges * 2, 2), dtype=int, device=cpu)
+    edge_counter = wp.zeros(shape=(1,), dtype=int, device=cpu)
+    wp.launch(
+        kernel=construct_trimesh_graph_edges_kernel,
+        inputs=[trimesh_edge_indices.to(cpu), True],
+        outputs=[edge_buffer, edge_counter],
+        dim=1,
+        device=cpu,
+    )
+    # keep only the rows the kernel actually wrote
+    used = edge_counter.numpy()[0]
+    trimmed = edge_buffer.numpy()[:used, :]
+    if not return_wp_array:
+        return trimmed
+    return wp.array(trimmed, dtype=int, device=cpu)
+def color_trimesh(
+    num_nodes,
+    trimesh_edge_indices,
+    include_bending_energy,
+    balance_colors=True,
+    target_max_min_color_ratio=1.1,
+    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
+):
+    """
+    Generate a vertex coloring for a trimesh given by its number of vertices and its edges.
+    The trimesh is first converted to a graph, which is then colored with `color_graph`.
+    The result is whatever `color_graph` returns: a list of `np.array` with `dtype`=`int` whose length is
+    the number of colors, where each `np.array` contains the indices of the vertices with that color.
+    Args:
+        num_nodes: The number of nodes in the graph
+        trimesh_edge_indices: A `wp.array` of shape (number_edges, 4), each row is (o1, o2, v1, v2), see `sim.Model`'s definition of `edge_indices`.
+        include_bending_energy: Whether to consider bending energy in the coloring process. If set to `True`, the generated
+            graph also contains the edges connecting o1 and o2; otherwise, the graph is equivalent to the trimesh.
+        balance_colors: Passed on to `color_graph`, see its documentation
+        target_max_min_color_ratio: Passed on to `color_graph`, see its documentation
+        algorithm: Passed on to `color_graph`, see its documentation
+    """
+    if not include_bending_energy:
+        # only the mesh edges themselves: columns 2 and 3 hold (v1, v2)
+        edges = wp.array(trimesh_edge_indices[:, 2:], dtype=int, device="cpu")
+    else:
+        edges = construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=True)
+    return color_graph(num_nodes, edges, balance_colors, target_max_min_color_ratio, algorithm)
+def color_graph(
+    num_nodes,
+    graph_edge_indices,
+    balance_colors=True,
+    target_max_min_color_ratio=1.1,
+    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
+):
+    """
+    Generate a coloring for a graph, which is represented by the number of nodes and an array of edges.
+    Args:
+        num_nodes: The number of nodes in the graph
+        graph_edge_indices: A `wp.array` of shape (number_edges, 2); the callers in this module always pass a CPU array
+        balance_colors: Whether to apply the color balancing algorithm to balance the size of each color
+        target_max_min_color_ratio: The color balancing algorithm stops when the ratio between the largest color and
+            the smallest color reaches this value
+        algorithm: A member of `ColoringAlgorithm`. `ColoringAlgorithm.MCS` selects the MCS coloring algorithm,
+            while `ColoringAlgorithm.GREEDY` selects the degree-ordered greedy algorithm. The MCS algorithm typically generates 30% to 50% fewer colors
+            compared to the ordered greedy algorithm, while maintaining the same linear complexity. Although MCS has a constant overhead that makes it about twice
+            as slow as the greedy algorithm, it produces significantly better coloring results. We recommend using MCS, especially if coloring is only part of the
+            preprocessing stage.
+    Returns:
+        A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
+        and each `np.array` contains the indices of the vertices with this color. An empty list is returned when `num_nodes` is 0.
+    Raises:
+        ValueError: If `graph_edge_indices` is not a 2 dimensional array.
+    Note:
+        References to the coloring algorithm:
+        MCS: Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
+        Ordered Greedy: Ton-That, Q. M., Kry, P. G., & Andrews, S. (2023). Parallel block Neo-Hookean XPBD using graph clustering. Computers & Graphics, 110, 1-10.
+    """
+    if num_nodes == 0:
+        # an empty graph has no colors: return an empty list instead of None so that the documented
+        # return type holds and callers such as `combine_independent_particle_coloring` can call len() on it
+        return []
+    # validate the input before allocating anything
+    if graph_edge_indices.ndim != 2:
+        raise ValueError(
+            f"graph_edge_indices must be a 2 dimensional array! The provided one is {graph_edge_indices.ndim} dimensional."
+        )
+    particle_colors = wp.empty(shape=(num_nodes,), dtype=wp.int32, device="cpu")
+    # the native routine writes one color per node into particle_colors and returns the number of colors used
+    num_colors = wp.context.runtime.core.graph_coloring(
+        num_nodes,
+        graph_edge_indices.__ctype__(),
+        algorithm.value,
+        particle_colors.__ctype__(),
+    )
+    if balance_colors:
+        # rebalances particle_colors in place and returns the max/min group size ratio it achieved
+        max_min_ratio = wp.context.runtime.core.balance_coloring(
+            num_nodes,
+            graph_edge_indices.__ctype__(),
+            num_colors,
+            target_max_min_color_ratio,
+            particle_colors.__ctype__(),
+        )
+        if max_min_ratio > target_max_min_color_ratio:
+            wp.utils.warn(
+                f"The graph is not optimizable anymore, terminated with a max/min ratio: {max_min_ratio} without reaching the target ratio: {target_max_min_color_ratio}"
+            )
+    color_groups = convert_to_color_groups(num_colors, particle_colors, return_wp_array=False)
+    return color_groups
+def combine_independent_particle_coloring(color_groups_1, color_groups_2):
+    """
+    Merge the colorings of two independent graphs into a single coloring.
+    `color_groups_1` and `color_groups_2` must come from two graphs without any connection between them.
+    The coloring with more colors is sorted by group size in ascending order, the other one in descending order,
+    and groups at the same position are concatenated, so that small groups are paired with large ones.
+    Args:
+        color_groups_1: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
+            and each `np.array` contains the indices of vertices with this color.
+        color_groups_2: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
+            and each `np.array` contains the indices of vertices with this color.
+    Returns:
+        The combined list of color groups. If one of the inputs is empty, the other input is returned unchanged.
+    """
+    if len(color_groups_1) == 0:
+        return color_groups_2
+    if len(color_groups_2) == 0:
+        return color_groups_1
+    # make `longer` the coloring with more colors, so that its unpaired leftover groups are its largest ones
+    if len(color_groups_1) < len(color_groups_2):
+        longer, shorter = color_groups_2, color_groups_1
+    else:
+        longer, shorter = color_groups_1, color_groups_2
+    # the longer coloring goes from its smallest to its largest group ...
+    ascending = sorted(longer, key=len)
+    # ... and the shorter coloring from its largest to its smallest group,
+    # which pairs small groups with large ones and balances the load of each combined group
+    descending = sorted(shorter, key=len, reverse=True)
+    num_paired = len(descending)
+    return [
+        np.concatenate([group, descending[i]]) if i < num_paired else group
+        for i, group in enumerate(ascending)
+    ]

warp/sim/import_urdf.py CHANGED Viewed

@@ -211,14 +211,14 @@ def parse_urdf(
                 if hasattr(m, "geometry"):
                     # multiple meshes are contained in a scene
                     for geom in m.geometry.values():
-                        vertices = np.array(geom.vertices, dtype=np.float32) * scaling
-                        faces = np.array(geom.faces.flatten(), dtype=np.int32)
-                        mesh = Mesh(vertices, faces)
+                        geom_vertices = np.array(geom.vertices, dtype=np.float32) * scaling
+                        geom_faces = np.array(geom.faces.flatten(), dtype=np.int32)
+                        geom_mesh = Mesh(geom_vertices, geom_faces)
                         s = builder.add_shape_mesh(
                             body=link,
                             pos=wp.vec3(tf.p),
                             rot=wp.quat(tf.q),
-                            mesh=mesh,
+                            mesh=geom_mesh,
                             density=density,
                             is_visible=visible,
                             has_ground_collision=not just_visual,
@@ -228,14 +228,14 @@ def parse_urdf(
                         shapes.append(s)
                 else:
                     # a single mesh
-                    vertices = np.array(m.vertices, dtype=np.float32) * scaling
-                    faces = np.array(m.faces.flatten(), dtype=np.int32)
-                    mesh = Mesh(vertices, faces)
+                    m_vertices = np.array(m.vertices, dtype=np.float32) * scaling
+                    m_faces = np.array(m.faces.flatten(), dtype=np.int32)
+                    m_mesh = Mesh(m_vertices, m_faces)
                     s = builder.add_shape_mesh(
                         body=link,
                         pos=wp.vec3(tf.p),
                         rot=wp.quat(tf.q),
-                        mesh=mesh,
+                        mesh=m_mesh,
                         density=density,
                         is_visible=visible,
                         has_ground_collision=not just_visual,

warp/sim/integrator_euler.py CHANGED Viewed

@@ -264,6 +264,7 @@ def eval_triangles_contact(
     v: wp.array(dtype=wp.vec3),
     indices: wp.array2d(dtype=int),
     materials: wp.array2d(dtype=float),
+    particle_radius: wp.array(dtype=float),
     f: wp.array(dtype=wp.vec3),
 ):
     tid = wp.tid()
@@ -303,7 +304,7 @@ def eval_triangles_contact(
     diff = pos - closest
     dist = wp.dot(diff, diff)
     n = wp.normalize(diff)
-    c = wp.min(dist - 0.01, 0.0)  # 0 unless within 0.01 of surface
+    c = wp.min(dist - particle_radius[particle_no], 0.0)  # 0 unless within particle's contact radius
     # c = wp.leaky_min(dot(n, x0)-0.01, 0.0, 0.0)
     fn = n * c * 1e5
@@ -795,7 +796,7 @@ def eval_particle_contacts(
     r = bx - wp.transform_point(X_wb, X_com)
     n = contact_normal[tid]
-    c = wp.dot(n, px - bx) - particle_radius[tid]
+    c = wp.dot(n, px - bx) - particle_radius[particle_index]
     if c > particle_ka:
         return
@@ -1697,6 +1698,7 @@ def eval_triangle_contact_forces(model: Model, state: State, particle_f: wp.arra
                 state.particle_qd,
                 model.tri_indices,
                 model.tri_materials,
+                model.particle_radius,
             ],
             outputs=[particle_f],
             device=model.device,

warp/sim/integrator_featherstone.py CHANGED Viewed

@@ -1155,6 +1155,38 @@ def dense_gemm(
 #         dense_gemm(p, n, m, True, False, add_to_C, A_start, B_start, C_start, A, wp.adjoint[C], wp.adjoint[B])
+def create_inertia_matrix_kernel(num_joints, num_dofs):
+    """
+    Create a tile kernel, specialized for the given joint and dof counts, that evaluates H = J^T*M*J.
+    The returned kernel processes one articulation per `wp.tid()` index and reads/writes 3D arrays
+    whose first axis is indexed by the articulation.
+    Args:
+        num_joints: Number of joints per articulation; J has 6 * num_joints rows
+        num_dofs: Number of degrees of freedom per articulation; J has num_dofs columns
+    Returns:
+        The `wp.kernel` closure taking (J_arr, M_arr, H_arr).
+    """
+    @wp.kernel
+    def eval_dense_gemm_tile(
+        J_arr: wp.array3d(dtype=float), M_arr: wp.array3d(dtype=float), H_arr: wp.array3d(dtype=float)
+    ):
+        articulation = wp.tid()
+        # load the whole (6 * num_joints) x num_dofs Jacobian of this articulation as one tile
+        J = wp.tile_load(J_arr[articulation], 0, 0, m=wp.static(6 * num_joints), n=num_dofs)
+        P = wp.tile_zeros(m=wp.static(6 * num_joints), n=num_dofs, dtype=float)
+        # compute P = M*J where M is a 6x6 block diagonal mass matrix
+        for i in range(int(num_joints)):
+            # 6x6 block matrices are on the diagonal
+            M_body = wp.tile_load(M_arr[articulation], i, i, m=6, n=6)
+            # load a 6xN row from the Jacobian
+            J_body = wp.tile_view(J, i * 6, 0, m=6, n=num_dofs)
+            # compute weighted row
+            P_body = wp.tile_matmul(M_body, J_body)
+            # assign to the P slice
+            wp.tile_assign(P, i * 6, 0, P_body)
+        # compute H = J^T*P
+        H = wp.tile_matmul(wp.tile_transpose(J), P)
+        wp.tile_store(H_arr[articulation], 0, 0, H)
+    return eval_dense_gemm_tile
 @wp.kernel
 def eval_dense_gemm_batched(
     m: wp.array(dtype=int),
@@ -1426,7 +1458,7 @@ class FeatherstoneIntegrator(Integrator):
     """
-    def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1):
+    def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1, use_tile_gemm=False):
         """
         Args:
             model (Model): the model to be simulated.
@@ -1435,9 +1467,19 @@ class FeatherstoneIntegrator(Integrator):
         """
         self.angular_damping = angular_damping
         self.update_mass_matrix_every = update_mass_matrix_every
+        self.use_tile_gemm = use_tile_gemm
+        self._step = 0
         self.compute_articulation_indices(model)
         self.allocate_model_aux_vars(model)
-        self._step = 0
+        if self.use_tile_gemm:
+            # create a custom kernel to evaluate the system matrix for this type
+            self.eval_inertia_matrix_kernel = create_inertia_matrix_kernel(int(self.joint_count), int(self.dof_count))
+            # ensure matrix is reloaded since otherwise an unload can happen during graph capture
+            # todo: should not be necessary?
+            wp.load_module(device=wp.get_device())
     def compute_articulation_indices(self, model):
         # calculate total size and offsets of Jacobian and mass matrices for entire system
@@ -1486,6 +1528,12 @@ class FeatherstoneIntegrator(Integrator):
                 articulation_J_rows.append(joint_count * 6)
                 articulation_J_cols.append(dof_count)
+                if self.use_tile_gemm:
+                    # store the joint and dof count assuming all
+                    # articulations have the same structure
+                    self.joint_count = joint_count
+                    self.dof_count = dof_count
                 self.J_size += 6 * joint_count * dof_count
                 self.M_size += 6 * joint_count * 6 * joint_count
                 self.H_size += dof_count * dof_count
@@ -1790,48 +1838,71 @@ class FeatherstoneIntegrator(Integrator):
                             device=model.device,
                         )
-                        # form P = M*J
-                        wp.launch(
-                            eval_dense_gemm_batched,
-                            dim=model.articulation_count,
-                            inputs=[
-                                self.articulation_M_rows,
-                                self.articulation_J_cols,
-                                self.articulation_J_rows,
-                                False,
-                                False,
-                                self.articulation_M_start,
-                                self.articulation_J_start,
-                                # P start is the same as J start since it has the same dims as J
-                                self.articulation_J_start,
-                                self.M,
-                                self.J,
-                            ],
-                            outputs=[self.P],
-                            device=model.device,
-                        )
-                        # form H = J^T*P
-                        wp.launch(
-                            eval_dense_gemm_batched,
-                            dim=model.articulation_count,
-                            inputs=[
-                                self.articulation_J_cols,
-                                self.articulation_J_cols,
-                                # P rows is the same as J rows
-                                self.articulation_J_rows,
-                                True,
-                                False,
-                                self.articulation_J_start,
-                                # P start is the same as J start since it has the same dims as J
-                                self.articulation_J_start,
-                                self.articulation_H_start,
-                                self.J,
-                                self.P,
-                            ],
-                            outputs=[self.H],
-                            device=model.device,
-                        )
+                        if self.use_tile_gemm:
+                            # reshape arrays
+                            M_tiled = self.M.reshape((-1, 6 * self.joint_count, 6 * self.joint_count))
+                            J_tiled = self.J.reshape((-1, 6 * self.joint_count, self.dof_count))
+                            H_tiled = self.H.reshape((-1, self.dof_count, self.dof_count))
+                            wp.launch_tiled(
+                                self.eval_inertia_matrix_kernel,
+                                dim=model.articulation_count,
+                                inputs=[J_tiled, M_tiled],
+                                outputs=[H_tiled],
+                                device=model.device,
+                                block_dim=256,
+                            )
+                            # J = J_tiled.numpy()[0]
+                            # M = M_tiled.numpy()[0]
+                            # H = J.T@M@J
+                            # import numpy as np
+                            # np.testing.assert_allclose(H, H_tiled.numpy()[0])
+                        else:
+                            # form P = M*J
+                            wp.launch(
+                                eval_dense_gemm_batched,
+                                dim=model.articulation_count,
+                                inputs=[
+                                    self.articulation_M_rows,
+                                    self.articulation_J_cols,
+                                    self.articulation_J_rows,
+                                    False,
+                                    False,
+                                    self.articulation_M_start,
+                                    self.articulation_J_start,
+                                    # P start is the same as J start since it has the same dims as J
+                                    self.articulation_J_start,
+                                    self.M,
+                                    self.J,
+                                ],
+                                outputs=[self.P],
+                                device=model.device,
+                            )
+                            # form H = J^T*P
+                            wp.launch(
+                                eval_dense_gemm_batched,
+                                dim=model.articulation_count,
+                                inputs=[
+                                    self.articulation_J_cols,
+                                    self.articulation_J_cols,
+                                    # P rows is the same as J rows
+                                    self.articulation_J_rows,
+                                    True,
+                                    False,
+                                    self.articulation_J_start,
+                                    # P start is the same as J start since it has the same dims as J
+                                    self.articulation_J_start,
+                                    self.articulation_H_start,
+                                    self.J,
+                                    self.P,
+                                ],
+                                outputs=[self.H],
+                                device=model.device,
+                            )
                         # compute decomposition
                         wp.launch(

warp/sim/integrator_vbd.py CHANGED Viewed

@@ -740,6 +740,12 @@ class VBDIntegrator(Integrator):
         self.body_particle_contact_count = wp.zeros((model.particle_count,), dtype=wp.int32, device=self.device)
         self.friction_epsilon = friction_epsilon
+        if len(self.model.particle_coloring) == 0:
+            raise ValueError(
+                "model.particle_coloring is empty! When using the VBDIntegrator you must call ModelBuilder.color() "
+                "or ModelBuilder.set_coloring() before calling ModelBuilder.finalize()."
+            )
         # tests
         # wp.launch(kernel=_test_compute_force_element_adjacency,
         #           inputs=[self.adjacency, model.edge_indices, model.tri_indices],