warp-lang 1.4.2__py3-none-manylinux2014_x86_64.whl → 1.5.1__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +4 -0
- warp/autograd.py +43 -8
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +21 -2
- warp/build_dll.py +23 -6
- warp/builtins.py +1819 -7
- warp/codegen.py +197 -61
- warp/config.py +2 -2
- warp/context.py +379 -107
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
- warp/examples/benchmarks/benchmark_gemm.py +121 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
- warp/examples/benchmarks/benchmark_tile.py +179 -0
- warp/examples/fem/example_adaptive_grid.py +37 -10
- warp/examples/fem/example_apic_fluid.py +3 -2
- warp/examples/fem/example_convection_diffusion_dg.py +4 -5
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +47 -4
- warp/examples/fem/example_distortion_energy.py +220 -0
- warp/examples/fem/example_magnetostatics.py +127 -85
- warp/examples/fem/example_nonconforming_contact.py +5 -5
- warp/examples/fem/example_stokes.py +3 -1
- warp/examples/fem/example_streamlines.py +12 -19
- warp/examples/fem/utils.py +38 -15
- warp/examples/sim/example_cloth.py +4 -25
- warp/examples/sim/example_quadruped.py +2 -1
- warp/examples/tile/example_tile_convolution.py +58 -0
- warp/examples/tile/example_tile_fft.py +47 -0
- warp/examples/tile/example_tile_filtering.py +105 -0
- warp/examples/tile/example_tile_matmul.py +79 -0
- warp/examples/tile/example_tile_mlp.py +375 -0
- warp/fem/__init__.py +8 -0
- warp/fem/cache.py +16 -12
- warp/fem/dirichlet.py +1 -1
- warp/fem/domain.py +44 -1
- warp/fem/field/__init__.py +1 -2
- warp/fem/field/field.py +31 -19
- warp/fem/field/nodal_field.py +101 -49
- warp/fem/field/virtual.py +794 -0
- warp/fem/geometry/__init__.py +2 -2
- warp/fem/geometry/deformed_geometry.py +3 -105
- warp/fem/geometry/element.py +13 -0
- warp/fem/geometry/geometry.py +165 -7
- warp/fem/geometry/grid_2d.py +3 -6
- warp/fem/geometry/grid_3d.py +31 -28
- warp/fem/geometry/hexmesh.py +3 -46
- warp/fem/geometry/nanogrid.py +3 -2
- warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
- warp/fem/geometry/tetmesh.py +2 -43
- warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
- warp/fem/integrate.py +683 -261
- warp/fem/linalg.py +404 -0
- warp/fem/operator.py +101 -18
- warp/fem/polynomial.py +5 -5
- warp/fem/quadrature/quadrature.py +45 -21
- warp/fem/space/__init__.py +45 -11
- warp/fem/space/basis_function_space.py +451 -0
- warp/fem/space/basis_space.py +58 -11
- warp/fem/space/function_space.py +146 -5
- warp/fem/space/grid_2d_function_space.py +80 -66
- warp/fem/space/grid_3d_function_space.py +113 -68
- warp/fem/space/hexmesh_function_space.py +96 -108
- warp/fem/space/nanogrid_function_space.py +62 -110
- warp/fem/space/quadmesh_function_space.py +208 -0
- warp/fem/space/shape/__init__.py +45 -7
- warp/fem/space/shape/cube_shape_function.py +328 -54
- warp/fem/space/shape/shape_function.py +10 -1
- warp/fem/space/shape/square_shape_function.py +328 -60
- warp/fem/space/shape/tet_shape_function.py +269 -19
- warp/fem/space/shape/triangle_shape_function.py +238 -19
- warp/fem/space/tetmesh_function_space.py +69 -37
- warp/fem/space/topology.py +38 -0
- warp/fem/space/trimesh_function_space.py +179 -0
- warp/fem/utils.py +6 -331
- warp/jax_experimental.py +3 -1
- warp/native/array.h +15 -0
- warp/native/builtin.h +66 -26
- warp/native/bvh.h +4 -0
- warp/native/coloring.cpp +604 -0
- warp/native/cuda_util.cpp +68 -51
- warp/native/cuda_util.h +2 -1
- warp/native/fabric.h +8 -0
- warp/native/hashgrid.h +4 -0
- warp/native/marching.cu +8 -0
- warp/native/mat.h +14 -3
- warp/native/mathdx.cpp +59 -0
- warp/native/mesh.h +4 -0
- warp/native/range.h +13 -1
- warp/native/reduce.cpp +9 -1
- warp/native/reduce.cu +7 -0
- warp/native/runlength_encode.cpp +9 -1
- warp/native/runlength_encode.cu +7 -1
- warp/native/scan.cpp +8 -0
- warp/native/scan.cu +8 -0
- warp/native/scan.h +8 -1
- warp/native/sparse.cpp +8 -0
- warp/native/sparse.cu +8 -0
- warp/native/temp_buffer.h +7 -0
- warp/native/tile.h +1854 -0
- warp/native/tile_gemm.h +341 -0
- warp/native/tile_reduce.h +210 -0
- warp/native/volume_builder.cu +8 -0
- warp/native/volume_builder.h +8 -0
- warp/native/warp.cpp +10 -2
- warp/native/warp.cu +369 -15
- warp/native/warp.h +12 -2
- warp/optim/adam.py +39 -4
- warp/paddle.py +29 -12
- warp/render/render_opengl.py +140 -67
- warp/sim/graph_coloring.py +292 -0
- warp/sim/import_urdf.py +8 -8
- warp/sim/integrator_euler.py +4 -2
- warp/sim/integrator_featherstone.py +115 -44
- warp/sim/integrator_vbd.py +6 -0
- warp/sim/model.py +109 -32
- warp/sparse.py +1 -1
- warp/stubs.py +569 -4
- warp/tape.py +12 -7
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/aux_test_instancing_gc.py +18 -0
- warp/tests/test_array.py +39 -0
- warp/tests/test_codegen.py +81 -1
- warp/tests/test_codegen_instancing.py +30 -0
- warp/tests/test_collision.py +110 -0
- warp/tests/test_coloring.py +251 -0
- warp/tests/test_context.py +34 -0
- warp/tests/test_examples.py +21 -5
- warp/tests/test_fem.py +453 -113
- warp/tests/test_func.py +34 -4
- warp/tests/test_generics.py +52 -0
- warp/tests/test_iter.py +68 -0
- warp/tests/test_lerp.py +13 -87
- warp/tests/test_mat_scalar_ops.py +1 -1
- warp/tests/test_matmul.py +6 -9
- warp/tests/test_matmul_lite.py +6 -11
- warp/tests/test_mesh_query_point.py +1 -1
- warp/tests/test_module_hashing.py +23 -0
- warp/tests/test_overwrite.py +45 -0
- warp/tests/test_paddle.py +27 -87
- warp/tests/test_print.py +56 -1
- warp/tests/test_smoothstep.py +17 -83
- warp/tests/test_spatial.py +1 -1
- warp/tests/test_static.py +3 -3
- warp/tests/test_tile.py +744 -0
- warp/tests/test_tile_mathdx.py +144 -0
- warp/tests/test_tile_mlp.py +383 -0
- warp/tests/test_tile_reduce.py +374 -0
- warp/tests/test_tile_shared_memory.py +190 -0
- warp/tests/test_vbd.py +12 -20
- warp/tests/test_volume.py +43 -0
- warp/tests/unittest_suites.py +19 -2
- warp/tests/unittest_utils.py +4 -2
- warp/types.py +340 -74
- warp/utils.py +23 -3
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
- warp/fem/field/test.py +0 -180
- warp/fem/field/trial.py +0 -183
- warp/fem/space/collocated_function_space.py +0 -102
- warp/fem/space/quadmesh_2d_function_space.py +0 -261
- warp/fem/space/trimesh_2d_function_space.py +0 -153
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
# and proprietary rights in and to this software, related documentation
|
|
4
|
+
# and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
# distribution of this software and related documentation without an express
|
|
6
|
+
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
import warp as wp
|
|
13
|
+
import warp.utils
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ColoringAlgorithm(Enum):
    """Coloring strategy selector accepted by ``color_graph`` and ``color_trimesh``.

    ``color_graph`` forwards the member's integer ``value`` to the native
    ``graph_coloring`` routine, so the numeric values are part of the contract.
    """

    # MCS coloring algorithm (see the references in the ``color_graph`` docstring)
    MCS = 0
    # degree-ordered greedy coloring algorithm
    GREEDY = 1
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@wp.kernel
def construct_trimesh_graph_edges_kernel(
    trimesh_edge_indices: wp.array(dtype=int, ndim=2),
    add_bending: bool,
    graph_edge_indices: wp.array(dtype=int, ndim=2),
    graph_num_edges: wp.array(dtype=int),
):
    # Serial kernel (the thread index is never read): walks every mesh edge once.
    # Each row of trimesh_edge_indices is read as (o1, o2, v1, v2).
    # Rows [0, mesh_edge_count) of graph_edge_indices receive the (v1, v2) pairs;
    # the optional (o1, o2) "diagonal" pairs are appended right after them.
    mesh_edge_count = trimesh_edge_indices.shape[0]
    diagonal_count = wp.int32(0)

    for row in range(mesh_edge_count):
        # the mesh edge itself always becomes a graph edge
        graph_edge_indices[row, 0] = trimesh_edge_indices[row, 2]
        graph_edge_indices[row, 1] = trimesh_edge_indices[row, 3]

        opposite_a = trimesh_edge_indices[row, 0]
        opposite_b = trimesh_edge_indices[row, 1]

        # -1 marks a missing opposite vertex; a diagonal needs both to be present
        if add_bending and opposite_a != -1 and opposite_b != -1:
            write_row = mesh_edge_count + diagonal_count
            graph_edge_indices[write_row, 0] = opposite_a
            graph_edge_indices[write_row, 1] = opposite_b

            diagonal_count = diagonal_count + 1

    # report how many rows of graph_edge_indices were actually written
    graph_num_edges[0] = diagonal_count + mesh_edge_count
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@wp.kernel
def validate_graph_coloring(edge_indices: wp.array(dtype=int, ndim=2), colors: wp.array(dtype=int)):
    # One thread per graph edge: the two endpoints of the edge must not share a color.
    tid = wp.tid()
    first_vertex = edge_indices[tid, 0]
    second_vertex = edge_indices[tid, 1]

    wp.expect_neq(colors[first_vertex], colors[second_vertex])
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@wp.kernel
def count_color_group_size(
    colors: wp.array(dtype=int),
    group_sizes: wp.array(dtype=int),
):
    # Serial histogram (the thread index is never read): for every entry of
    # `colors`, bump the counter of the color it holds.
    for i in range(colors.shape[0]):
        c = colors[i]
        group_sizes[c] = group_sizes[c] + 1
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@wp.kernel
def fill_color_groups(
    colors: wp.array(dtype=int),
    group_fill_count: wp.array(dtype=int),
    group_offsets: wp.array(dtype=int),
    # flattened color groups
    color_groups_flatten: wp.array(dtype=int),
):
    # Serial scatter (the thread index is never read): each particle index is
    # written into the next free slot of its color's segment in the flat array.
    for i in range(colors.shape[0]):
        c = colors[i]
        # number of entries already written for this color
        filled = group_fill_count[c]
        # start of this color's segment within the flat array
        segment_start = group_offsets[c]
        color_groups_flatten[filled + segment_start] = wp.int32(i)

        group_fill_count[c] = filled + 1
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def convert_to_color_groups(num_colors, particle_colors, return_wp_array=False, device="cpu"):
    """Turn a per-particle color array into one index array per color.

    Args:
        num_colors: number of distinct colors; the returned list has this length
        particle_colors: array holding the color of each particle; it is consumed by kernels launched on the CPU
        return_wp_array: if `True` each group is returned as a `wp.array` on `device`, otherwise as a NumPy slice
        device: device of the returned `wp.array` groups (only used when `return_wp_array` is `True`)

    Returns:
        A list of length `num_colors`; entry `c` holds the indices of the particles with color `c`.
    """
    # pass 1: count how many particles carry each color
    sizes = wp.zeros(shape=(num_colors,), dtype=int, device="cpu")
    wp.launch(kernel=count_color_group_size, inputs=[particle_colors, sizes], device="cpu", dim=1)
    sizes_np = sizes.numpy()

    # exclusive prefix sum with the total appended: offsets[c] .. offsets[c + 1] delimits color c
    offsets_np = np.concatenate([np.array([0]), np.cumsum(sizes_np)])
    offsets = wp.array(offsets_np, dtype=int, device="cpu")

    # pass 2: scatter the particle indices into a single flat array, grouped by color
    fill_counters = wp.zeros(shape=(num_colors,), dtype=int, device="cpu")
    flat_groups = wp.empty(shape=(sizes_np.sum(),), dtype=int, device="cpu")
    wp.launch(
        kernel=fill_color_groups,
        inputs=[particle_colors, fill_counters, offsets, flat_groups],
        device="cpu",
        dim=1,
    )
    flat_groups_np = flat_groups.numpy()

    # cut the flat array back into one slice per color
    slices = [flat_groups_np[offsets_np[c] : offsets_np[c + 1]] for c in range(num_colors)]

    if return_wp_array:
        return [wp.array(group, dtype=int, device=device) for group in slices]
    return slices
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=False):
    """Build the graph edge list of a triangle mesh, including the bending "diagonal" edges.

    Args:
        trimesh_edge_indices: `np.ndarray` or `wp.array` whose rows are read by the kernel as (o1, o2, v1, v2)
        return_wp_array: if `True` the result is a CPU `wp.array`, otherwise a NumPy array

    Returns:
        An array of shape (num_graph_edges, 2): the (v1, v2) pair of every mesh edge followed by
        the (o1, o2) pair of every edge for which both o1 and o2 differ from -1.
    """
    if isinstance(trimesh_edge_indices, np.ndarray):
        trimesh_edge_indices = wp.array(trimesh_edge_indices, dtype=int, device="cpu")

    # worst case: every mesh edge also contributes one diagonal edge
    max_graph_edges = trimesh_edge_indices.shape[0] * 2
    edge_buffer = wp.empty(shape=(max_graph_edges, 2), dtype=int, device="cpu")
    edge_counter = wp.zeros(shape=(1,), dtype=int, device="cpu")

    wp.launch(
        kernel=construct_trimesh_graph_edges_kernel,
        # the second input is `add_bending`, always enabled here
        inputs=[trimesh_edge_indices.to("cpu"), True],
        outputs=[edge_buffer, edge_counter],
        dim=1,
        device="cpu",
    )

    # keep only the rows the kernel actually wrote
    used_rows = edge_counter.numpy()[0]
    trimmed_edges = edge_buffer.numpy()[:used_rows, :]

    if not return_wp_array:
        return trimmed_edges
    return wp.array(trimmed_edges, dtype=int, device="cpu")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def color_trimesh(
    num_nodes,
    trimesh_edge_indices,
    include_bending_energy,
    balance_colors=True,
    target_max_min_color_ratio=1.1,
    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
):
    """
    Generate a vertex coloring for a triangle mesh described by its vertex count and its edges.
    The mesh is first converted to a graph, which is then colored with `color_graph`.

    Args:
        num_nodes: The number of the nodes in the graph
        trimesh_edge_indices: A `wp.array` of shape (number_edges, 4), each row is (o1, o2, v1, v2), see `sim.Model`'s definition of `edge_indices`.
        include_bending_energy: whether to consider bending energy in the coloring process. If set to `True`, the generated
            graph additionally contains the edges connecting o1 and o2; otherwise, the graph is equivalent to the trimesh.
        balance_colors: forwarded to `color_graph`, see its documentation
        target_max_min_color_ratio: forwarded to `color_graph`, see its documentation
        algorithm: forwarded to `color_graph`, see its documentation

    Returns:
        Whatever `color_graph` returns for the derived graph: a list of `np.array` with `dtype`=`int`, one per color,
        each holding the indices of the vertices with that color.
    """
    if not include_bending_energy:
        # plain mesh connectivity: only the (v1, v2) columns are graph edges
        graph_edge_indices = wp.array(trimesh_edge_indices[:, 2:], dtype=int, device="cpu")
    else:
        # mesh edges plus the (o1, o2) diagonals
        graph_edge_indices = construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=True)

    return color_graph(num_nodes, graph_edge_indices, balance_colors, target_max_min_color_ratio, algorithm)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def color_graph(
    num_nodes,
    graph_edge_indices,
    balance_colors=True,
    target_max_min_color_ratio=1.1,
    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
):
    """
    A function that generates coloring for a graph, which is represented by the number of nodes and an array of edges.
    It returns a list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
    and each `np.array` contains the indices of vertices with this color. An empty graph (`num_nodes == 0`)
    yields an empty list.

    Args:
        num_nodes: The number of the nodes in the graph
        graph_edge_indices: A `wp.array` of shape (number_edges, 2)
        balance_colors: Whether to apply the color balancing algorithm to balance the size of each color
        target_max_min_color_ratio: the color balancing algorithm will stop when the ratio between the largest color and
            the smallest color reaches this value
        algorithm: A member of `ColoringAlgorithm`. `ColoringAlgorithm.MCS` selects the MCS coloring algorithm,
            while `ColoringAlgorithm.GREEDY` selects the degree-ordered greedy algorithm. The MCS algorithm typically generates 30% to 50% fewer colors
            compared to the ordered greedy algorithm, while maintaining the same linear complexity. Although MCS has a constant overhead that makes it about twice
            as slow as the greedy algorithm, it produces significantly better coloring results. We recommend using MCS, especially if coloring is only part of the
            preprocessing stage.

    Raises:
        ValueError: if `graph_edge_indices` is not a 2 dimensional array.

    Note:

        References to the coloring algorithm:
        MCS: Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
        Ordered Greedy: Ton-That, Q. M., Kry, P. G., & Andrews, S. (2023). Parallel block Neo-Hookean XPBD using graph clustering. Computers & Graphics, 110, 1-10.
    """
    if num_nodes == 0:
        # No nodes means no color groups. Return an empty list (not None) so the result
        # honors the documented return type and can be passed to len() / iterated.
        return []

    # validate the input before allocating anything
    if graph_edge_indices.ndim != 2:
        raise ValueError(
            f"graph_edge_indices must be a 2 dimensional array! The provided one is {graph_edge_indices.ndim} dimensional."
        )

    # output buffer filled by the native routines: one color per node
    particle_colors = wp.empty(shape=(num_nodes), dtype=wp.int32, device="cpu")

    num_colors = wp.context.runtime.core.graph_coloring(
        num_nodes,
        graph_edge_indices.__ctype__(),
        algorithm.value,
        particle_colors.__ctype__(),
    )

    if balance_colors:
        max_min_ratio = wp.context.runtime.core.balance_coloring(
            num_nodes,
            graph_edge_indices.__ctype__(),
            num_colors,
            target_max_min_color_ratio,
            particle_colors.__ctype__(),
        )

        if max_min_ratio > target_max_min_color_ratio:
            wp.utils.warn(
                f"The graph is not optimizable anymore, terminated with a max/min ratio: {max_min_ratio} without reaching the target ratio: {target_max_min_color_ratio}"
            )

    return convert_to_color_groups(num_colors, particle_colors, return_wp_array=False)
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def combine_independent_particle_coloring(color_groups_1, color_groups_2):
    """
    A function that combines 2 independent coloring groups. Note that color_groups_1 and color_groups_2 must be from 2 independent
    graphs so that there is no connection between them. The coloring with more colors is sorted by group size in ascending order,
    the other one in descending order, and groups with the same index are merged; this way the smaller groups are always
    combined with the larger groups. Groups of the longer coloring that have no partner are kept unchanged.

    Args:
        color_groups_1: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
            and each `np.array` contains the indices of vertices with this color.
        color_groups_2: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
            and each `np.array` contains the indices of vertices with this color.

    Returns:
        A list of `np.array` whose length is the larger of the two color counts. If one of the inputs is empty,
        the other input is returned as-is (not copied).
    """
    if len(color_groups_1) == 0:
        return color_groups_2
    if len(color_groups_2) == 0:
        return color_groups_1

    # make group 1 the coloring with more colors, so that the leftover
    # (unpaired) groups are always the largest groups of the longer coloring
    if len(color_groups_1) < len(color_groups_2):
        color_groups_1, color_groups_2 = color_groups_2, color_groups_1

    # sort group 1 in ascending order
    color_groups_1_sorted = sorted(color_groups_1, key=len)
    # sort group 2 in descending order (a reversed sort is still stable, so ties keep their input order)
    color_groups_2_sorted = sorted(color_groups_2, key=len, reverse=True)

    # pair the smaller groups with the larger groups, which balances the load of each color
    color_groups_combined = [
        np.concatenate([group_1, group_2]) for group_1, group_2 in zip(color_groups_1_sorted, color_groups_2_sorted)
    ]
    # group 1 is at least as long as group 2, so only group 1 can have unpaired groups left
    color_groups_combined.extend(color_groups_1_sorted[len(color_groups_2_sorted) :])

    return color_groups_combined
|
warp/sim/import_urdf.py
CHANGED
|
@@ -211,14 +211,14 @@ def parse_urdf(
|
|
|
211
211
|
if hasattr(m, "geometry"):
|
|
212
212
|
# multiple meshes are contained in a scene
|
|
213
213
|
for geom in m.geometry.values():
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
214
|
+
geom_vertices = np.array(geom.vertices, dtype=np.float32) * scaling
|
|
215
|
+
geom_faces = np.array(geom.faces.flatten(), dtype=np.int32)
|
|
216
|
+
geom_mesh = Mesh(geom_vertices, geom_faces)
|
|
217
217
|
s = builder.add_shape_mesh(
|
|
218
218
|
body=link,
|
|
219
219
|
pos=wp.vec3(tf.p),
|
|
220
220
|
rot=wp.quat(tf.q),
|
|
221
|
-
mesh=
|
|
221
|
+
mesh=geom_mesh,
|
|
222
222
|
density=density,
|
|
223
223
|
is_visible=visible,
|
|
224
224
|
has_ground_collision=not just_visual,
|
|
@@ -228,14 +228,14 @@ def parse_urdf(
|
|
|
228
228
|
shapes.append(s)
|
|
229
229
|
else:
|
|
230
230
|
# a single mesh
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
231
|
+
m_vertices = np.array(m.vertices, dtype=np.float32) * scaling
|
|
232
|
+
m_faces = np.array(m.faces.flatten(), dtype=np.int32)
|
|
233
|
+
m_mesh = Mesh(m_vertices, m_faces)
|
|
234
234
|
s = builder.add_shape_mesh(
|
|
235
235
|
body=link,
|
|
236
236
|
pos=wp.vec3(tf.p),
|
|
237
237
|
rot=wp.quat(tf.q),
|
|
238
|
-
mesh=
|
|
238
|
+
mesh=m_mesh,
|
|
239
239
|
density=density,
|
|
240
240
|
is_visible=visible,
|
|
241
241
|
has_ground_collision=not just_visual,
|
warp/sim/integrator_euler.py
CHANGED
|
@@ -264,6 +264,7 @@ def eval_triangles_contact(
|
|
|
264
264
|
v: wp.array(dtype=wp.vec3),
|
|
265
265
|
indices: wp.array2d(dtype=int),
|
|
266
266
|
materials: wp.array2d(dtype=float),
|
|
267
|
+
particle_radius: wp.array(dtype=float),
|
|
267
268
|
f: wp.array(dtype=wp.vec3),
|
|
268
269
|
):
|
|
269
270
|
tid = wp.tid()
|
|
@@ -303,7 +304,7 @@ def eval_triangles_contact(
|
|
|
303
304
|
diff = pos - closest
|
|
304
305
|
dist = wp.dot(diff, diff)
|
|
305
306
|
n = wp.normalize(diff)
|
|
306
|
-
c = wp.min(dist -
|
|
307
|
+
c = wp.min(dist - particle_radius[particle_no], 0.0) # 0 unless within particle's contact radius
|
|
307
308
|
# c = wp.leaky_min(dot(n, x0)-0.01, 0.0, 0.0)
|
|
308
309
|
fn = n * c * 1e5
|
|
309
310
|
|
|
@@ -795,7 +796,7 @@ def eval_particle_contacts(
|
|
|
795
796
|
r = bx - wp.transform_point(X_wb, X_com)
|
|
796
797
|
|
|
797
798
|
n = contact_normal[tid]
|
|
798
|
-
c = wp.dot(n, px - bx) - particle_radius[
|
|
799
|
+
c = wp.dot(n, px - bx) - particle_radius[particle_index]
|
|
799
800
|
|
|
800
801
|
if c > particle_ka:
|
|
801
802
|
return
|
|
@@ -1697,6 +1698,7 @@ def eval_triangle_contact_forces(model: Model, state: State, particle_f: wp.arra
|
|
|
1697
1698
|
state.particle_qd,
|
|
1698
1699
|
model.tri_indices,
|
|
1699
1700
|
model.tri_materials,
|
|
1701
|
+
model.particle_radius,
|
|
1700
1702
|
],
|
|
1701
1703
|
outputs=[particle_f],
|
|
1702
1704
|
device=model.device,
|
|
warp/sim/integrator_featherstone.py
CHANGED
|
@@ -1155,6 +1155,38 @@ def dense_gemm(
|
|
|
1155
1155
|
# dense_gemm(p, n, m, True, False, add_to_C, A_start, B_start, C_start, A, wp.adjoint[C], wp.adjoint[B])
|
|
1156
1156
|
|
|
1157
1157
|
|
|
1158
|
+
def create_inertia_matrix_kernel(num_joints, num_dofs):
    """Build a tile-based kernel specialized for a fixed joint and dof count.

    The returned kernel reads, per articulation, a Jacobian ``J`` of shape
    (6 * num_joints, num_dofs) and 6x6 blocks from the diagonal of ``M``, and
    stores ``H = J^T * (M * J)`` into ``H_arr``.

    Args:
        num_joints: number of joints per articulation; captured by the kernel as a compile-time constant
        num_dofs: number of degrees of freedom per articulation; captured by the kernel

    Returns:
        The generated ``wp.kernel`` (``eval_dense_gemm_tile``).
    """

    @wp.kernel
    def eval_dense_gemm_tile(
        J_arr: wp.array3d(dtype=float), M_arr: wp.array3d(dtype=float), H_arr: wp.array3d(dtype=float)
    ):
        # index along the first axis of the three arrays
        # NOTE(review): presumably one tile block per articulation (launched via wp.launch_tiled) - confirm against caller
        articulation = wp.tid()

        # whole Jacobian of this articulation, plus a zero-initialized tile of the same shape for P
        J = wp.tile_load(J_arr[articulation], 0, 0, m=wp.static(6 * num_joints), n=num_dofs)
        P = wp.tile_zeros(m=wp.static(6 * num_joints), n=num_dofs, dtype=float)

        # compute P = M*J where M is a 6x6 block diagonal mass matrix
        for i in range(int(num_joints)):
            # 6x6 block matrices are on the diagonal
            M_body = wp.tile_load(M_arr[articulation], i, i, m=6, n=6)

            # load a 6xN row from the Jacobian
            J_body = wp.tile_view(J, i * 6, 0, m=6, n=num_dofs)

            # compute weighted row
            P_body = wp.tile_matmul(M_body, J_body)

            # assign to the P slice
            wp.tile_assign(P, i * 6, 0, P_body)

        # compute H = J^T*P
        H = wp.tile_matmul(wp.tile_transpose(J), P)

        wp.tile_store(H_arr[articulation], 0, 0, H)

    return eval_dense_gemm_tile
|
|
1188
|
+
|
|
1189
|
+
|
|
1158
1190
|
@wp.kernel
|
|
1159
1191
|
def eval_dense_gemm_batched(
|
|
1160
1192
|
m: wp.array(dtype=int),
|
|
@@ -1426,7 +1458,7 @@ class FeatherstoneIntegrator(Integrator):
|
|
|
1426
1458
|
|
|
1427
1459
|
"""
|
|
1428
1460
|
|
|
1429
|
-
def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1):
|
|
1461
|
+
def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1, use_tile_gemm=False):
|
|
1430
1462
|
"""
|
|
1431
1463
|
Args:
|
|
1432
1464
|
model (Model): the model to be simulated.
|
|
@@ -1435,9 +1467,19 @@ class FeatherstoneIntegrator(Integrator):
|
|
|
1435
1467
|
"""
|
|
1436
1468
|
self.angular_damping = angular_damping
|
|
1437
1469
|
self.update_mass_matrix_every = update_mass_matrix_every
|
|
1470
|
+
self.use_tile_gemm = use_tile_gemm
|
|
1471
|
+
self._step = 0
|
|
1472
|
+
|
|
1438
1473
|
self.compute_articulation_indices(model)
|
|
1439
1474
|
self.allocate_model_aux_vars(model)
|
|
1440
|
-
|
|
1475
|
+
|
|
1476
|
+
if self.use_tile_gemm:
|
|
1477
|
+
# create a custom kernel to evaluate the system matrix for this type
|
|
1478
|
+
self.eval_inertia_matrix_kernel = create_inertia_matrix_kernel(int(self.joint_count), int(self.dof_count))
|
|
1479
|
+
|
|
1480
|
+
# ensure matrix is reloaded since otherwise an unload can happen during graph capture
|
|
1481
|
+
# todo: should not be necessary?
|
|
1482
|
+
wp.load_module(device=wp.get_device())
|
|
1441
1483
|
|
|
1442
1484
|
def compute_articulation_indices(self, model):
|
|
1443
1485
|
# calculate total size and offsets of Jacobian and mass matrices for entire system
|
|
@@ -1486,6 +1528,12 @@ class FeatherstoneIntegrator(Integrator):
|
|
|
1486
1528
|
articulation_J_rows.append(joint_count * 6)
|
|
1487
1529
|
articulation_J_cols.append(dof_count)
|
|
1488
1530
|
|
|
1531
|
+
if self.use_tile_gemm:
|
|
1532
|
+
# store the joint and dof count assuming all
|
|
1533
|
+
# articulations have the same structure
|
|
1534
|
+
self.joint_count = joint_count
|
|
1535
|
+
self.dof_count = dof_count
|
|
1536
|
+
|
|
1489
1537
|
self.J_size += 6 * joint_count * dof_count
|
|
1490
1538
|
self.M_size += 6 * joint_count * 6 * joint_count
|
|
1491
1539
|
self.H_size += dof_count * dof_count
|
|
@@ -1790,48 +1838,71 @@ class FeatherstoneIntegrator(Integrator):
|
|
|
1790
1838
|
device=model.device,
|
|
1791
1839
|
)
|
|
1792
1840
|
|
|
1793
|
-
|
|
1794
|
-
|
|
1795
|
-
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
self.
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
]
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
1816
|
-
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
|
|
1820
|
-
|
|
1821
|
-
|
|
1822
|
-
|
|
1823
|
-
|
|
1824
|
-
|
|
1825
|
-
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
|
|
1832
|
-
|
|
1833
|
-
|
|
1834
|
-
|
|
1841
|
+
if self.use_tile_gemm:
|
|
1842
|
+
# reshape arrays
|
|
1843
|
+
M_tiled = self.M.reshape((-1, 6 * self.joint_count, 6 * self.joint_count))
|
|
1844
|
+
J_tiled = self.J.reshape((-1, 6 * self.joint_count, self.dof_count))
|
|
1845
|
+
H_tiled = self.H.reshape((-1, self.dof_count, self.dof_count))
|
|
1846
|
+
|
|
1847
|
+
wp.launch_tiled(
|
|
1848
|
+
self.eval_inertia_matrix_kernel,
|
|
1849
|
+
dim=model.articulation_count,
|
|
1850
|
+
inputs=[J_tiled, M_tiled],
|
|
1851
|
+
outputs=[H_tiled],
|
|
1852
|
+
device=model.device,
|
|
1853
|
+
block_dim=256,
|
|
1854
|
+
)
|
|
1855
|
+
|
|
1856
|
+
# J = J_tiled.numpy()[0]
|
|
1857
|
+
# M = M_tiled.numpy()[0]
|
|
1858
|
+
# H = J.T@M@J
|
|
1859
|
+
|
|
1860
|
+
# import numpy as np
|
|
1861
|
+
# np.testing.assert_allclose(H, H_tiled.numpy()[0])
|
|
1862
|
+
|
|
1863
|
+
else:
|
|
1864
|
+
# form P = M*J
|
|
1865
|
+
wp.launch(
|
|
1866
|
+
eval_dense_gemm_batched,
|
|
1867
|
+
dim=model.articulation_count,
|
|
1868
|
+
inputs=[
|
|
1869
|
+
self.articulation_M_rows,
|
|
1870
|
+
self.articulation_J_cols,
|
|
1871
|
+
self.articulation_J_rows,
|
|
1872
|
+
False,
|
|
1873
|
+
False,
|
|
1874
|
+
self.articulation_M_start,
|
|
1875
|
+
self.articulation_J_start,
|
|
1876
|
+
# P start is the same as J start since it has the same dims as J
|
|
1877
|
+
self.articulation_J_start,
|
|
1878
|
+
self.M,
|
|
1879
|
+
self.J,
|
|
1880
|
+
],
|
|
1881
|
+
outputs=[self.P],
|
|
1882
|
+
device=model.device,
|
|
1883
|
+
)
|
|
1884
|
+
|
|
1885
|
+
# form H = J^T*P
|
|
1886
|
+
wp.launch(
|
|
1887
|
+
eval_dense_gemm_batched,
|
|
1888
|
+
dim=model.articulation_count,
|
|
1889
|
+
inputs=[
|
|
1890
|
+
self.articulation_J_cols,
|
|
1891
|
+
self.articulation_J_cols,
|
|
1892
|
+
# P rows is the same as J rows
|
|
1893
|
+
self.articulation_J_rows,
|
|
1894
|
+
True,
|
|
1895
|
+
False,
|
|
1896
|
+
self.articulation_J_start,
|
|
1897
|
+
# P start is the same as J start since it has the same dims as J
|
|
1898
|
+
self.articulation_J_start,
|
|
1899
|
+
self.articulation_H_start,
|
|
1900
|
+
self.J,
|
|
1901
|
+
self.P,
|
|
1902
|
+
],
|
|
1903
|
+
outputs=[self.H],
|
|
1904
|
+
device=model.device,
|
|
1905
|
+
)
|
|
1835
1906
|
|
|
1836
1907
|
# compute decomposition
|
|
1837
1908
|
wp.launch(
|
warp/sim/integrator_vbd.py
CHANGED
|
@@ -740,6 +740,12 @@ class VBDIntegrator(Integrator):
|
|
|
740
740
|
self.body_particle_contact_count = wp.zeros((model.particle_count,), dtype=wp.int32, device=self.device)
|
|
741
741
|
self.friction_epsilon = friction_epsilon
|
|
742
742
|
|
|
743
|
+
if len(self.model.particle_coloring) == 0:
|
|
744
|
+
raise ValueError(
|
|
745
|
+
"model.particle_coloring is empty! When using the VBDIntegrator you must call ModelBuilder.color() "
|
|
746
|
+
"or ModelBuilder.set_coloring() before calling ModelBuilder.finalize()."
|
|
747
|
+
)
|
|
748
|
+
|
|
743
749
|
# tests
|
|
744
750
|
# wp.launch(kernel=_test_compute_force_element_adjacency,
|
|
745
751
|
# inputs=[self.adjacency, model.edge_indices, model.tri_indices],
|