warp-lang 1.4.2__py3-none-win_amd64.whl → 1.5.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (166) hide show
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1819 -7
  8. warp/codegen.py +197 -61
  9. warp/config.py +2 -2
  10. warp/context.py +379 -107
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/fem/example_adaptive_grid.py +37 -10
  17. warp/examples/fem/example_apic_fluid.py +3 -2
  18. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  19. warp/examples/fem/example_deformed_geometry.py +1 -1
  20. warp/examples/fem/example_diffusion_3d.py +47 -4
  21. warp/examples/fem/example_distortion_energy.py +220 -0
  22. warp/examples/fem/example_magnetostatics.py +127 -85
  23. warp/examples/fem/example_nonconforming_contact.py +5 -5
  24. warp/examples/fem/example_stokes.py +3 -1
  25. warp/examples/fem/example_streamlines.py +12 -19
  26. warp/examples/fem/utils.py +38 -15
  27. warp/examples/sim/example_cloth.py +4 -25
  28. warp/examples/sim/example_quadruped.py +2 -1
  29. warp/examples/tile/example_tile_convolution.py +58 -0
  30. warp/examples/tile/example_tile_fft.py +47 -0
  31. warp/examples/tile/example_tile_filtering.py +105 -0
  32. warp/examples/tile/example_tile_matmul.py +79 -0
  33. warp/examples/tile/example_tile_mlp.py +375 -0
  34. warp/fem/__init__.py +8 -0
  35. warp/fem/cache.py +16 -12
  36. warp/fem/dirichlet.py +1 -1
  37. warp/fem/domain.py +44 -1
  38. warp/fem/field/__init__.py +1 -2
  39. warp/fem/field/field.py +31 -19
  40. warp/fem/field/nodal_field.py +101 -49
  41. warp/fem/field/virtual.py +794 -0
  42. warp/fem/geometry/__init__.py +2 -2
  43. warp/fem/geometry/deformed_geometry.py +3 -105
  44. warp/fem/geometry/element.py +13 -0
  45. warp/fem/geometry/geometry.py +165 -7
  46. warp/fem/geometry/grid_2d.py +3 -6
  47. warp/fem/geometry/grid_3d.py +31 -28
  48. warp/fem/geometry/hexmesh.py +3 -46
  49. warp/fem/geometry/nanogrid.py +3 -2
  50. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  51. warp/fem/geometry/tetmesh.py +2 -43
  52. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  53. warp/fem/integrate.py +683 -261
  54. warp/fem/linalg.py +404 -0
  55. warp/fem/operator.py +101 -18
  56. warp/fem/polynomial.py +5 -5
  57. warp/fem/quadrature/quadrature.py +45 -21
  58. warp/fem/space/__init__.py +45 -11
  59. warp/fem/space/basis_function_space.py +451 -0
  60. warp/fem/space/basis_space.py +58 -11
  61. warp/fem/space/function_space.py +146 -5
  62. warp/fem/space/grid_2d_function_space.py +80 -66
  63. warp/fem/space/grid_3d_function_space.py +113 -68
  64. warp/fem/space/hexmesh_function_space.py +96 -108
  65. warp/fem/space/nanogrid_function_space.py +62 -110
  66. warp/fem/space/quadmesh_function_space.py +208 -0
  67. warp/fem/space/shape/__init__.py +45 -7
  68. warp/fem/space/shape/cube_shape_function.py +328 -54
  69. warp/fem/space/shape/shape_function.py +10 -1
  70. warp/fem/space/shape/square_shape_function.py +328 -60
  71. warp/fem/space/shape/tet_shape_function.py +269 -19
  72. warp/fem/space/shape/triangle_shape_function.py +238 -19
  73. warp/fem/space/tetmesh_function_space.py +69 -37
  74. warp/fem/space/topology.py +38 -0
  75. warp/fem/space/trimesh_function_space.py +179 -0
  76. warp/fem/utils.py +6 -331
  77. warp/jax_experimental.py +3 -1
  78. warp/native/array.h +15 -0
  79. warp/native/builtin.h +66 -26
  80. warp/native/bvh.h +4 -0
  81. warp/native/coloring.cpp +604 -0
  82. warp/native/cuda_util.cpp +68 -51
  83. warp/native/cuda_util.h +2 -1
  84. warp/native/fabric.h +8 -0
  85. warp/native/hashgrid.h +4 -0
  86. warp/native/marching.cu +8 -0
  87. warp/native/mat.h +14 -3
  88. warp/native/mathdx.cpp +59 -0
  89. warp/native/mesh.h +4 -0
  90. warp/native/range.h +13 -1
  91. warp/native/reduce.cpp +9 -1
  92. warp/native/reduce.cu +7 -0
  93. warp/native/runlength_encode.cpp +9 -1
  94. warp/native/runlength_encode.cu +7 -1
  95. warp/native/scan.cpp +8 -0
  96. warp/native/scan.cu +8 -0
  97. warp/native/scan.h +8 -1
  98. warp/native/sparse.cpp +8 -0
  99. warp/native/sparse.cu +8 -0
  100. warp/native/temp_buffer.h +7 -0
  101. warp/native/tile.h +1854 -0
  102. warp/native/tile_gemm.h +341 -0
  103. warp/native/tile_reduce.h +210 -0
  104. warp/native/volume_builder.cu +8 -0
  105. warp/native/volume_builder.h +8 -0
  106. warp/native/warp.cpp +10 -2
  107. warp/native/warp.cu +369 -15
  108. warp/native/warp.h +12 -2
  109. warp/optim/adam.py +39 -4
  110. warp/paddle.py +29 -12
  111. warp/render/render_opengl.py +140 -67
  112. warp/sim/graph_coloring.py +292 -0
  113. warp/sim/import_urdf.py +8 -8
  114. warp/sim/integrator_euler.py +4 -2
  115. warp/sim/integrator_featherstone.py +115 -44
  116. warp/sim/integrator_vbd.py +6 -0
  117. warp/sim/model.py +109 -32
  118. warp/sparse.py +1 -1
  119. warp/stubs.py +569 -4
  120. warp/tape.py +12 -7
  121. warp/tests/assets/pixel.npy +0 -0
  122. warp/tests/aux_test_instancing_gc.py +18 -0
  123. warp/tests/test_array.py +39 -0
  124. warp/tests/test_codegen.py +81 -1
  125. warp/tests/test_codegen_instancing.py +30 -0
  126. warp/tests/test_collision.py +110 -0
  127. warp/tests/test_coloring.py +251 -0
  128. warp/tests/test_context.py +34 -0
  129. warp/tests/test_examples.py +21 -5
  130. warp/tests/test_fem.py +453 -113
  131. warp/tests/test_func.py +34 -4
  132. warp/tests/test_generics.py +52 -0
  133. warp/tests/test_iter.py +68 -0
  134. warp/tests/test_lerp.py +13 -87
  135. warp/tests/test_mat_scalar_ops.py +1 -1
  136. warp/tests/test_matmul.py +6 -9
  137. warp/tests/test_matmul_lite.py +6 -11
  138. warp/tests/test_mesh_query_point.py +1 -1
  139. warp/tests/test_module_hashing.py +23 -0
  140. warp/tests/test_overwrite.py +45 -0
  141. warp/tests/test_paddle.py +27 -87
  142. warp/tests/test_print.py +56 -1
  143. warp/tests/test_smoothstep.py +17 -83
  144. warp/tests/test_spatial.py +1 -1
  145. warp/tests/test_static.py +3 -3
  146. warp/tests/test_tile.py +744 -0
  147. warp/tests/test_tile_mathdx.py +144 -0
  148. warp/tests/test_tile_mlp.py +383 -0
  149. warp/tests/test_tile_reduce.py +374 -0
  150. warp/tests/test_tile_shared_memory.py +190 -0
  151. warp/tests/test_vbd.py +12 -20
  152. warp/tests/test_volume.py +43 -0
  153. warp/tests/unittest_suites.py +19 -2
  154. warp/tests/unittest_utils.py +4 -2
  155. warp/types.py +340 -74
  156. warp/utils.py +23 -3
  157. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/METADATA +32 -7
  158. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/RECORD +161 -134
  159. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/WHEEL +1 -1
  160. warp/fem/field/test.py +0 -180
  161. warp/fem/field/trial.py +0 -183
  162. warp/fem/space/collocated_function_space.py +0 -102
  163. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  164. warp/fem/space/trimesh_2d_function_space.py +0 -153
  165. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/LICENSE.md +0 -0
  166. {warp_lang-1.4.2.dist-info → warp_lang-1.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,292 @@
1
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ from enum import Enum
9
+
10
+ import numpy as np
11
+
12
+ import warp as wp
13
+ import warp.utils
14
+
15
+
16
class ColoringAlgorithm(Enum):
    """Graph coloring algorithms that can be selected in `color_graph`.

    The integer value of the chosen member is what `color_graph` forwards to the
    native `graph_coloring` routine as the algorithm selector.
    """

    # MCS coloring algorithm
    MCS = 0
    # degree-ordered greedy coloring algorithm
    GREEDY = 1
19
+
20
+
21
@wp.kernel
def construct_trimesh_graph_edges_kernel(
    trimesh_edge_indices: wp.array(dtype=int, ndim=2),
    add_bending: bool,
    graph_edge_indices: wp.array(dtype=int, ndim=2),
    graph_num_edges: wp.array(dtype=int),
):
    # Serial kernel: a single thread walks over every trimesh edge.
    # Columns 2 and 3 of each input row are copied to the output row with the same index.
    # When add_bending is set and columns 0 and 1 are both different from -1, that pair is
    # appended as an extra edge after the first trimesh_edge_indices.shape[0] output rows.
    # The total number of output rows written is stored in graph_num_edges[0].
    mesh_edge_count = trimesh_edge_indices.shape[0]
    extra_edge_count = wp.int32(0)

    for edge in range(mesh_edge_count):
        # the mesh edge itself
        graph_edge_indices[edge, 0] = trimesh_edge_indices[edge, 2]
        graph_edge_indices[edge, 1] = trimesh_edge_indices[edge, 3]

        opposite_a = trimesh_edge_indices[edge, 0]
        opposite_b = trimesh_edge_indices[edge, 1]

        if add_bending:
            if opposite_a != -1 and opposite_b != -1:
                # extra edges are packed right behind the mesh edges
                row = mesh_edge_count + extra_edge_count
                graph_edge_indices[row, 0] = opposite_a
                graph_edge_indices[row, 1] = opposite_b

                extra_edge_count = extra_edge_count + 1

    graph_num_edges[0] = extra_edge_count + mesh_edge_count
47
+
48
+
49
@wp.kernel
def validate_graph_coloring(edge_indices: wp.array(dtype=int, ndim=2), colors: wp.array(dtype=int)):
    # One thread per row of edge_indices: the colors of the two nodes of that
    # edge are expected to differ.
    tid = wp.tid()
    node_a = edge_indices[tid, 0]
    node_b = edge_indices[tid, 1]

    wp.expect_neq(colors[node_a], colors[node_b])
56
+
57
+
58
@wp.kernel
def count_color_group_size(
    colors: wp.array(dtype=int),
    group_sizes: wp.array(dtype=int),
):
    # Serial histogram: the loop visits every entry of `colors` and increments the
    # counter of that entry's color in `group_sizes`.
    for idx in range(colors.shape[0]):
        color = colors[idx]
        group_sizes[color] = group_sizes[color] + 1
66
+
67
+
68
@wp.kernel
def fill_color_groups(
    colors: wp.array(dtype=int),
    group_fill_count: wp.array(dtype=int),
    group_offsets: wp.array(dtype=int),
    # flattened color groups
    color_groups_flatten: wp.array(dtype=int),
):
    # Serial scatter: each index of `colors` is written into the next free slot of
    # its color's segment, where the segment starts at group_offsets[color] and
    # group_fill_count[color] tracks how many slots of it are already used.
    for idx in range(colors.shape[0]):
        color = colors[idx]
        slot = group_fill_count[color]
        segment_start = group_offsets[color]
        color_groups_flatten[slot + segment_start] = wp.int32(idx)

        group_fill_count[color] = slot + 1
83
+
84
+
85
def convert_to_color_groups(num_colors, particle_colors, return_wp_array=False, device="cpu"):
    """
    Turn a per-particle color array into a list with one index array per color.

    Args:
        num_colors: The number of colors used in `particle_colors`
        particle_colors: An array holding the color of each particle; it is passed to kernels launched on the CPU
        return_wp_array: If `True`, each group is returned as a `wp.array` created on `device`;
            otherwise each group is a slice of a NumPy array
        device: The device of the returned `wp.array` groups, only used when `return_wp_array` is `True`

    Returns:
        A list of length `num_colors`; entry `i` contains the indices of the particles with color `i`.
    """
    # histogram of the colors
    sizes = wp.zeros(shape=(num_colors,), dtype=int, device="cpu")
    wp.launch(kernel=count_color_group_size, inputs=[particle_colors, sizes], device="cpu", dim=1)
    sizes_np = sizes.numpy()

    # exclusive prefix sum with the total appended: group i occupies [offsets[i], offsets[i + 1])
    offsets_np = np.concatenate([np.array([0]), np.cumsum(sizes_np)])
    offsets = wp.array(offsets_np, dtype=int, device="cpu")

    # scatter the particle indices into one flat array, grouped by color
    fill_count = wp.zeros(shape=(num_colors,), dtype=int, device="cpu")
    flat = wp.empty(shape=(sizes_np.sum(),), dtype=int, device="cpu")
    wp.launch(
        kernel=fill_color_groups,
        inputs=[particle_colors, fill_count, offsets, flat],
        device="cpu",
        dim=1,
    )
    flat_np = flat.numpy()

    groups = [flat_np[offsets_np[color] : offsets_np[color + 1]] for color in range(num_colors)]
    if return_wp_array:
        return [wp.array(group, dtype=int, device=device) for group in groups]

    return groups
119
+
120
+
121
def construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=False):
    """
    Build the edge list of the graph used for coloring a trimesh, always including the bending edges.

    Args:
        trimesh_edge_indices: A `np.ndarray` or `wp.array` whose rows have 4 columns; columns 2 and 3 are copied
            as a graph edge, and columns 0 and 1 are added as an additional edge when neither of them is -1
        return_wp_array: If `True` the result is returned as a CPU `wp.array`, otherwise as a NumPy array

    Returns:
        An array with 2 columns holding one graph edge per row.
    """
    if isinstance(trimesh_edge_indices, np.ndarray):
        trimesh_edge_indices = wp.array(trimesh_edge_indices, dtype=int, device="cpu")

    # every input row yields at most two graph edges, so allocate for the worst case
    max_edges = trimesh_edge_indices.shape[0] * 2
    edges_buffer = wp.empty(shape=(max_edges, 2), dtype=int, device="cpu")
    edge_counter = wp.zeros(shape=(1,), dtype=int, device="cpu")

    wp.launch(
        kernel=construct_trimesh_graph_edges_kernel,
        inputs=[
            trimesh_edge_indices.to("cpu"),
            True,
        ],
        outputs=[edges_buffer, edge_counter],
        dim=1,
        device="cpu",
    )

    # keep only the rows the kernel actually wrote
    edge_total = edge_counter.numpy()[0]
    edges = edges_buffer.numpy()[:edge_total, :]

    if not return_wp_array:
        return edges

    return wp.array(edges, dtype=int, device="cpu")
147
+
148
+
149
def color_trimesh(
    num_nodes,
    trimesh_edge_indices,
    include_bending_energy,
    balance_colors=True,
    target_max_min_color_ratio=1.1,
    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
):
    """
    Generate a vertex coloring for a trimesh given by its number of vertices and its edges.
    The trimesh is first converted to a graph, which is then colored with `color_graph`.
    The result of `color_graph` is returned unchanged: a list of `np.array` with `dtype`=`int`, where the length
    of the list is the number of colors and each `np.array` contains the indices of the vertices with this color.

    Args:
        num_nodes: The number of the nodes in the graph
        trimesh_edge_indices: A `wp.array` of shape (number_edges, 4), each row is (o1, o2, v1, v2), see `sim.Model`'s definition of `edge_indices`.
        include_bending_energy: Whether to consider bending energy in the coloring process. If set to `True`, the generated
            graph will also contain the edges connecting o1 and o2; otherwise, the graph will be equivalent to the trimesh.
        balance_colors: The parameter passed to `color_graph`, see `color_graph`'s documentation
        target_max_min_color_ratio: The parameter passed to `color_graph`, see `color_graph`'s documentation
        algorithm: The parameter passed to `color_graph`, see `color_graph`'s documentation

    """
    if include_bending_energy:
        # mesh edges (v1, v2) plus the (o1, o2) edges
        edges = construct_trimesh_graph_edges(trimesh_edge_indices, return_wp_array=True)
    else:
        # only the mesh edges: the last two columns (v1, v2) of each row
        edges = wp.array(trimesh_edge_indices[:, 2:], dtype=int, device="cpu")

    return color_graph(num_nodes, edges, balance_colors, target_max_min_color_ratio, algorithm)
180
+
181
+
182
def color_graph(
    num_nodes,
    graph_edge_indices,
    balance_colors=True,
    target_max_min_color_ratio=1.1,
    algorithm: ColoringAlgorithm = ColoringAlgorithm.MCS,
):
    """
    Generate a coloring for a graph, which is represented by the number of nodes and an array of edges.

    Args:
        num_nodes: The number of the nodes in the graph
        graph_edge_indices: A `wp.array` of shape (number_edges, 2)
        balance_colors: Whether to apply the color balancing algorithm to balance the size of each color
        target_max_min_color_ratio: The color balancing algorithm will stop when the ratio between the largest color and
            the smallest color reaches this value
        algorithm: A member of the `ColoringAlgorithm` enum. `ColoringAlgorithm.MCS` means using the MCS coloring algorithm,
            while `ColoringAlgorithm.GREEDY` means using the degree-ordered greedy algorithm. The MCS algorithm typically generates 30% to 50% fewer colors
            compared to the ordered greedy algorithm, while maintaining the same linear complexity. Although MCS has a constant overhead that makes it about twice
            as slow as the greedy algorithm, it produces significantly better coloring results. We recommend using MCS, especially if coloring is only part of the
            preprocessing stage.

    Returns:
        A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
        and each `np.array` contains the indices of vertices with this color.
        An empty list is returned when `num_nodes` is 0.

    Raises:
        ValueError: If `graph_edge_indices` is not a 2 dimensional array.

    Note:

        References to the coloring algorithm:
        MCS: Pereira, F. M. Q., & Palsberg, J. (2005, November). Register allocation via coloring of chordal graphs. In Asian Symposium on Programming Languages and Systems (pp. 315-329). Berlin, Heidelberg: Springer Berlin Heidelberg.
        Ordered Greedy: Ton-That, Q. M., Kry, P. G., & Andrews, S. (2023). Parallel block Neo-Hookean XPBD using graph clustering. Computers & Graphics, 110, 1-10.
    """
    if num_nodes == 0:
        # Nothing to color. Return an empty list rather than None so that callers
        # can take len() of, or iterate over, the result like any other coloring.
        return []

    # validate the input before allocating anything
    if graph_edge_indices.ndim != 2:
        raise ValueError(
            f"graph_edge_indices must be a 2 dimensional array! The provided one is {graph_edge_indices.ndim} dimensional."
        )

    # the native routines write the color of each node into this CPU array
    particle_colors = wp.empty(shape=(num_nodes,), dtype=wp.int32, device="cpu")

    num_colors = wp.context.runtime.core.graph_coloring(
        num_nodes,
        graph_edge_indices.__ctype__(),
        algorithm.value,
        particle_colors.__ctype__(),
    )

    if balance_colors:
        max_min_ratio = wp.context.runtime.core.balance_coloring(
            num_nodes,
            graph_edge_indices.__ctype__(),
            num_colors,
            target_max_min_color_ratio,
            particle_colors.__ctype__(),
        )

        # balancing is best effort: only warn when the target could not be reached
        if max_min_ratio > target_max_min_color_ratio:
            wp.utils.warn(
                f"The graph is not optimizable anymore, terminated with a max/min ratio: {max_min_ratio} without reaching the target ratio: {target_max_min_color_ratio}"
            )

    color_groups = convert_to_color_groups(num_colors, particle_colors, return_wp_array=False)

    return color_groups
246
+
247
+
248
def combine_independent_particle_coloring(color_groups_1, color_groups_2):
    """
    Combine 2 independent colorings. Note that color_groups_1 and color_groups_2 must be from 2 independent
    graphs so that there is no connection between them. The coloring with more colors is sorted by group size in
    ascending order, the other one in descending order, and groups with the same index are concatenated. This way
    the smaller groups are always combined with the larger groups, and the groups left without a partner are the
    largest ones of the coloring with more colors.

    Args:
        color_groups_1: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
            and each `np.array` contains the indices of vertices with this color. `None` is treated as an empty coloring.
        color_groups_2: A list of `np.array` with `dtype`=`int`. The length of the list is the number of colors
            and each `np.array` contains the indices of vertices with this color. `None` is treated as an empty coloring.

    Returns:
        A list of `np.array` whose length is the larger of the two numbers of colors. If one of the colorings is
        empty, the other one is returned as is (the same list object, not a copy).

    """
    # tolerate None (e.g. the result of coloring an empty graph) by treating it as "no colors"
    groups_1 = [] if color_groups_1 is None else color_groups_1
    groups_2 = [] if color_groups_2 is None else color_groups_2

    if len(groups_1) == 0:
        return groups_2
    if len(groups_2) == 0:
        return groups_1

    # make groups_1 the coloring with more colors, so the leftover groups always come from it
    if len(groups_1) < len(groups_2):
        groups_1, groups_2 = groups_2, groups_1

    # sort group 1 in ascending order of size
    ascending = sorted(groups_1, key=len)
    # sort group 2 in descending order of size (the sort stays stable with reverse=True)
    descending = sorted(groups_2, key=len, reverse=True)

    # pair the smaller groups of one coloring with the larger groups of the other,
    # which balances the load of each combined group
    combined = [np.concatenate([group_1, group_2]) for group_1, group_2 in zip(ascending, descending)]

    # groups of the longer coloring that have no partner are kept unchanged
    combined.extend(ascending[len(descending) :])

    return combined
warp/sim/import_urdf.py CHANGED
@@ -211,14 +211,14 @@ def parse_urdf(
211
211
  if hasattr(m, "geometry"):
212
212
  # multiple meshes are contained in a scene
213
213
  for geom in m.geometry.values():
214
- vertices = np.array(geom.vertices, dtype=np.float32) * scaling
215
- faces = np.array(geom.faces.flatten(), dtype=np.int32)
216
- mesh = Mesh(vertices, faces)
214
+ geom_vertices = np.array(geom.vertices, dtype=np.float32) * scaling
215
+ geom_faces = np.array(geom.faces.flatten(), dtype=np.int32)
216
+ geom_mesh = Mesh(geom_vertices, geom_faces)
217
217
  s = builder.add_shape_mesh(
218
218
  body=link,
219
219
  pos=wp.vec3(tf.p),
220
220
  rot=wp.quat(tf.q),
221
- mesh=mesh,
221
+ mesh=geom_mesh,
222
222
  density=density,
223
223
  is_visible=visible,
224
224
  has_ground_collision=not just_visual,
@@ -228,14 +228,14 @@ def parse_urdf(
228
228
  shapes.append(s)
229
229
  else:
230
230
  # a single mesh
231
- vertices = np.array(m.vertices, dtype=np.float32) * scaling
232
- faces = np.array(m.faces.flatten(), dtype=np.int32)
233
- mesh = Mesh(vertices, faces)
231
+ m_vertices = np.array(m.vertices, dtype=np.float32) * scaling
232
+ m_faces = np.array(m.faces.flatten(), dtype=np.int32)
233
+ m_mesh = Mesh(m_vertices, m_faces)
234
234
  s = builder.add_shape_mesh(
235
235
  body=link,
236
236
  pos=wp.vec3(tf.p),
237
237
  rot=wp.quat(tf.q),
238
- mesh=mesh,
238
+ mesh=m_mesh,
239
239
  density=density,
240
240
  is_visible=visible,
241
241
  has_ground_collision=not just_visual,
@@ -264,6 +264,7 @@ def eval_triangles_contact(
264
264
  v: wp.array(dtype=wp.vec3),
265
265
  indices: wp.array2d(dtype=int),
266
266
  materials: wp.array2d(dtype=float),
267
+ particle_radius: wp.array(dtype=float),
267
268
  f: wp.array(dtype=wp.vec3),
268
269
  ):
269
270
  tid = wp.tid()
@@ -303,7 +304,7 @@ def eval_triangles_contact(
303
304
  diff = pos - closest
304
305
  dist = wp.dot(diff, diff)
305
306
  n = wp.normalize(diff)
306
- c = wp.min(dist - 0.01, 0.0) # 0 unless within 0.01 of surface
307
+ c = wp.min(dist - particle_radius[particle_no], 0.0) # 0 unless within particle's contact radius
307
308
  # c = wp.leaky_min(dot(n, x0)-0.01, 0.0, 0.0)
308
309
  fn = n * c * 1e5
309
310
 
@@ -795,7 +796,7 @@ def eval_particle_contacts(
795
796
  r = bx - wp.transform_point(X_wb, X_com)
796
797
 
797
798
  n = contact_normal[tid]
798
- c = wp.dot(n, px - bx) - particle_radius[tid]
799
+ c = wp.dot(n, px - bx) - particle_radius[particle_index]
799
800
 
800
801
  if c > particle_ka:
801
802
  return
@@ -1697,6 +1698,7 @@ def eval_triangle_contact_forces(model: Model, state: State, particle_f: wp.arra
1697
1698
  state.particle_qd,
1698
1699
  model.tri_indices,
1699
1700
  model.tri_materials,
1701
+ model.particle_radius,
1700
1702
  ],
1701
1703
  outputs=[particle_f],
1702
1704
  device=model.device,
@@ -1155,6 +1155,38 @@ def dense_gemm(
1155
1155
  # dense_gemm(p, n, m, True, False, add_to_C, A_start, B_start, C_start, A, wp.adjoint[C], wp.adjoint[B])
1156
1156
 
1157
1157
 
1158
+ def create_inertia_matrix_kernel(num_joints, num_dofs):
1159
+ @wp.kernel
1160
+ def eval_dense_gemm_tile(
1161
+ J_arr: wp.array3d(dtype=float), M_arr: wp.array3d(dtype=float), H_arr: wp.array3d(dtype=float)
1162
+ ):
1163
+ articulation = wp.tid()
1164
+
1165
+ J = wp.tile_load(J_arr[articulation], 0, 0, m=wp.static(6 * num_joints), n=num_dofs)
1166
+ P = wp.tile_zeros(m=wp.static(6 * num_joints), n=num_dofs, dtype=float)
1167
+
1168
+ # compute P = M*J where M is a 6x6 block diagonal mass matrix
1169
+ for i in range(int(num_joints)):
1170
+ # 6x6 block matrices are on the diagonal
1171
+ M_body = wp.tile_load(M_arr[articulation], i, i, m=6, n=6)
1172
+
1173
+ # load a 6xN row from the Jacobian
1174
+ J_body = wp.tile_view(J, i * 6, 0, m=6, n=num_dofs)
1175
+
1176
+ # compute weighted row
1177
+ P_body = wp.tile_matmul(M_body, J_body)
1178
+
1179
+ # assign to the P slice
1180
+ wp.tile_assign(P, i * 6, 0, P_body)
1181
+
1182
+ # compute H = J^T*P
1183
+ H = wp.tile_matmul(wp.tile_transpose(J), P)
1184
+
1185
+ wp.tile_store(H_arr[articulation], 0, 0, H)
1186
+
1187
+ return eval_dense_gemm_tile
1188
+
1189
+
1158
1190
  @wp.kernel
1159
1191
  def eval_dense_gemm_batched(
1160
1192
  m: wp.array(dtype=int),
@@ -1426,7 +1458,7 @@ class FeatherstoneIntegrator(Integrator):
1426
1458
 
1427
1459
  """
1428
1460
 
1429
- def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1):
1461
+ def __init__(self, model, angular_damping=0.05, update_mass_matrix_every=1, use_tile_gemm=False):
1430
1462
  """
1431
1463
  Args:
1432
1464
  model (Model): the model to be simulated.
@@ -1435,9 +1467,19 @@ class FeatherstoneIntegrator(Integrator):
1435
1467
  """
1436
1468
  self.angular_damping = angular_damping
1437
1469
  self.update_mass_matrix_every = update_mass_matrix_every
1470
+ self.use_tile_gemm = use_tile_gemm
1471
+ self._step = 0
1472
+
1438
1473
  self.compute_articulation_indices(model)
1439
1474
  self.allocate_model_aux_vars(model)
1440
- self._step = 0
1475
+
1476
+ if self.use_tile_gemm:
1477
+ # create a custom kernel to evaluate the system matrix for this type
1478
+ self.eval_inertia_matrix_kernel = create_inertia_matrix_kernel(int(self.joint_count), int(self.dof_count))
1479
+
1480
+ # ensure matrix is reloaded since otherwise an unload can happen during graph capture
1481
+ # todo: should not be necessary?
1482
+ wp.load_module(device=wp.get_device())
1441
1483
 
1442
1484
  def compute_articulation_indices(self, model):
1443
1485
  # calculate total size and offsets of Jacobian and mass matrices for entire system
@@ -1486,6 +1528,12 @@ class FeatherstoneIntegrator(Integrator):
1486
1528
  articulation_J_rows.append(joint_count * 6)
1487
1529
  articulation_J_cols.append(dof_count)
1488
1530
 
1531
+ if self.use_tile_gemm:
1532
+ # store the joint and dof count assuming all
1533
+ # articulations have the same structure
1534
+ self.joint_count = joint_count
1535
+ self.dof_count = dof_count
1536
+
1489
1537
  self.J_size += 6 * joint_count * dof_count
1490
1538
  self.M_size += 6 * joint_count * 6 * joint_count
1491
1539
  self.H_size += dof_count * dof_count
@@ -1790,48 +1838,71 @@ class FeatherstoneIntegrator(Integrator):
1790
1838
  device=model.device,
1791
1839
  )
1792
1840
 
1793
- # form P = M*J
1794
- wp.launch(
1795
- eval_dense_gemm_batched,
1796
- dim=model.articulation_count,
1797
- inputs=[
1798
- self.articulation_M_rows,
1799
- self.articulation_J_cols,
1800
- self.articulation_J_rows,
1801
- False,
1802
- False,
1803
- self.articulation_M_start,
1804
- self.articulation_J_start,
1805
- # P start is the same as J start since it has the same dims as J
1806
- self.articulation_J_start,
1807
- self.M,
1808
- self.J,
1809
- ],
1810
- outputs=[self.P],
1811
- device=model.device,
1812
- )
1813
-
1814
- # form H = J^T*P
1815
- wp.launch(
1816
- eval_dense_gemm_batched,
1817
- dim=model.articulation_count,
1818
- inputs=[
1819
- self.articulation_J_cols,
1820
- self.articulation_J_cols,
1821
- # P rows is the same as J rows
1822
- self.articulation_J_rows,
1823
- True,
1824
- False,
1825
- self.articulation_J_start,
1826
- # P start is the same as J start since it has the same dims as J
1827
- self.articulation_J_start,
1828
- self.articulation_H_start,
1829
- self.J,
1830
- self.P,
1831
- ],
1832
- outputs=[self.H],
1833
- device=model.device,
1834
- )
1841
+ if self.use_tile_gemm:
1842
+ # reshape arrays
1843
+ M_tiled = self.M.reshape((-1, 6 * self.joint_count, 6 * self.joint_count))
1844
+ J_tiled = self.J.reshape((-1, 6 * self.joint_count, self.dof_count))
1845
+ H_tiled = self.H.reshape((-1, self.dof_count, self.dof_count))
1846
+
1847
+ wp.launch_tiled(
1848
+ self.eval_inertia_matrix_kernel,
1849
+ dim=model.articulation_count,
1850
+ inputs=[J_tiled, M_tiled],
1851
+ outputs=[H_tiled],
1852
+ device=model.device,
1853
+ block_dim=256,
1854
+ )
1855
+
1856
+ # J = J_tiled.numpy()[0]
1857
+ # M = M_tiled.numpy()[0]
1858
+ # H = J.T@M@J
1859
+
1860
+ # import numpy as np
1861
+ # np.testing.assert_allclose(H, H_tiled.numpy()[0])
1862
+
1863
+ else:
1864
+ # form P = M*J
1865
+ wp.launch(
1866
+ eval_dense_gemm_batched,
1867
+ dim=model.articulation_count,
1868
+ inputs=[
1869
+ self.articulation_M_rows,
1870
+ self.articulation_J_cols,
1871
+ self.articulation_J_rows,
1872
+ False,
1873
+ False,
1874
+ self.articulation_M_start,
1875
+ self.articulation_J_start,
1876
+ # P start is the same as J start since it has the same dims as J
1877
+ self.articulation_J_start,
1878
+ self.M,
1879
+ self.J,
1880
+ ],
1881
+ outputs=[self.P],
1882
+ device=model.device,
1883
+ )
1884
+
1885
+ # form H = J^T*P
1886
+ wp.launch(
1887
+ eval_dense_gemm_batched,
1888
+ dim=model.articulation_count,
1889
+ inputs=[
1890
+ self.articulation_J_cols,
1891
+ self.articulation_J_cols,
1892
+ # P rows is the same as J rows
1893
+ self.articulation_J_rows,
1894
+ True,
1895
+ False,
1896
+ self.articulation_J_start,
1897
+ # P start is the same as J start since it has the same dims as J
1898
+ self.articulation_J_start,
1899
+ self.articulation_H_start,
1900
+ self.J,
1901
+ self.P,
1902
+ ],
1903
+ outputs=[self.H],
1904
+ device=model.device,
1905
+ )
1835
1906
 
1836
1907
  # compute decomposition
1837
1908
  wp.launch(
@@ -740,6 +740,12 @@ class VBDIntegrator(Integrator):
740
740
  self.body_particle_contact_count = wp.zeros((model.particle_count,), dtype=wp.int32, device=self.device)
741
741
  self.friction_epsilon = friction_epsilon
742
742
 
743
+ if len(self.model.particle_coloring) == 0:
744
+ raise ValueError(
745
+ "model.particle_coloring is empty! When using the VBDIntegrator you must call ModelBuilder.color() "
746
+ "or ModelBuilder.set_coloring() before calling ModelBuilder.finalize()."
747
+ )
748
+
743
749
  # tests
744
750
  # wp.launch(kernel=_test_compute_force_element_adjacency,
745
751
  # inputs=[self.adjacency, model.edge_indices, model.tri_indices],