warp-lang 1.5.1__py3-none-manylinux2014_x86_64.whl → 1.6.1__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. More details are available on the original page (the link is not preserved in this text).
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1077 -481
- warp/codegen.py +250 -122
- warp/config.py +65 -21
- warp/context.py +500 -149
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_marching_cubes.py +1 -1
- warp/examples/core/example_mesh.py +1 -1
- warp/examples/core/example_torch.py +18 -34
- warp/examples/core/example_wave.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth_self_contact.py +314 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +191 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +6 -2
- warp/native/crt.h +1 -0
- warp/native/cuda_util.cpp +35 -0
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +57 -3
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1189 -664
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +132 -59
- warp/render/render_usd.py +10 -2
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +289 -32
- warp/sim/import_urdf.py +20 -5
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +147 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +173 -112
- warp/sim/render.py +2 -2
- warp/stubs.py +249 -116
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +100 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +8 -8
- warp/tests/test_examples.py +16 -1
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_launch.py +77 -26
- warp/tests/test_mat.py +213 -168
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +11 -7
- warp/tests/test_matmul_lite.py +4 -4
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +6 -5
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_static.py +16 -0
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +134 -191
- warp/tests/test_tile_load.py +399 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -11
- warp/thirdparty/unittest_parallel.py +5 -2
- warp/types.py +419 -111
- warp/utils.py +9 -5
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
# and proprietary rights in and to this software, related documentation
|
|
4
|
+
# and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
# distribution of this software and related documentation without an express
|
|
6
|
+
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
|
|
8
|
+
###########################################################################
|
|
9
|
+
# Example Sim Cloth Self Contact
|
|
10
|
+
#
|
|
11
|
+
# This simulation demonstrates twisting an FEM cloth model using the VBD
|
|
12
|
+
# integrator, showcasing its ability to handle complex self-contacts while
|
|
13
|
+
# ensuring the cloth remains intersection-free.
|
|
14
|
+
#
|
|
15
|
+
###########################################################################
|
|
16
|
+
|
|
17
|
+
import math
|
|
18
|
+
import os
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
from pxr import Usd, UsdGeom
|
|
22
|
+
|
|
23
|
+
import warp as wp
|
|
24
|
+
import warp.examples
|
|
25
|
+
import warp.sim
|
|
26
|
+
import warp.sim.render
|
|
27
|
+
from warp.sim.model import PARTICLE_FLAG_ACTIVE
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@wp.kernel
|
|
31
|
+
def initialize_rotation(
|
|
32
|
+
# input
|
|
33
|
+
vertex_indices_to_rot: wp.array(dtype=wp.int32),
|
|
34
|
+
pos: wp.array(dtype=wp.vec3),
|
|
35
|
+
rot_centers: wp.array(dtype=wp.vec3),
|
|
36
|
+
rot_axes: wp.array(dtype=wp.vec3),
|
|
37
|
+
t: wp.array(dtype=float),
|
|
38
|
+
# output
|
|
39
|
+
roots: wp.array(dtype=wp.vec3),
|
|
40
|
+
roots_to_ps: wp.array(dtype=wp.vec3),
|
|
41
|
+
):
|
|
42
|
+
tid = wp.tid()
|
|
43
|
+
v_index = vertex_indices_to_rot[wp.tid()]
|
|
44
|
+
|
|
45
|
+
p = pos[v_index]
|
|
46
|
+
rot_center = rot_centers[tid]
|
|
47
|
+
rot_axis = rot_axes[tid]
|
|
48
|
+
op = p - rot_center
|
|
49
|
+
|
|
50
|
+
root = wp.dot(op, rot_axis) * rot_axis
|
|
51
|
+
|
|
52
|
+
root_to_p = p - root
|
|
53
|
+
|
|
54
|
+
roots[tid] = root
|
|
55
|
+
roots_to_ps[tid] = root_to_p
|
|
56
|
+
|
|
57
|
+
if tid == 0:
|
|
58
|
+
t[0] = 0.0
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@wp.kernel
|
|
62
|
+
def apply_rotation(
|
|
63
|
+
# input
|
|
64
|
+
vertex_indices_to_rot: wp.array(dtype=wp.int32),
|
|
65
|
+
rot_axes: wp.array(dtype=wp.vec3),
|
|
66
|
+
roots: wp.array(dtype=wp.vec3),
|
|
67
|
+
roots_to_ps: wp.array(dtype=wp.vec3),
|
|
68
|
+
t: wp.array(dtype=float),
|
|
69
|
+
angular_velocity: float,
|
|
70
|
+
dt: float,
|
|
71
|
+
end_time: float,
|
|
72
|
+
# output
|
|
73
|
+
pos_0: wp.array(dtype=wp.vec3),
|
|
74
|
+
pos_1: wp.array(dtype=wp.vec3),
|
|
75
|
+
):
|
|
76
|
+
cur_t = t[0]
|
|
77
|
+
if cur_t > end_time:
|
|
78
|
+
return
|
|
79
|
+
|
|
80
|
+
tid = wp.tid()
|
|
81
|
+
v_index = vertex_indices_to_rot[wp.tid()]
|
|
82
|
+
|
|
83
|
+
rot_axis = rot_axes[tid]
|
|
84
|
+
|
|
85
|
+
ux = rot_axis[0]
|
|
86
|
+
uy = rot_axis[1]
|
|
87
|
+
uz = rot_axis[2]
|
|
88
|
+
|
|
89
|
+
theta = cur_t * angular_velocity
|
|
90
|
+
|
|
91
|
+
R = wp.mat33(
|
|
92
|
+
wp.cos(theta) + ux * ux * (1.0 - wp.cos(theta)),
|
|
93
|
+
ux * uy * (1.0 - wp.cos(theta)) - uz * wp.sin(theta),
|
|
94
|
+
ux * uz * (1.0 - wp.cos(theta)) + uy * wp.sin(theta),
|
|
95
|
+
uy * ux * (1.0 - wp.cos(theta)) + uz * wp.sin(theta),
|
|
96
|
+
wp.cos(theta) + uy * uy * (1.0 - wp.cos(theta)),
|
|
97
|
+
uy * uz * (1.0 - wp.cos(theta)) - ux * wp.sin(theta),
|
|
98
|
+
uz * ux * (1.0 - wp.cos(theta)) - uy * wp.sin(theta),
|
|
99
|
+
uz * uy * (1.0 - wp.cos(theta)) + ux * wp.sin(theta),
|
|
100
|
+
wp.cos(theta) + uz * uz * (1.0 - wp.cos(theta)),
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
root = roots[tid]
|
|
104
|
+
root_to_p = roots_to_ps[tid]
|
|
105
|
+
root_to_p_rot = R * root_to_p
|
|
106
|
+
p_rot = root + root_to_p_rot
|
|
107
|
+
|
|
108
|
+
pos_0[v_index] = p_rot
|
|
109
|
+
pos_1[v_index] = p_rot
|
|
110
|
+
|
|
111
|
+
if tid == 0:
|
|
112
|
+
t[0] = cur_t + dt
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class Example:
|
|
116
|
+
def __init__(self, stage_path="example_cloth_self_contact.usd", num_frames=600):
|
|
117
|
+
fps = 60
|
|
118
|
+
self.frame_dt = 1.0 / fps
|
|
119
|
+
# must be an even number when using CUDA Graph
|
|
120
|
+
self.num_substeps = 10
|
|
121
|
+
self.iterations = 4
|
|
122
|
+
self.dt = self.frame_dt / self.num_substeps
|
|
123
|
+
|
|
124
|
+
self.num_frames = num_frames
|
|
125
|
+
self.sim_time = 0.0
|
|
126
|
+
self.profiler = {}
|
|
127
|
+
|
|
128
|
+
self.rot_angular_velocity = math.pi / 3
|
|
129
|
+
self.rot_end_time = 10
|
|
130
|
+
self.use_cuda_graph = wp.get_device().is_cuda
|
|
131
|
+
|
|
132
|
+
usd_stage = Usd.Stage.Open(os.path.join(warp.examples.get_asset_directory(), "square_cloth.usd"))
|
|
133
|
+
usd_geom = UsdGeom.Mesh(usd_stage.GetPrimAtPath("/root/cloth/cloth"))
|
|
134
|
+
|
|
135
|
+
mesh_points = np.array(usd_geom.GetPointsAttr().Get())
|
|
136
|
+
mesh_indices = np.array(usd_geom.GetFaceVertexIndicesAttr().Get())
|
|
137
|
+
|
|
138
|
+
self.input_scale_factor = 1.0
|
|
139
|
+
self.renderer_scale_factor = 0.01
|
|
140
|
+
|
|
141
|
+
vertices = [wp.vec3(v) * self.input_scale_factor for v in mesh_points]
|
|
142
|
+
self.faces = mesh_indices.reshape(-1, 3)
|
|
143
|
+
|
|
144
|
+
builder = wp.sim.ModelBuilder()
|
|
145
|
+
builder.add_cloth_mesh(
|
|
146
|
+
pos=wp.vec3(0.0, 0.0, 0.0),
|
|
147
|
+
rot=wp.quat_identity(),
|
|
148
|
+
scale=1.0,
|
|
149
|
+
vertices=vertices,
|
|
150
|
+
indices=mesh_indices,
|
|
151
|
+
vel=wp.vec3(0.0, 0.0, 0.0),
|
|
152
|
+
density=0.02,
|
|
153
|
+
tri_ke=1.0e5,
|
|
154
|
+
tri_ka=1.0e5,
|
|
155
|
+
tri_kd=2.0e-6,
|
|
156
|
+
edge_ke=10,
|
|
157
|
+
)
|
|
158
|
+
builder.color()
|
|
159
|
+
self.model = builder.finalize()
|
|
160
|
+
self.model.ground = False
|
|
161
|
+
self.model.soft_contact_ke = 1.0e5
|
|
162
|
+
self.model.soft_contact_kd = 1.0e-6
|
|
163
|
+
self.model.soft_contact_mu = 0.2
|
|
164
|
+
|
|
165
|
+
# set up contact query and contact detection distances
|
|
166
|
+
self.model.soft_contact_radius = 0.2
|
|
167
|
+
self.model.soft_contact_margin = 0.35
|
|
168
|
+
|
|
169
|
+
cloth_size = 50
|
|
170
|
+
left_side = [cloth_size - 1 + i * cloth_size for i in range(cloth_size)]
|
|
171
|
+
right_side = [i * cloth_size for i in range(cloth_size)]
|
|
172
|
+
rot_point_indices = left_side + right_side
|
|
173
|
+
|
|
174
|
+
if len(rot_point_indices):
|
|
175
|
+
flags = self.model.particle_flags.numpy()
|
|
176
|
+
for fixed_vertex_id in rot_point_indices:
|
|
177
|
+
flags[fixed_vertex_id] = wp.uint32(int(flags[fixed_vertex_id]) & ~int(PARTICLE_FLAG_ACTIVE))
|
|
178
|
+
|
|
179
|
+
self.model.particle_flags = wp.array(flags)
|
|
180
|
+
|
|
181
|
+
self.integrator = wp.sim.VBDIntegrator(
|
|
182
|
+
self.model,
|
|
183
|
+
self.iterations,
|
|
184
|
+
handle_self_contact=True,
|
|
185
|
+
)
|
|
186
|
+
self.state0 = self.model.state()
|
|
187
|
+
self.state1 = self.model.state()
|
|
188
|
+
|
|
189
|
+
rot_axes = [[1, 0, 0]] * len(right_side) + [[-1, 0, 0]] * len(left_side)
|
|
190
|
+
|
|
191
|
+
self.rot_point_indices = wp.array(rot_point_indices, dtype=int)
|
|
192
|
+
self.t = wp.zeros((1,), dtype=float)
|
|
193
|
+
self.rot_centers = wp.zeros(len(rot_point_indices), dtype=wp.vec3)
|
|
194
|
+
self.rot_axes = wp.array(rot_axes, dtype=wp.vec3)
|
|
195
|
+
|
|
196
|
+
self.roots = wp.zeros_like(self.rot_centers)
|
|
197
|
+
self.roots_to_ps = wp.zeros_like(self.rot_centers)
|
|
198
|
+
|
|
199
|
+
wp.launch(
|
|
200
|
+
kernel=initialize_rotation,
|
|
201
|
+
dim=self.rot_point_indices.shape[0],
|
|
202
|
+
inputs=[
|
|
203
|
+
self.rot_point_indices,
|
|
204
|
+
self.state0.particle_q,
|
|
205
|
+
self.rot_centers,
|
|
206
|
+
self.rot_axes,
|
|
207
|
+
self.t,
|
|
208
|
+
],
|
|
209
|
+
outputs=[
|
|
210
|
+
self.roots,
|
|
211
|
+
self.roots_to_ps,
|
|
212
|
+
],
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
if stage_path:
|
|
216
|
+
self.renderer = wp.sim.render.SimRenderer(self.model, stage_path, scaling=1)
|
|
217
|
+
else:
|
|
218
|
+
self.renderer = None
|
|
219
|
+
self.cuda_graph = None
|
|
220
|
+
if self.use_cuda_graph:
|
|
221
|
+
with wp.ScopedCapture() as capture:
|
|
222
|
+
for _ in range(self.num_substeps):
|
|
223
|
+
wp.launch(
|
|
224
|
+
kernel=apply_rotation,
|
|
225
|
+
dim=self.rot_point_indices.shape[0],
|
|
226
|
+
inputs=[
|
|
227
|
+
self.rot_point_indices,
|
|
228
|
+
self.rot_axes,
|
|
229
|
+
self.roots,
|
|
230
|
+
self.roots_to_ps,
|
|
231
|
+
self.t,
|
|
232
|
+
self.rot_angular_velocity,
|
|
233
|
+
self.dt,
|
|
234
|
+
self.rot_end_time,
|
|
235
|
+
],
|
|
236
|
+
outputs=[
|
|
237
|
+
self.state0.particle_q,
|
|
238
|
+
self.state1.particle_q,
|
|
239
|
+
],
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
self.integrator.simulate(self.model, self.state0, self.state1, self.dt, None)
|
|
243
|
+
(self.state0, self.state1) = (self.state1, self.state0)
|
|
244
|
+
|
|
245
|
+
self.cuda_graph = capture.graph
|
|
246
|
+
|
|
247
|
+
def step(self):
|
|
248
|
+
with wp.ScopedTimer("step", print=False, dict=self.profiler):
|
|
249
|
+
if self.use_cuda_graph:
|
|
250
|
+
wp.capture_launch(self.cuda_graph)
|
|
251
|
+
else:
|
|
252
|
+
for _ in range(self.num_substeps):
|
|
253
|
+
wp.launch(
|
|
254
|
+
kernel=apply_rotation,
|
|
255
|
+
dim=self.rot_point_indices.shape[0],
|
|
256
|
+
inputs=[
|
|
257
|
+
self.rot_point_indices,
|
|
258
|
+
self.rot_axes,
|
|
259
|
+
self.roots,
|
|
260
|
+
self.roots_to_ps,
|
|
261
|
+
self.t,
|
|
262
|
+
self.rot_angular_velocity,
|
|
263
|
+
self.dt,
|
|
264
|
+
self.rot_end_time,
|
|
265
|
+
],
|
|
266
|
+
outputs=[
|
|
267
|
+
self.state0.particle_q,
|
|
268
|
+
self.state1.particle_q,
|
|
269
|
+
],
|
|
270
|
+
)
|
|
271
|
+
self.integrator.simulate(self.model, self.state0, self.state1, self.dt)
|
|
272
|
+
|
|
273
|
+
(self.state0, self.state1) = (self.state1, self.state0)
|
|
274
|
+
|
|
275
|
+
self.sim_time += self.dt
|
|
276
|
+
|
|
277
|
+
def render(self):
|
|
278
|
+
if self.renderer is None:
|
|
279
|
+
return
|
|
280
|
+
|
|
281
|
+
with wp.ScopedTimer("render", print=False):
|
|
282
|
+
self.renderer.begin_frame(self.sim_time)
|
|
283
|
+
self.renderer.render(self.state0)
|
|
284
|
+
self.renderer.end_frame()
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
if __name__ == "__main__":
|
|
288
|
+
import argparse
|
|
289
|
+
|
|
290
|
+
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
|
291
|
+
parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
|
|
292
|
+
parser.add_argument(
|
|
293
|
+
"--stage_path",
|
|
294
|
+
type=lambda x: None if x == "None" else str(x),
|
|
295
|
+
default="example_cloth_self_contact.usd",
|
|
296
|
+
help="Path to the output USD file.",
|
|
297
|
+
)
|
|
298
|
+
parser.add_argument("--num_frames", type=int, default=300, help="Total number of frames.")
|
|
299
|
+
|
|
300
|
+
args = parser.parse_known_args()[0]
|
|
301
|
+
|
|
302
|
+
with wp.ScopedDevice(args.device):
|
|
303
|
+
example = Example(stage_path=args.stage_path, num_frames=args.num_frames)
|
|
304
|
+
|
|
305
|
+
for i in range(example.num_frames):
|
|
306
|
+
example.step()
|
|
307
|
+
example.render()
|
|
308
|
+
print(f"[{i:4d}/{example.num_frames}]")
|
|
309
|
+
|
|
310
|
+
frame_times = example.profiler["step"]
|
|
311
|
+
print("\nAverage frame sim time: {:.2f} ms".format(sum(frame_times) / len(frame_times)))
|
|
312
|
+
|
|
313
|
+
if example.renderer:
|
|
314
|
+
example.renderer.save()
|
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
# Note: requires a CUDA-capable device
|
|
16
16
|
###########################################################################
|
|
17
17
|
|
|
18
|
-
import math
|
|
19
18
|
import os
|
|
20
19
|
|
|
21
20
|
import numpy as np
|
|
@@ -66,8 +65,8 @@ class Example:
|
|
|
66
65
|
sdf=rock_sdf,
|
|
67
66
|
body=-1,
|
|
68
67
|
pos=wp.vec3(0.0, 0.0, 0.0),
|
|
69
|
-
rot=wp.
|
|
70
|
-
scale=wp.vec3(0
|
|
68
|
+
rot=wp.quat(0.0, 0.0, 0.0, 1.0),
|
|
69
|
+
scale=wp.vec3(1.0, 1.0, 1.0),
|
|
71
70
|
)
|
|
72
71
|
|
|
73
72
|
mins = np.array([-3.0, -3.0, -3.0])
|
|
@@ -149,13 +148,13 @@ class Example:
|
|
|
149
148
|
with wp.ScopedTimer("render"):
|
|
150
149
|
self.renderer.begin_frame(self.sim_time)
|
|
151
150
|
|
|
152
|
-
# Note the extra wp.quat_from_axis_angle(wp.vec3(1.0, 0.0, 0.0), math.pi) is because .usd is oriented differently from .nvdb
|
|
153
151
|
self.renderer.render_ref(
|
|
154
152
|
name="collision",
|
|
155
153
|
path=os.path.join(warp.examples.get_asset_directory(), "rocks.usd"),
|
|
156
154
|
pos=wp.vec3(0.0, 0.0, 0.0),
|
|
157
155
|
rot=wp.quat(0.0, 0.0, 0.0, 1.0),
|
|
158
|
-
scale=wp.vec3(0
|
|
156
|
+
scale=wp.vec3(1.0, 1.0, 1.0),
|
|
157
|
+
color=(0.35, 0.55, 0.9),
|
|
159
158
|
)
|
|
160
159
|
|
|
161
160
|
self.renderer.render_sphere(
|
|
@@ -93,7 +93,7 @@ class Example:
|
|
|
93
93
|
fps = 100
|
|
94
94
|
self.frame_dt = 1.0 / fps
|
|
95
95
|
|
|
96
|
-
self.sim_substeps =
|
|
96
|
+
self.sim_substeps = 10
|
|
97
97
|
self.sim_dt = self.frame_dt / self.sim_substeps
|
|
98
98
|
|
|
99
99
|
self.num_envs = num_envs
|
|
@@ -116,10 +116,13 @@ class Example:
|
|
|
116
116
|
self.model.joint_attach_ke = 16000.0
|
|
117
117
|
self.model.joint_attach_kd = 200.0
|
|
118
118
|
self.use_tile_gemm = False
|
|
119
|
+
self.fuse_cholesky = False
|
|
119
120
|
|
|
120
121
|
# self.integrator = wp.sim.XPBDIntegrator()
|
|
121
122
|
# self.integrator = wp.sim.SemiImplicitIntegrator()
|
|
122
|
-
self.integrator = wp.sim.FeatherstoneIntegrator(
|
|
123
|
+
self.integrator = wp.sim.FeatherstoneIntegrator(
|
|
124
|
+
self.model, use_tile_gemm=self.use_tile_gemm, fuse_cholesky=self.fuse_cholesky
|
|
125
|
+
)
|
|
123
126
|
|
|
124
127
|
if stage_path:
|
|
125
128
|
self.renderer = wp.sim.render.SimRenderer(self.model, stage_path)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
# and proprietary rights in and to this software, related documentation
|
|
4
|
+
# and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
# distribution of this software and related documentation without an express
|
|
6
|
+
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
|
|
8
|
+
###########################################################################
|
|
9
|
+
# Example Tile Cholesky
|
|
10
|
+
#
|
|
11
|
+
# Shows how to write a simple kernel computing a Cholesky factorization and
|
|
12
|
+
# triangular solve using Warp Cholesky Tile APIs.
|
|
13
|
+
#
|
|
14
|
+
###########################################################################
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
|
|
18
|
+
import warp as wp
|
|
19
|
+
|
|
20
|
+
wp.init()
|
|
21
|
+
wp.set_module_options({"enable_backward": False})
|
|
22
|
+
|
|
23
|
+
BLOCK_DIM = 128
|
|
24
|
+
TILE = 32
|
|
25
|
+
|
|
26
|
+
# Both should work
|
|
27
|
+
np_type, wp_type = np.float64, wp.float64
|
|
28
|
+
# np_type, wp_type = np.float32, wp.float32
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@wp.kernel
|
|
32
|
+
def cholesky(
|
|
33
|
+
A: wp.array2d(dtype=wp_type),
|
|
34
|
+
L: wp.array2d(dtype=wp_type),
|
|
35
|
+
X: wp.array1d(dtype=wp_type),
|
|
36
|
+
Y: wp.array1d(dtype=wp_type),
|
|
37
|
+
):
|
|
38
|
+
i, j, _ = wp.tid()
|
|
39
|
+
|
|
40
|
+
a = wp.tile_load(A, shape=(TILE, TILE))
|
|
41
|
+
l = wp.tile_cholesky(a)
|
|
42
|
+
wp.tile_store(L, l)
|
|
43
|
+
|
|
44
|
+
x = wp.tile_load(X, shape=TILE)
|
|
45
|
+
y = wp.tile_cholesky_solve(l, x)
|
|
46
|
+
wp.tile_store(Y, y)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
if __name__ == "__main__":
|
|
50
|
+
wp.set_device("cuda:0")
|
|
51
|
+
|
|
52
|
+
A_h = np.ones((TILE, TILE), dtype=np_type) + 5 * np.diag(np.ones(TILE), 0)
|
|
53
|
+
L_h = np.zeros_like(A_h)
|
|
54
|
+
|
|
55
|
+
A_wp = wp.array2d(A_h, dtype=wp_type)
|
|
56
|
+
L_wp = wp.array2d(L_h, dtype=wp_type)
|
|
57
|
+
|
|
58
|
+
X_h = np.arange(TILE, dtype=np_type)
|
|
59
|
+
Y_h = np.zeros_like(X_h)
|
|
60
|
+
|
|
61
|
+
X_wp = wp.array1d(X_h, dtype=wp_type)
|
|
62
|
+
Y_wp = wp.array1d(Y_h, dtype=wp_type)
|
|
63
|
+
|
|
64
|
+
wp.launch_tiled(cholesky, dim=[1, 1], inputs=[A_wp, L_wp, X_wp, Y_wp], block_dim=BLOCK_DIM)
|
|
65
|
+
|
|
66
|
+
L_np = np.linalg.cholesky(A_h)
|
|
67
|
+
Y_np = np.linalg.solve(A_h, X_h)
|
|
68
|
+
|
|
69
|
+
print("A:\n", A_h)
|
|
70
|
+
print("L (Warp):\n", L_wp)
|
|
71
|
+
print("L (Numpy):\n", L_np)
|
|
72
|
+
|
|
73
|
+
print("x:\n", X_h)
|
|
74
|
+
print("A\\n (Warp):\n", Y_wp.numpy())
|
|
75
|
+
print("A\\x (Numpy):\n", Y_np)
|
|
76
|
+
|
|
77
|
+
assert np.allclose(Y_wp.numpy(), Y_np) and np.allclose(L_wp.numpy(), L_np)
|
|
78
|
+
|
|
79
|
+
print("Example Tile Cholesky passed")
|
|
@@ -34,11 +34,11 @@ def filter(x: wp.vec2d):
|
|
|
34
34
|
@wp.kernel
|
|
35
35
|
def conv_tiled(x: wp.array2d(dtype=wp.vec2d), y: wp.array2d(dtype=wp.vec2d)):
|
|
36
36
|
i, j, _ = wp.tid()
|
|
37
|
-
a = wp.tile_load(x,
|
|
37
|
+
a = wp.tile_load(x, shape=(TILE_M, TILE_N))
|
|
38
38
|
wp.tile_fft(a)
|
|
39
39
|
b = wp.tile_map(filter, a)
|
|
40
40
|
wp.tile_ifft(b)
|
|
41
|
-
wp.tile_store(y,
|
|
41
|
+
wp.tile_store(y, b)
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
if __name__ == "__main__":
|
|
@@ -26,10 +26,10 @@ TILE_N = 32
|
|
|
26
26
|
@wp.kernel
|
|
27
27
|
def fft_tiled(x: wp.array2d(dtype=wp.vec2d), y: wp.array2d(dtype=wp.vec2d)):
|
|
28
28
|
i, j, _ = wp.tid()
|
|
29
|
-
a = wp.tile_load(x,
|
|
29
|
+
a = wp.tile_load(x, shape=(TILE_M, TILE_N))
|
|
30
30
|
wp.tile_fft(a)
|
|
31
31
|
wp.tile_ifft(a)
|
|
32
|
-
wp.tile_store(y,
|
|
32
|
+
wp.tile_store(y, a)
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
if __name__ == "__main__":
|
|
@@ -38,12 +38,12 @@ def cplx_prod(x: wp.vec2d, y: wp.vec2d):
|
|
|
38
38
|
@wp.kernel
|
|
39
39
|
def conv_tiled(x: wp.array2d(dtype=wp.vec2d), y: wp.array2d(dtype=wp.vec2d), z: wp.array2d(dtype=wp.vec2d)):
|
|
40
40
|
i, j, _ = wp.tid()
|
|
41
|
-
a = wp.tile_load(x,
|
|
42
|
-
b = wp.tile_load(y,
|
|
41
|
+
a = wp.tile_load(x, shape=(TILE_M, TILE_N))
|
|
42
|
+
b = wp.tile_load(y, shape=(TILE_M, TILE_N))
|
|
43
43
|
wp.tile_fft(a)
|
|
44
44
|
c = wp.tile_map(cplx_prod, a, b)
|
|
45
45
|
wp.tile_ifft(c)
|
|
46
|
-
wp.tile_store(z,
|
|
46
|
+
wp.tile_store(z, c)
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
if __name__ == "__main__":
|
|
@@ -30,7 +30,7 @@ def tile_gemm(A: wp.array2d(dtype=wp.float32), B: wp.array2d(dtype=wp.float16),
|
|
|
30
30
|
# output tile index
|
|
31
31
|
i, j = wp.tid()
|
|
32
32
|
|
|
33
|
-
sum = wp.tile_zeros(
|
|
33
|
+
sum = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=wp.float64)
|
|
34
34
|
|
|
35
35
|
_M = A.shape[0]
|
|
36
36
|
_N = B.shape[1]
|
|
@@ -39,13 +39,13 @@ def tile_gemm(A: wp.array2d(dtype=wp.float32), B: wp.array2d(dtype=wp.float16),
|
|
|
39
39
|
count = int(K / TILE_K)
|
|
40
40
|
|
|
41
41
|
for k in range(0, count):
|
|
42
|
-
a = wp.tile_load(A,
|
|
43
|
-
b = wp.tile_load(B,
|
|
42
|
+
a = wp.tile_load(A, shape=(TILE_M, TILE_K), offset=(i * TILE_M, k * TILE_K))
|
|
43
|
+
b = wp.tile_load(B, shape=(TILE_K, TILE_N), offset=(k * TILE_K, j * TILE_N))
|
|
44
44
|
|
|
45
45
|
# sum += a*b
|
|
46
46
|
wp.tile_matmul(a, b, sum)
|
|
47
47
|
|
|
48
|
-
wp.tile_store(C, i, j
|
|
48
|
+
wp.tile_store(C, sum, offset=(i * TILE_M, j * TILE_N))
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
if __name__ == "__main__":
|
|
@@ -117,23 +117,23 @@ def compute(
|
|
|
117
117
|
f = wp.tile(local)
|
|
118
118
|
|
|
119
119
|
# input layer
|
|
120
|
-
w0 = wp.tile_load(weights_0,
|
|
121
|
-
b0 = wp.tile_load(bias_0,
|
|
122
|
-
z = wp.tile_map(relu, wp.tile_matmul(w0, f) + wp.tile_broadcast(b0,
|
|
120
|
+
w0 = wp.tile_load(weights_0, shape=(DIM_HID, DIM_IN))
|
|
121
|
+
b0 = wp.tile_load(bias_0, shape=(DIM_HID, 1))
|
|
122
|
+
z = wp.tile_map(relu, wp.tile_matmul(w0, f) + wp.tile_broadcast(b0, shape=(DIM_HID, NUM_THREADS)))
|
|
123
123
|
|
|
124
124
|
# hidden layer
|
|
125
|
-
w1 = wp.tile_load(weights_1,
|
|
126
|
-
b1 = wp.tile_load(bias_1,
|
|
127
|
-
z = wp.tile_map(relu, wp.tile_matmul(w1, z) + wp.tile_broadcast(b1,
|
|
125
|
+
w1 = wp.tile_load(weights_1, shape=(DIM_HID, DIM_HID))
|
|
126
|
+
b1 = wp.tile_load(bias_1, shape=(DIM_HID, 1))
|
|
127
|
+
z = wp.tile_map(relu, wp.tile_matmul(w1, z) + wp.tile_broadcast(b1, shape=(DIM_HID, NUM_THREADS)))
|
|
128
128
|
|
|
129
|
-
w2 = wp.tile_load(weights_2,
|
|
130
|
-
b2 = wp.tile_load(bias_2,
|
|
131
|
-
z = wp.tile_map(relu, wp.tile_matmul(w2, z) + wp.tile_broadcast(b2,
|
|
129
|
+
w2 = wp.tile_load(weights_2, shape=(DIM_HID, DIM_HID))
|
|
130
|
+
b2 = wp.tile_load(bias_2, shape=(DIM_HID, 1))
|
|
131
|
+
z = wp.tile_map(relu, wp.tile_matmul(w2, z) + wp.tile_broadcast(b2, shape=(DIM_HID, NUM_THREADS)))
|
|
132
132
|
|
|
133
133
|
# output layer
|
|
134
|
-
w3 = wp.tile_load(weights_3,
|
|
135
|
-
b3 = wp.tile_load(bias_3,
|
|
136
|
-
o = wp.tile_map(relu, wp.tile_matmul(w3, z) + wp.tile_broadcast(b3,
|
|
134
|
+
w3 = wp.tile_load(weights_3, shape=(DIM_OUT, DIM_HID))
|
|
135
|
+
b3 = wp.tile_load(bias_3, shape=(DIM_OUT, 1))
|
|
136
|
+
o = wp.tile_map(relu, wp.tile_matmul(w3, z) + wp.tile_broadcast(b3, shape=(DIM_OUT, NUM_THREADS)))
|
|
137
137
|
|
|
138
138
|
# untile back to SIMT
|
|
139
139
|
output = wp.untile(o)
|