warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +482 -110
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +47 -67
- warp/builtins.py +955 -137
- warp/codegen.py +312 -206
- warp/config.py +1 -1
- warp/context.py +1249 -784
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +1 -1
- warp/jax_experimental/ffi.py +2 -1
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +82 -5
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +22 -22
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +283 -69
- warp/native/vec.h +381 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +323 -192
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +85 -6
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +56 -5
- warp/tests/test_codegen.py +3 -2
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +45 -2
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +1 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_types.py +0 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +184 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +554 -264
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
#
|
|
19
19
|
# Demonstrates how to set up tiled rendering and retrieves the pixels from
|
|
20
20
|
# OpenGLRenderer as a Warp array while keeping all memory on the GPU.
|
|
21
|
+
# It also shows how to add an ImGui UI to the renderer.
|
|
21
22
|
#
|
|
22
23
|
###########################################################################
|
|
23
24
|
|
|
@@ -25,14 +26,95 @@ import numpy as np
|
|
|
25
26
|
|
|
26
27
|
import warp as wp
|
|
27
28
|
import warp.render
|
|
29
|
+
from warp.render.imgui_manager import ImGuiManager
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ExampleImGuiManager(ImGuiManager):
|
|
33
|
+
"""An example ImGui manager that displays a few float values."""
|
|
34
|
+
|
|
35
|
+
def __init__(self, renderer, window_pos=(10, 10), window_size=(300, 400)):
|
|
36
|
+
super().__init__(renderer)
|
|
37
|
+
if not self.is_available:
|
|
38
|
+
return
|
|
39
|
+
|
|
40
|
+
# UI properties
|
|
41
|
+
self.window_pos = window_pos
|
|
42
|
+
self.window_size = window_size
|
|
43
|
+
|
|
44
|
+
# Values to display in the UI
|
|
45
|
+
self.some_float = 123.456
|
|
46
|
+
self.editable_float1 = 10.0
|
|
47
|
+
self.editable_float2 = 20.0
|
|
48
|
+
self.editable_float3 = 30.0
|
|
49
|
+
self.editable_vec2 = wp.vec2(0.5, 1.2)
|
|
50
|
+
self.editable_vec3 = wp.vec3(2.1, 3.4, 4.7)
|
|
51
|
+
self.editable_vec4 = wp.vec4(1.5, 3.2, 4.8, 6.1)
|
|
52
|
+
self.warp_array_float = wp.array([0.7, 1.4, 2.8], dtype=float)
|
|
53
|
+
self.warp_array_vec2 = wp.array([wp.vec2(1.1, 2.3), wp.vec2(3.4, 4.2), wp.vec2(5.6, 6.9)], dtype=wp.vec2)
|
|
54
|
+
self.warp_array_vec3 = wp.array(
|
|
55
|
+
[wp.vec3(0.5, 1.7, 2.9), wp.vec3(3.2, 4.8, 5.1), wp.vec3(6.4, 7.6, 8.3)], dtype=wp.vec3
|
|
56
|
+
)
|
|
57
|
+
self.warp_array_vec4 = wp.array([wp.vec4(1.2, 2.4, 3.6, 4.8), wp.vec4(5.1, 6.3, 7.5, 8.7)], dtype=wp.vec4)
|
|
58
|
+
|
|
59
|
+
def draw_ui(self):
|
|
60
|
+
# set window position and size once
|
|
61
|
+
self.imgui.set_next_window_size(self.window_size[0], self.window_size[1], self.imgui.ONCE)
|
|
62
|
+
self.imgui.set_next_window_position(self.window_pos[0], self.window_pos[1], self.imgui.ONCE)
|
|
63
|
+
|
|
64
|
+
self.imgui.begin("Warp Float Values")
|
|
65
|
+
|
|
66
|
+
self.imgui.text(f"A read-only float: {self.some_float}")
|
|
67
|
+
self.imgui.separator()
|
|
68
|
+
|
|
69
|
+
self.imgui.text("Editable floats:")
|
|
70
|
+
changed1, self.editable_float1 = self.imgui.slider_float("Slider", self.editable_float1, 0.0, 100.0)
|
|
71
|
+
changed2, self.editable_float2 = self.imgui.drag_float("Drag", self.editable_float2, 0.1, 0.0, 100.0)
|
|
72
|
+
changed3, self.editable_float3 = self.imgui.input_float("Input", self.editable_float3)
|
|
73
|
+
|
|
74
|
+
changed, self.editable_vec2 = self.drag_vec2("Vec2", self.editable_vec2)
|
|
75
|
+
changed, self.editable_vec3 = self.drag_vec3("Vec3", self.editable_vec3)
|
|
76
|
+
changed, self.editable_vec4 = self.drag_vec4("Vec4", self.editable_vec4)
|
|
77
|
+
|
|
78
|
+
changed, self.warp_array_float = self.drag_float_list("Float", self.warp_array_float)
|
|
79
|
+
changed, self.warp_array_vec2 = self.drag_vec2_list("Vec2", self.warp_array_vec2)
|
|
80
|
+
changed, self.warp_array_vec3 = self.drag_vec3_list("Vec3", self.warp_array_vec3)
|
|
81
|
+
changed, self.warp_array_vec4 = self.drag_vec4_list("Vec4", self.warp_array_vec4)
|
|
82
|
+
|
|
83
|
+
self.imgui.separator()
|
|
84
|
+
self.imgui.text("File Dialog Examples:")
|
|
85
|
+
|
|
86
|
+
if self.imgui.button("Open File"):
|
|
87
|
+
file_path = self.open_load_file_dialog(
|
|
88
|
+
title="Select a File", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
|
|
89
|
+
)
|
|
90
|
+
if file_path:
|
|
91
|
+
print(f"Selected file to open: {file_path}")
|
|
92
|
+
|
|
93
|
+
if self.imgui.button("Save File"):
|
|
94
|
+
file_path = self.open_save_file_dialog(
|
|
95
|
+
title="Save As", defaultextension=".txt", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
|
|
96
|
+
)
|
|
97
|
+
if file_path:
|
|
98
|
+
print(f"Selected file to save: {file_path}")
|
|
99
|
+
|
|
100
|
+
self.imgui.end()
|
|
28
101
|
|
|
29
102
|
|
|
30
103
|
class Example:
|
|
31
|
-
def __init__(self, num_tiles=4, custom_tile_arrangement=False):
|
|
104
|
+
def __init__(self, num_tiles=4, custom_tile_arrangement=False, use_imgui=True):
|
|
32
105
|
if num_tiles < 1:
|
|
33
106
|
raise ValueError("num_tiles must be greater than or equal to 1.")
|
|
34
107
|
|
|
35
108
|
self.renderer = wp.render.OpenGLRenderer(vsync=False)
|
|
109
|
+
self.use_imgui = use_imgui
|
|
110
|
+
|
|
111
|
+
if self.use_imgui:
|
|
112
|
+
self.imgui_manager = ExampleImGuiManager(self.renderer)
|
|
113
|
+
if self.imgui_manager.is_available:
|
|
114
|
+
self.renderer.render_2d_callbacks.append(self.imgui_manager.render_frame)
|
|
115
|
+
else:
|
|
116
|
+
self.use_imgui = False
|
|
117
|
+
|
|
36
118
|
instance_ids = []
|
|
37
119
|
|
|
38
120
|
if custom_tile_arrangement:
|
|
@@ -81,6 +163,11 @@ class Example:
|
|
|
81
163
|
)
|
|
82
164
|
self.renderer.end_frame()
|
|
83
165
|
|
|
166
|
+
def clear(self):
|
|
167
|
+
if self.use_imgui:
|
|
168
|
+
self.imgui_manager.shutdown()
|
|
169
|
+
self.renderer.clear()
|
|
170
|
+
|
|
84
171
|
|
|
85
172
|
if __name__ == "__main__":
|
|
86
173
|
import argparse
|
|
@@ -103,11 +190,21 @@ if __name__ == "__main__":
|
|
|
103
190
|
help="Whether to split tiles into subplots when --show_plot is True.",
|
|
104
191
|
)
|
|
105
192
|
parser.add_argument("--custom_tile_arrangement", action="store_true", help="Apply custom tile arrangement.")
|
|
193
|
+
parser.add_argument(
|
|
194
|
+
"--use_imgui",
|
|
195
|
+
type=lambda x: bool(distutils.util.strtobool(x.strip())),
|
|
196
|
+
default=True,
|
|
197
|
+
help="Enable or disable the ImGui window.",
|
|
198
|
+
)
|
|
106
199
|
|
|
107
200
|
args = parser.parse_known_args()[0]
|
|
108
201
|
|
|
109
202
|
with wp.ScopedDevice(args.device):
|
|
110
|
-
example = Example(
|
|
203
|
+
example = Example(
|
|
204
|
+
num_tiles=args.num_tiles,
|
|
205
|
+
custom_tile_arrangement=args.custom_tile_arrangement,
|
|
206
|
+
use_imgui=args.use_imgui,
|
|
207
|
+
)
|
|
111
208
|
|
|
112
209
|
channels = 1 if args.render_mode == "depth" else 3
|
|
113
210
|
|
|
@@ -190,4 +287,4 @@ if __name__ == "__main__":
|
|
|
190
287
|
fig.canvas.draw()
|
|
191
288
|
fig.canvas.flush_events()
|
|
192
289
|
|
|
193
|
-
example.
|
|
290
|
+
example.clear()
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
# grid and the PicQuadrature class.
|
|
21
21
|
###########################################################################
|
|
22
22
|
|
|
23
|
+
from dataclasses import dataclass
|
|
23
24
|
from typing import Any
|
|
24
25
|
|
|
25
26
|
import numpy as np
|
|
@@ -27,9 +28,8 @@ import numpy as np
|
|
|
27
28
|
import warp as wp
|
|
28
29
|
import warp.examples.fem.utils as fem_example_utils
|
|
29
30
|
import warp.fem as fem
|
|
30
|
-
import warp.
|
|
31
|
+
import warp.render
|
|
31
32
|
from warp.fem import Domain, Field, Sample, at_node, div, grad, integrand
|
|
32
|
-
from warp.sim import Model, State
|
|
33
33
|
from warp.sparse import BsrMatrix, bsr_mm, bsr_mv, bsr_transposed
|
|
34
34
|
|
|
35
35
|
|
|
@@ -186,76 +186,83 @@ def solve_incompressibility(
|
|
|
186
186
|
|
|
187
187
|
|
|
188
188
|
class Example:
|
|
189
|
-
|
|
189
|
+
@dataclass
|
|
190
|
+
class State:
|
|
191
|
+
particle_q: wp.array(dtype=wp.vec3)
|
|
192
|
+
particle_qd: wp.array(dtype=wp.vec3)
|
|
193
|
+
particle_qd_grad: wp.array(dtype=wp.mat33)
|
|
194
|
+
|
|
195
|
+
def __init__(self, quiet=False, stage_path="example_apic_fluid.usd", voxel_size=1.0, opengl=False):
|
|
196
|
+
self.gravity = wp.vec3(0.0, -10.0, 0.0)
|
|
197
|
+
|
|
190
198
|
fps = 60
|
|
199
|
+
self.sim_substeps = 1
|
|
191
200
|
self.frame_dt = 1.0 / fps
|
|
192
201
|
self.current_frame = 0
|
|
193
|
-
|
|
194
|
-
self.sim_substeps = 1
|
|
195
202
|
self.sim_dt = self.frame_dt / self.sim_substeps
|
|
196
203
|
self.voxel_size = voxel_size
|
|
197
204
|
|
|
198
205
|
self._quiet = quiet
|
|
199
206
|
|
|
200
207
|
# particle emission
|
|
201
|
-
particle_grid_lo = wp.vec3(-5)
|
|
202
|
-
particle_grid_hi = wp.vec3(5)
|
|
203
|
-
|
|
204
|
-
grid_cell_size = voxel_size
|
|
205
|
-
grid_cell_volume = np.prod(grid_cell_size)
|
|
206
|
-
|
|
207
208
|
PARTICLES_PER_CELL_DIM = 2
|
|
208
|
-
self.radius = float(np.max(
|
|
209
|
+
self.radius = float(np.max(voxel_size) / (2 * PARTICLES_PER_CELL_DIM))
|
|
209
210
|
|
|
211
|
+
particle_grid_lo = np.full(3, -5)
|
|
212
|
+
particle_grid_hi = np.full(3, 5)
|
|
210
213
|
particle_grid_res = (
|
|
211
214
|
np.array((particle_grid_hi - particle_grid_lo) / voxel_size, dtype=int) * PARTICLES_PER_CELL_DIM
|
|
212
215
|
)
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
216
|
+
|
|
217
|
+
self.particle_volumes, particle_q = self._spawn_particles(
|
|
218
|
+
particle_grid_res, particle_grid_lo, particle_grid_hi, packing_fraction=1.0
|
|
219
|
+
)
|
|
220
|
+
particle_qd = wp.zeros_like(particle_q)
|
|
221
|
+
|
|
222
|
+
particle_count = particle_q.shape[0]
|
|
223
|
+
if not self._quiet:
|
|
224
|
+
print("Particle count:", particle_count)
|
|
225
|
+
|
|
226
|
+
# Allocate states
|
|
227
|
+
self.state_0 = self.State(
|
|
228
|
+
wp.clone(particle_q),
|
|
229
|
+
wp.clone(particle_qd),
|
|
230
|
+
particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
|
|
231
|
+
)
|
|
232
|
+
self.state_1 = self.State(
|
|
233
|
+
wp.clone(particle_q),
|
|
234
|
+
wp.clone(particle_qd),
|
|
235
|
+
particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
|
|
229
236
|
)
|
|
230
|
-
self.model: Model = builder.finalize()
|
|
231
|
-
self.model.ground = False
|
|
232
237
|
|
|
233
238
|
# Storage for temporary variables
|
|
234
239
|
self.temporary_store = fem.TemporaryStore()
|
|
235
240
|
|
|
236
|
-
|
|
237
|
-
|
|
241
|
+
# initialize renderers
|
|
242
|
+
self.opengl_renderer = None
|
|
243
|
+
self.usd_renderer = None
|
|
238
244
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
245
|
+
try:
|
|
246
|
+
if opengl:
|
|
247
|
+
self.opengl_renderer = warp.render.OpenGLRenderer(
|
|
248
|
+
screen_width=1024,
|
|
249
|
+
screen_height=1024,
|
|
250
|
+
)
|
|
251
|
+
except Exception as err:
|
|
252
|
+
wp.utils.warn(f"Could not initialize OpenGL renderer: {err}.")
|
|
244
253
|
|
|
245
254
|
try:
|
|
246
255
|
if stage_path:
|
|
247
|
-
self.
|
|
248
|
-
else:
|
|
249
|
-
self.renderer = None
|
|
256
|
+
self.usd_renderer = warp.render.UsdRenderer(stage_path)
|
|
250
257
|
except Exception as err:
|
|
251
|
-
print(f"Could not initialize
|
|
258
|
+
print(f"Could not initialize Usd renderer '{stage_path}': {err}.")
|
|
252
259
|
|
|
253
260
|
def step(self):
|
|
254
261
|
fem.set_default_temporary_store(self.temporary_store)
|
|
255
262
|
|
|
256
263
|
self.current_frame = self.current_frame + 1
|
|
257
264
|
|
|
258
|
-
with wp.ScopedTimer(f"simulate frame {self.current_frame}",
|
|
265
|
+
with wp.ScopedTimer(f"simulate frame {self.current_frame}", synchronize=True):
|
|
259
266
|
for _s in range(self.sim_substeps):
|
|
260
267
|
# Allocate the voxels and create the warp.fem geometry
|
|
261
268
|
volume = wp.Volume.allocate_by_voxels(
|
|
@@ -297,7 +304,7 @@ class Example:
|
|
|
297
304
|
|
|
298
305
|
# Bin particles to grid cells
|
|
299
306
|
pic = fem.PicQuadrature(
|
|
300
|
-
domain=domain, positions=self.state_0.particle_q, measures=self.
|
|
307
|
+
domain=domain, positions=self.state_0.particle_q, measures=self.particle_volumes
|
|
301
308
|
)
|
|
302
309
|
|
|
303
310
|
# Compute inverse particle volume for each grid node
|
|
@@ -318,7 +325,7 @@ class Example:
|
|
|
318
325
|
"velocities": self.state_0.particle_qd,
|
|
319
326
|
"velocity_gradients": self.state_0.particle_qd_grad,
|
|
320
327
|
"dt": self.sim_dt,
|
|
321
|
-
"gravity": self.
|
|
328
|
+
"gravity": self.gravity,
|
|
322
329
|
},
|
|
323
330
|
output_dtype=wp.vec3,
|
|
324
331
|
)
|
|
@@ -377,16 +384,54 @@ class Example:
|
|
|
377
384
|
|
|
378
385
|
fem.set_default_temporary_store(None)
|
|
379
386
|
|
|
387
|
+
@staticmethod
|
|
388
|
+
def _spawn_particles(res, bounds_lo, bounds_hi, packing_fraction):
|
|
389
|
+
Nx = res[0]
|
|
390
|
+
Ny = res[1]
|
|
391
|
+
Nz = res[2]
|
|
392
|
+
|
|
393
|
+
px = np.linspace(bounds_lo[0], bounds_hi[0], Nx + 1)
|
|
394
|
+
py = np.linspace(bounds_lo[1], bounds_hi[1], Ny + 1)
|
|
395
|
+
pz = np.linspace(bounds_lo[2], bounds_hi[2], Nz + 1)
|
|
396
|
+
|
|
397
|
+
points = np.stack(np.meshgrid(px, py, pz)).reshape(3, -1).T
|
|
398
|
+
|
|
399
|
+
cell_size = (bounds_hi - bounds_lo) / res
|
|
400
|
+
cell_volume = np.prod(cell_size)
|
|
401
|
+
|
|
402
|
+
radius = np.max(cell_size) * 0.5
|
|
403
|
+
volume = np.prod(cell_volume) * packing_fraction
|
|
404
|
+
|
|
405
|
+
rng = np.random.default_rng(42)
|
|
406
|
+
points += 2.0 * radius * (rng.random(points.shape) - 0.5)
|
|
407
|
+
|
|
408
|
+
volumes = wp.full(points.shape[0], volume, dtype=float)
|
|
409
|
+
points = wp.array(np.ascontiguousarray(points), dtype=wp.vec3)
|
|
410
|
+
return volumes, points
|
|
411
|
+
|
|
380
412
|
def render(self, is_live=False):
|
|
381
|
-
if self.
|
|
413
|
+
if self.usd_renderer is None and self.opengl_renderer is None:
|
|
382
414
|
return
|
|
383
415
|
|
|
384
|
-
with wp.ScopedTimer("render",
|
|
416
|
+
with wp.ScopedTimer("render", synchronize=True):
|
|
385
417
|
time = self.current_frame * self.frame_dt
|
|
386
418
|
|
|
387
|
-
self.
|
|
388
|
-
|
|
389
|
-
|
|
419
|
+
if self.usd_renderer is not None:
|
|
420
|
+
self.usd_renderer.begin_frame(time)
|
|
421
|
+
self.usd_renderer.render_points(
|
|
422
|
+
"particles",
|
|
423
|
+
self.state_0.particle_q.numpy(),
|
|
424
|
+
radius=self.radius,
|
|
425
|
+
)
|
|
426
|
+
self.usd_renderer.end_frame()
|
|
427
|
+
if self.opengl_renderer is not None:
|
|
428
|
+
self.opengl_renderer.begin_frame(time)
|
|
429
|
+
self.opengl_renderer.render_points(
|
|
430
|
+
"particles",
|
|
431
|
+
self.state_0.particle_q,
|
|
432
|
+
radius=self.radius,
|
|
433
|
+
)
|
|
434
|
+
self.opengl_renderer.end_frame()
|
|
390
435
|
|
|
391
436
|
|
|
392
437
|
if __name__ == "__main__":
|
|
@@ -404,6 +449,7 @@ if __name__ == "__main__":
|
|
|
404
449
|
)
|
|
405
450
|
parser.add_argument("--num_frames", type=int, default=250, help="Total number of frames.")
|
|
406
451
|
parser.add_argument("--quiet", action="store_true")
|
|
452
|
+
parser.add_argument("--opengl", action="store_true")
|
|
407
453
|
parser.add_argument(
|
|
408
454
|
"--voxel_size",
|
|
409
455
|
type=float,
|
|
@@ -413,11 +459,11 @@ if __name__ == "__main__":
|
|
|
413
459
|
args = parser.parse_known_args()[0]
|
|
414
460
|
|
|
415
461
|
with wp.ScopedDevice(args.device):
|
|
416
|
-
example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size)
|
|
462
|
+
example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size, opengl=args.opengl)
|
|
417
463
|
|
|
418
464
|
for _ in range(args.num_frames):
|
|
419
465
|
example.step()
|
|
420
466
|
example.render()
|
|
421
467
|
|
|
422
|
-
if example.
|
|
423
|
-
example.
|
|
468
|
+
if example.usd_renderer is not None:
|
|
469
|
+
example.usd_renderer.save()
|
|
@@ -116,11 +116,11 @@ class Example:
|
|
|
116
116
|
values={"ang_vel": ang_vel},
|
|
117
117
|
)
|
|
118
118
|
|
|
119
|
-
|
|
119
|
+
self._side_test = fem.make_test(space=scalar_space, domain=sides)
|
|
120
120
|
side_trial = fem.make_trial(space=scalar_space, domain=sides)
|
|
121
121
|
fem.integrate(
|
|
122
122
|
upwind_transport_form,
|
|
123
|
-
fields={"phi": side_trial, "psi":
|
|
123
|
+
fields={"phi": side_trial, "psi": self._side_test},
|
|
124
124
|
values={"ang_vel": ang_vel},
|
|
125
125
|
output=matrix_transport,
|
|
126
126
|
add=True,
|
|
@@ -132,7 +132,7 @@ class Example:
|
|
|
132
132
|
)
|
|
133
133
|
matrix_diffusion += fem.integrate(
|
|
134
134
|
sip_diffusion_form,
|
|
135
|
-
fields={"phi": side_trial, "psi":
|
|
135
|
+
fields={"phi": side_trial, "psi": self._side_test},
|
|
136
136
|
)
|
|
137
137
|
self._matrix = matrix_inertia + matrix_transport + viscosity * matrix_diffusion
|
|
138
138
|
|
|
@@ -140,8 +140,12 @@ class Example:
|
|
|
140
140
|
self._phi_field = scalar_space.make_field()
|
|
141
141
|
fem.interpolate(initial_condition, dest=self._phi_field)
|
|
142
142
|
|
|
143
|
+
self._phi_curvature_field = scalar_space.make_field()
|
|
144
|
+
self._compute_phi_curvature()
|
|
145
|
+
|
|
143
146
|
self.renderer = fem_example_utils.Plot()
|
|
144
147
|
self.renderer.add_field("phi", self._phi_field)
|
|
148
|
+
self.renderer.add_field("phi_curvature", self._phi_curvature_field)
|
|
145
149
|
|
|
146
150
|
def step(self):
|
|
147
151
|
self.current_frame += 1
|
|
@@ -154,12 +158,29 @@ class Example:
|
|
|
154
158
|
|
|
155
159
|
phi = wp.zeros_like(rhs)
|
|
156
160
|
fem_example_utils.bsr_cg(self._matrix, b=rhs, x=phi, method="bicgstab", quiet=self._quiet)
|
|
157
|
-
|
|
158
161
|
wp.utils.array_cast(in_array=phi, out_array=self._phi_field.dof_values)
|
|
159
162
|
|
|
163
|
+
# for visualization purposes only
|
|
164
|
+
self._compute_phi_curvature()
|
|
165
|
+
|
|
166
|
+
def _compute_phi_curvature(self):
|
|
167
|
+
fem.integrate(
|
|
168
|
+
diffusion_form,
|
|
169
|
+
fields={"u": self._phi_field, "v": self._test},
|
|
170
|
+
output=self._phi_curvature_field.dof_values,
|
|
171
|
+
)
|
|
172
|
+
fem.integrate(
|
|
173
|
+
sip_diffusion_form,
|
|
174
|
+
fields={"phi": self._phi_field.trace(), "psi": self._side_test},
|
|
175
|
+
output=self._phi_curvature_field.dof_values,
|
|
176
|
+
add=True,
|
|
177
|
+
)
|
|
178
|
+
|
|
160
179
|
def render(self):
|
|
161
180
|
self.renderer.begin_frame(time=self.current_frame * self.sim_dt)
|
|
162
181
|
self.renderer.add_field("phi", self._phi_field)
|
|
182
|
+
self.renderer.add_field("phi_curvature", self._phi_curvature_field)
|
|
183
|
+
|
|
163
184
|
self.renderer.end_frame()
|
|
164
185
|
|
|
165
186
|
|
|
@@ -77,8 +77,7 @@ class DistributedSystem:
|
|
|
77
77
|
stream = wp.get_stream()
|
|
78
78
|
|
|
79
79
|
for mat_i, x_i, y_i, idx in zip(*self.rank_data):
|
|
80
|
-
|
|
81
|
-
tmp_i = wp.array(ptr=tmp.ptr, device=tmp.device, capacity=tmp.capacity, dtype=tmp.dtype, shape=idx.shape)
|
|
80
|
+
tmp_i = tmp[: idx.size]
|
|
82
81
|
|
|
83
82
|
# Compress rhs on rank 0
|
|
84
83
|
x_idx = wp.indexedarray(x, idx)
|
|
@@ -161,7 +160,13 @@ class Example:
|
|
|
161
160
|
|
|
162
161
|
with wp.ScopedDevice(main_device):
|
|
163
162
|
fem_example_utils.bsr_cg(
|
|
164
|
-
A,
|
|
163
|
+
A,
|
|
164
|
+
x=global_res,
|
|
165
|
+
b=glob_rhs,
|
|
166
|
+
use_diag_precond=False,
|
|
167
|
+
quiet=self._quiet,
|
|
168
|
+
mv_routine=A.mv_routine,
|
|
169
|
+
mv_routine_uses_multiple_cuda_contexts=True,
|
|
165
170
|
)
|
|
166
171
|
|
|
167
172
|
array_cast(in_array=global_res, out_array=self._scalar_field.dof_values)
|
warp/examples/fem/utils.py
CHANGED
|
@@ -14,12 +14,14 @@
|
|
|
14
14
|
# limitations under the License.
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
import gc
|
|
17
18
|
from typing import Any, Dict, Optional, Tuple
|
|
18
19
|
|
|
19
20
|
import numpy as np
|
|
20
21
|
|
|
21
22
|
import warp as wp
|
|
22
23
|
import warp.fem as fem
|
|
24
|
+
from warp.context import assert_conditional_graph_support
|
|
23
25
|
from warp.optim.linear import LinearOperator, aslinearoperator, preconditioner
|
|
24
26
|
from warp.sparse import BsrMatrix, bsr_get_diag, bsr_mv, bsr_transposed
|
|
25
27
|
|
|
@@ -230,6 +232,7 @@ def bsr_cg(
|
|
|
230
232
|
quiet=False,
|
|
231
233
|
method: str = "cg",
|
|
232
234
|
M: BsrMatrix = None,
|
|
235
|
+
mv_routine_uses_multiple_cuda_contexts: bool = False,
|
|
233
236
|
) -> Tuple[float, int]:
|
|
234
237
|
"""Solves the linear system A x = b using an iterative solver, optionally with diagonal preconditioning
|
|
235
238
|
|
|
@@ -244,6 +247,8 @@ def bsr_cg(
|
|
|
244
247
|
mv_routine: Matrix-vector multiplication routine to use for multiplications with ``A``
|
|
245
248
|
quiet: if True, do not print iteration residuals
|
|
246
249
|
method: Iterative solver method to use, defaults to Conjugate Gradient
|
|
250
|
+
mv_routine_uses_multiple_cuda_contexts: Whether the matrix-vector multiplication routine uses multiple CUDA contexts,
|
|
251
|
+
which prevents the use of conditional CUDA graphs.
|
|
247
252
|
|
|
248
253
|
Returns:
|
|
249
254
|
Tuple (residual norm, iteration count)
|
|
@@ -260,10 +265,53 @@ def bsr_cg(
|
|
|
260
265
|
|
|
261
266
|
func = _get_linear_solver_func(method_name=method)
|
|
262
267
|
|
|
263
|
-
|
|
264
|
-
print(f"{func.__name__}: at iteration {i} error = \t {err} \t tol: {tol}")
|
|
268
|
+
callback = None
|
|
265
269
|
|
|
266
|
-
|
|
270
|
+
use_cuda_graph = A.device.is_cuda and not wp.config.verify_cuda
|
|
271
|
+
capturable = use_cuda_graph and not mv_routine_uses_multiple_cuda_contexts
|
|
272
|
+
|
|
273
|
+
if capturable:
|
|
274
|
+
try:
|
|
275
|
+
assert_conditional_graph_support()
|
|
276
|
+
except RuntimeError:
|
|
277
|
+
capturable = False
|
|
278
|
+
|
|
279
|
+
if not quiet:
|
|
280
|
+
if capturable:
|
|
281
|
+
|
|
282
|
+
@wp.func_native(snippet=f'printf("%s: ", "{func.__name__}");')
|
|
283
|
+
def print_method_name():
|
|
284
|
+
pass
|
|
285
|
+
|
|
286
|
+
@fem.cache.dynamic_kernel(suffix=f"{check_every}{func.__name__}")
|
|
287
|
+
def device_cg_callback(
|
|
288
|
+
cur_iter: wp.array(dtype=int),
|
|
289
|
+
err_sq: wp.array(dtype=Any),
|
|
290
|
+
atol_sq: wp.array(dtype=Any),
|
|
291
|
+
):
|
|
292
|
+
if cur_iter[0] % check_every == 0:
|
|
293
|
+
print_method_name()
|
|
294
|
+
wp.printf(
|
|
295
|
+
"at iteration %d error = \t %f \t tol: %f\n",
|
|
296
|
+
cur_iter[0],
|
|
297
|
+
wp.sqrt(err_sq[0]),
|
|
298
|
+
wp.sqrt(atol_sq[0]),
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
if check_every > 0:
|
|
302
|
+
callback = device_cg_callback
|
|
303
|
+
else:
|
|
304
|
+
|
|
305
|
+
def print_callback(i, err, tol):
|
|
306
|
+
print(f"{func.__name__}: at iteration {i} error = \t {err} \t tol: {tol}")
|
|
307
|
+
|
|
308
|
+
callback = print_callback
|
|
309
|
+
|
|
310
|
+
if use_cuda_graph:
|
|
311
|
+
# Temporarily disable garbage collection
|
|
312
|
+
# Garbage collection of externally-allocated objects during graph capture may lead to
|
|
313
|
+
# invalid operations or memory access errors.
|
|
314
|
+
gc.disable()
|
|
267
315
|
|
|
268
316
|
end_iter, err, atol = func(
|
|
269
317
|
A=A,
|
|
@@ -271,12 +319,20 @@ def bsr_cg(
|
|
|
271
319
|
x=x,
|
|
272
320
|
maxiter=max_iters,
|
|
273
321
|
tol=tol,
|
|
274
|
-
check_every=check_every,
|
|
322
|
+
check_every=0 if capturable else check_every,
|
|
275
323
|
M=M,
|
|
276
324
|
callback=callback,
|
|
277
|
-
use_cuda_graph=
|
|
325
|
+
use_cuda_graph=use_cuda_graph,
|
|
278
326
|
)
|
|
279
327
|
|
|
328
|
+
if use_cuda_graph:
|
|
329
|
+
gc.enable()
|
|
330
|
+
|
|
331
|
+
if isinstance(end_iter, wp.array):
|
|
332
|
+
end_iter = end_iter.numpy()[0]
|
|
333
|
+
err = np.sqrt(err.numpy()[0])
|
|
334
|
+
atol = np.sqrt(atol.numpy()[0])
|
|
335
|
+
|
|
280
336
|
if not quiet:
|
|
281
337
|
res_str = "OK" if err <= atol else "TRUNCATED"
|
|
282
338
|
print(f"{func.__name__}: terminated after {end_iter} iterations with error = \t {err} ({res_str})")
|
|
@@ -437,28 +493,18 @@ def bsr_solve_saddle(
|
|
|
437
493
|
wp.copy(src=b_u, dest=saddle_system.u_slice(b))
|
|
438
494
|
wp.copy(src=b_p, dest=saddle_system.p_slice(b))
|
|
439
495
|
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
callback = None if quiet else print_callback
|
|
446
|
-
|
|
447
|
-
end_iter, err, atol = func(
|
|
448
|
-
A=saddle_system,
|
|
449
|
-
b=b,
|
|
450
|
-
x=x,
|
|
451
|
-
maxiter=max_iters,
|
|
496
|
+
err, end_iter = bsr_cg(
|
|
497
|
+
saddle_system,
|
|
498
|
+
x,
|
|
499
|
+
b,
|
|
500
|
+
max_iters=max_iters,
|
|
452
501
|
tol=tol,
|
|
453
502
|
check_every=check_every,
|
|
503
|
+
quiet=quiet,
|
|
504
|
+
method=method,
|
|
454
505
|
M=saddle_system.preconditioner,
|
|
455
|
-
callback=callback,
|
|
456
506
|
)
|
|
457
507
|
|
|
458
|
-
if not quiet:
|
|
459
|
-
res_str = "OK" if err <= atol else "TRUNCATED"
|
|
460
|
-
print(f"{func.__name__}: terminated after {end_iter} iterations with absolute error = \t {err} ({res_str})")
|
|
461
|
-
|
|
462
508
|
wp.copy(dest=x_u, src=saddle_system.u_slice(x))
|
|
463
509
|
wp.copy(dest=x_p, src=saddle_system.p_slice(x))
|
|
464
510
|
|
warp/fabric.py
CHANGED
|
@@ -211,7 +211,7 @@ class fabricarray(noncontiguous_array_base[T]):
|
|
|
211
211
|
allocator = self.device.get_allocator()
|
|
212
212
|
buckets_ptr = allocator.alloc(buckets_size)
|
|
213
213
|
cuda_stream = self.device.stream.cuda_stream
|
|
214
|
-
runtime.core.
|
|
214
|
+
runtime.core.wp_memcpy_h2d(
|
|
215
215
|
self.device.context, buckets_ptr, ctypes.addressof(buckets), buckets_size, cuda_stream
|
|
216
216
|
)
|
|
217
217
|
self.deleter = allocator.deleter
|