warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.1__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (141)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +1904 -114
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +331 -101
  7. warp/builtins.py +1244 -160
  8. warp/codegen.py +317 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1465 -789
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_kernel.py +2 -1
  18. warp/fabric.py +1 -1
  19. warp/fem/cache.py +27 -19
  20. warp/fem/domain.py +2 -2
  21. warp/fem/field/nodal_field.py +2 -2
  22. warp/fem/field/virtual.py +264 -166
  23. warp/fem/geometry/geometry.py +5 -5
  24. warp/fem/integrate.py +129 -51
  25. warp/fem/space/restriction.py +4 -0
  26. warp/fem/space/shape/tet_shape_function.py +3 -10
  27. warp/jax_experimental/custom_call.py +25 -2
  28. warp/jax_experimental/ffi.py +22 -1
  29. warp/jax_experimental/xla_ffi.py +16 -7
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +99 -4
  32. warp/native/builtin.h +86 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +8 -2
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +41 -10
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +2 -2
  48. warp/native/mat.h +1910 -116
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +4 -2
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +331 -14
  59. warp/native/range.h +7 -1
  60. warp/native/reduce.cpp +10 -10
  61. warp/native/reduce.cu +13 -14
  62. warp/native/runlength_encode.cpp +2 -2
  63. warp/native/runlength_encode.cu +5 -5
  64. warp/native/scan.cpp +3 -3
  65. warp/native/scan.cu +4 -4
  66. warp/native/sort.cpp +10 -10
  67. warp/native/sort.cu +40 -31
  68. warp/native/sort.h +2 -0
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +13 -13
  71. warp/native/spatial.h +366 -17
  72. warp/native/temp_buffer.h +2 -2
  73. warp/native/tile.h +471 -82
  74. warp/native/vec.h +328 -14
  75. warp/native/volume.cpp +54 -54
  76. warp/native/volume.cu +1 -1
  77. warp/native/volume.h +2 -1
  78. warp/native/volume_builder.cu +30 -37
  79. warp/native/warp.cpp +150 -149
  80. warp/native/warp.cu +377 -216
  81. warp/native/warp.h +227 -226
  82. warp/optim/linear.py +736 -271
  83. warp/render/imgui_manager.py +289 -0
  84. warp/render/render_opengl.py +99 -18
  85. warp/render/render_usd.py +1 -0
  86. warp/sim/graph_coloring.py +2 -2
  87. warp/sparse.py +558 -175
  88. warp/tests/aux_test_module_aot.py +7 -0
  89. warp/tests/cuda/test_async.py +3 -3
  90. warp/tests/cuda/test_conditional_captures.py +101 -0
  91. warp/tests/geometry/test_hash_grid.py +38 -0
  92. warp/tests/geometry/test_marching_cubes.py +233 -12
  93. warp/tests/interop/test_jax.py +608 -28
  94. warp/tests/sim/test_coloring.py +6 -6
  95. warp/tests/test_array.py +58 -5
  96. warp/tests/test_codegen.py +4 -3
  97. warp/tests/test_context.py +8 -15
  98. warp/tests/test_enum.py +136 -0
  99. warp/tests/test_examples.py +2 -2
  100. warp/tests/test_fem.py +49 -6
  101. warp/tests/test_fixedarray.py +229 -0
  102. warp/tests/test_func.py +18 -15
  103. warp/tests/test_future_annotations.py +7 -5
  104. warp/tests/test_linear_solvers.py +30 -0
  105. warp/tests/test_map.py +15 -1
  106. warp/tests/test_mat.py +1518 -378
  107. warp/tests/test_mat_assign_copy.py +178 -0
  108. warp/tests/test_mat_constructors.py +574 -0
  109. warp/tests/test_module_aot.py +287 -0
  110. warp/tests/test_print.py +69 -0
  111. warp/tests/test_quat.py +140 -34
  112. warp/tests/test_quat_assign_copy.py +145 -0
  113. warp/tests/test_reload.py +2 -1
  114. warp/tests/test_sparse.py +71 -0
  115. warp/tests/test_spatial.py +140 -34
  116. warp/tests/test_spatial_assign_copy.py +160 -0
  117. warp/tests/test_struct.py +43 -3
  118. warp/tests/test_tuple.py +96 -0
  119. warp/tests/test_types.py +61 -20
  120. warp/tests/test_vec.py +179 -34
  121. warp/tests/test_vec_assign_copy.py +143 -0
  122. warp/tests/tile/test_tile.py +245 -18
  123. warp/tests/tile/test_tile_cholesky.py +605 -0
  124. warp/tests/tile/test_tile_load.py +169 -0
  125. warp/tests/tile/test_tile_mathdx.py +2 -558
  126. warp/tests/tile/test_tile_matmul.py +1 -1
  127. warp/tests/tile/test_tile_mlp.py +1 -1
  128. warp/tests/tile/test_tile_shared_memory.py +5 -5
  129. warp/tests/unittest_suites.py +6 -0
  130. warp/tests/walkthrough_debug.py +1 -1
  131. warp/thirdparty/unittest_parallel.py +108 -9
  132. warp/types.py +571 -267
  133. warp/utils.py +68 -86
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
  135. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
  136. warp/native/marching.cpp +0 -19
  137. warp/native/marching.cu +0 -514
  138. warp/native/marching.h +0 -19
  139. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
  140. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
  141. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
@@ -22,6 +22,7 @@
22
22
  # Note: requires a CUDA-capable device
23
23
  ###########################################################################
24
24
 
25
+
25
26
  import warp as wp
26
27
  import warp.render
27
28
 
@@ -18,6 +18,7 @@
18
18
  #
19
19
  # Demonstrates how to set up tiled rendering and retrieves the pixels from
20
20
  # OpenGLRenderer as a Warp array while keeping all memory on the GPU.
21
+ # It also shows how to add an ImGui UI to the renderer.
21
22
  #
22
23
  ###########################################################################
23
24
 
@@ -25,14 +26,95 @@ import numpy as np
25
26
 
26
27
  import warp as wp
27
28
  import warp.render
29
+ from warp.render.imgui_manager import ImGuiManager
30
+
31
+
32
+ class ExampleImGuiManager(ImGuiManager):
33
+ """An example ImGui manager that displays a few float values."""
34
+
35
+ def __init__(self, renderer, window_pos=(10, 10), window_size=(300, 400)):
36
+ super().__init__(renderer)
37
+ if not self.is_available:
38
+ return
39
+
40
+ # UI properties
41
+ self.window_pos = window_pos
42
+ self.window_size = window_size
43
+
44
+ # Values to display in the UI
45
+ self.some_float = 123.456
46
+ self.editable_float1 = 10.0
47
+ self.editable_float2 = 20.0
48
+ self.editable_float3 = 30.0
49
+ self.editable_vec2 = wp.vec2(0.5, 1.2)
50
+ self.editable_vec3 = wp.vec3(2.1, 3.4, 4.7)
51
+ self.editable_vec4 = wp.vec4(1.5, 3.2, 4.8, 6.1)
52
+ self.warp_array_float = wp.array([0.7, 1.4, 2.8], dtype=float)
53
+ self.warp_array_vec2 = wp.array([wp.vec2(1.1, 2.3), wp.vec2(3.4, 4.2), wp.vec2(5.6, 6.9)], dtype=wp.vec2)
54
+ self.warp_array_vec3 = wp.array(
55
+ [wp.vec3(0.5, 1.7, 2.9), wp.vec3(3.2, 4.8, 5.1), wp.vec3(6.4, 7.6, 8.3)], dtype=wp.vec3
56
+ )
57
+ self.warp_array_vec4 = wp.array([wp.vec4(1.2, 2.4, 3.6, 4.8), wp.vec4(5.1, 6.3, 7.5, 8.7)], dtype=wp.vec4)
58
+
59
+ def draw_ui(self):
60
+ # set window position and size once
61
+ self.imgui.set_next_window_size(self.window_size[0], self.window_size[1], self.imgui.ONCE)
62
+ self.imgui.set_next_window_position(self.window_pos[0], self.window_pos[1], self.imgui.ONCE)
63
+
64
+ self.imgui.begin("Warp Float Values")
65
+
66
+ self.imgui.text(f"A read-only float: {self.some_float}")
67
+ self.imgui.separator()
68
+
69
+ self.imgui.text("Editable floats:")
70
+ changed1, self.editable_float1 = self.imgui.slider_float("Slider", self.editable_float1, 0.0, 100.0)
71
+ changed2, self.editable_float2 = self.imgui.drag_float("Drag", self.editable_float2, 0.1, 0.0, 100.0)
72
+ changed3, self.editable_float3 = self.imgui.input_float("Input", self.editable_float3)
73
+
74
+ changed, self.editable_vec2 = self.drag_vec2("Vec2", self.editable_vec2)
75
+ changed, self.editable_vec3 = self.drag_vec3("Vec3", self.editable_vec3)
76
+ changed, self.editable_vec4 = self.drag_vec4("Vec4", self.editable_vec4)
77
+
78
+ changed, self.warp_array_float = self.drag_float_list("Float", self.warp_array_float)
79
+ changed, self.warp_array_vec2 = self.drag_vec2_list("Vec2", self.warp_array_vec2)
80
+ changed, self.warp_array_vec3 = self.drag_vec3_list("Vec3", self.warp_array_vec3)
81
+ changed, self.warp_array_vec4 = self.drag_vec4_list("Vec4", self.warp_array_vec4)
82
+
83
+ self.imgui.separator()
84
+ self.imgui.text("File Dialog Examples:")
85
+
86
+ if self.imgui.button("Open File"):
87
+ file_path = self.open_load_file_dialog(
88
+ title="Select a File", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
89
+ )
90
+ if file_path:
91
+ print(f"Selected file to open: {file_path}")
92
+
93
+ if self.imgui.button("Save File"):
94
+ file_path = self.open_save_file_dialog(
95
+ title="Save As", defaultextension=".txt", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
96
+ )
97
+ if file_path:
98
+ print(f"Selected file to save: {file_path}")
99
+
100
+ self.imgui.end()
28
101
 
29
102
 
30
103
  class Example:
31
- def __init__(self, num_tiles=4, custom_tile_arrangement=False):
104
+ def __init__(self, num_tiles=4, custom_tile_arrangement=False, use_imgui=True):
32
105
  if num_tiles < 1:
33
106
  raise ValueError("num_tiles must be greater than or equal to 1.")
34
107
 
35
108
  self.renderer = wp.render.OpenGLRenderer(vsync=False)
109
+ self.use_imgui = use_imgui
110
+
111
+ if self.use_imgui:
112
+ self.imgui_manager = ExampleImGuiManager(self.renderer)
113
+ if self.imgui_manager.is_available:
114
+ self.renderer.render_2d_callbacks.append(self.imgui_manager.render_frame)
115
+ else:
116
+ self.use_imgui = False
117
+
36
118
  instance_ids = []
37
119
 
38
120
  if custom_tile_arrangement:
@@ -81,6 +163,11 @@ class Example:
81
163
  )
82
164
  self.renderer.end_frame()
83
165
 
166
+ def clear(self):
167
+ if self.use_imgui:
168
+ self.imgui_manager.shutdown()
169
+ self.renderer.clear()
170
+
84
171
 
85
172
  if __name__ == "__main__":
86
173
  import argparse
@@ -103,11 +190,21 @@ if __name__ == "__main__":
103
190
  help="Whether to split tiles into subplots when --show_plot is True.",
104
191
  )
105
192
  parser.add_argument("--custom_tile_arrangement", action="store_true", help="Apply custom tile arrangement.")
193
+ parser.add_argument(
194
+ "--use_imgui",
195
+ type=lambda x: bool(distutils.util.strtobool(x.strip())),
196
+ default=True,
197
+ help="Enable or disable the ImGui window.",
198
+ )
106
199
 
107
200
  args = parser.parse_known_args()[0]
108
201
 
109
202
  with wp.ScopedDevice(args.device):
110
- example = Example(num_tiles=args.num_tiles, custom_tile_arrangement=args.custom_tile_arrangement)
203
+ example = Example(
204
+ num_tiles=args.num_tiles,
205
+ custom_tile_arrangement=args.custom_tile_arrangement,
206
+ use_imgui=args.use_imgui,
207
+ )
111
208
 
112
209
  channels = 1 if args.render_mode == "depth" else 3
113
210
 
@@ -190,4 +287,4 @@ if __name__ == "__main__":
190
287
  fig.canvas.draw()
191
288
  fig.canvas.flush_events()
192
289
 
193
- example.renderer.clear()
290
+ example.clear()
@@ -20,6 +20,7 @@
20
20
  # grid and the PicQuadrature class.
21
21
  ###########################################################################
22
22
 
23
+ from dataclasses import dataclass
23
24
  from typing import Any
24
25
 
25
26
  import numpy as np
@@ -27,9 +28,8 @@ import numpy as np
27
28
  import warp as wp
28
29
  import warp.examples.fem.utils as fem_example_utils
29
30
  import warp.fem as fem
30
- import warp.sim.render
31
+ import warp.render
31
32
  from warp.fem import Domain, Field, Sample, at_node, div, grad, integrand
32
- from warp.sim import Model, State
33
33
  from warp.sparse import BsrMatrix, bsr_mm, bsr_mv, bsr_transposed
34
34
 
35
35
 
@@ -186,76 +186,83 @@ def solve_incompressibility(
186
186
 
187
187
 
188
188
  class Example:
189
- def __init__(self, quiet=False, stage_path="example_apic_fluid.usd", voxel_size=1.0):
189
+ @dataclass
190
+ class State:
191
+ particle_q: wp.array(dtype=wp.vec3)
192
+ particle_qd: wp.array(dtype=wp.vec3)
193
+ particle_qd_grad: wp.array(dtype=wp.mat33)
194
+
195
+ def __init__(self, quiet=False, stage_path="example_apic_fluid.usd", voxel_size=1.0, opengl=False):
196
+ self.gravity = wp.vec3(0.0, -10.0, 0.0)
197
+
190
198
  fps = 60
199
+ self.sim_substeps = 1
191
200
  self.frame_dt = 1.0 / fps
192
201
  self.current_frame = 0
193
-
194
- self.sim_substeps = 1
195
202
  self.sim_dt = self.frame_dt / self.sim_substeps
196
203
  self.voxel_size = voxel_size
197
204
 
198
205
  self._quiet = quiet
199
206
 
200
207
  # particle emission
201
- particle_grid_lo = wp.vec3(-5)
202
- particle_grid_hi = wp.vec3(5)
203
-
204
- grid_cell_size = voxel_size
205
- grid_cell_volume = np.prod(grid_cell_size)
206
-
207
208
  PARTICLES_PER_CELL_DIM = 2
208
- self.radius = float(np.max(grid_cell_size) / (2 * PARTICLES_PER_CELL_DIM))
209
+ self.radius = float(np.max(voxel_size) / (2 * PARTICLES_PER_CELL_DIM))
209
210
 
211
+ particle_grid_lo = np.full(3, -5)
212
+ particle_grid_hi = np.full(3, 5)
210
213
  particle_grid_res = (
211
214
  np.array((particle_grid_hi - particle_grid_lo) / voxel_size, dtype=int) * PARTICLES_PER_CELL_DIM
212
215
  )
213
- particle_grid_offset = wp.vec3(self.radius, self.radius, self.radius)
214
-
215
- # Initialize warp.sim model, spawn particles
216
- builder = wp.sim.ModelBuilder()
217
- builder.add_particle_grid(
218
- dim_x=particle_grid_res[0],
219
- dim_y=particle_grid_res[1],
220
- dim_z=particle_grid_res[2],
221
- cell_x=self.radius * 2.0,
222
- cell_y=self.radius * 2.0,
223
- cell_z=self.radius * 2.0,
224
- pos=particle_grid_lo + particle_grid_offset,
225
- rot=wp.quat_identity(),
226
- vel=wp.vec3(0.0, 0.0, 0.0),
227
- mass=grid_cell_volume / PARTICLES_PER_CELL_DIM**3,
228
- jitter=self.radius * 1.0,
216
+
217
+ self.particle_volumes, particle_q = self._spawn_particles(
218
+ particle_grid_res, particle_grid_lo, particle_grid_hi, packing_fraction=1.0
219
+ )
220
+ particle_qd = wp.zeros_like(particle_q)
221
+
222
+ particle_count = particle_q.shape[0]
223
+ if not self._quiet:
224
+ print("Particle count:", particle_count)
225
+
226
+ # Allocate states
227
+ self.state_0 = self.State(
228
+ wp.clone(particle_q),
229
+ wp.clone(particle_qd),
230
+ particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
231
+ )
232
+ self.state_1 = self.State(
233
+ wp.clone(particle_q),
234
+ wp.clone(particle_qd),
235
+ particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
229
236
  )
230
- self.model: Model = builder.finalize()
231
- self.model.ground = False
232
237
 
233
238
  # Storage for temporary variables
234
239
  self.temporary_store = fem.TemporaryStore()
235
240
 
236
- if not self._quiet:
237
- print("Particle count:", self.model.particle_count)
241
+ # initialize renderers
242
+ self.opengl_renderer = None
243
+ self.usd_renderer = None
238
244
 
239
- self.state_0: State = self.model.state()
240
- self.state_0.particle_qd_grad = wp.zeros(shape=(self.model.particle_count), dtype=wp.mat33)
241
-
242
- self.state_1: State = self.model.state()
243
- self.state_1.particle_qd_grad = wp.zeros(shape=(self.model.particle_count), dtype=wp.mat33)
245
+ try:
246
+ if opengl:
247
+ self.opengl_renderer = warp.render.OpenGLRenderer(
248
+ screen_width=1024,
249
+ screen_height=1024,
250
+ )
251
+ except Exception as err:
252
+ wp.utils.warn(f"Could not initialize OpenGL renderer: {err}.")
244
253
 
245
254
  try:
246
255
  if stage_path:
247
- self.renderer = warp.sim.render.SimRenderer(self.model, stage_path, scaling=20.0)
248
- else:
249
- self.renderer = None
256
+ self.usd_renderer = warp.render.UsdRenderer(stage_path)
250
257
  except Exception as err:
251
- print(f"Could not initialize SimRenderer for stage '{stage_path}': {err}.")
258
+ print(f"Could not initialize Usd renderer '{stage_path}': {err}.")
252
259
 
253
260
  def step(self):
254
261
  fem.set_default_temporary_store(self.temporary_store)
255
262
 
256
263
  self.current_frame = self.current_frame + 1
257
264
 
258
- with wp.ScopedTimer(f"simulate frame {self.current_frame}", active=True):
265
+ with wp.ScopedTimer(f"simulate frame {self.current_frame}", synchronize=True):
259
266
  for _s in range(self.sim_substeps):
260
267
  # Allocate the voxels and create the warp.fem geometry
261
268
  volume = wp.Volume.allocate_by_voxels(
@@ -297,7 +304,7 @@ class Example:
297
304
 
298
305
  # Bin particles to grid cells
299
306
  pic = fem.PicQuadrature(
300
- domain=domain, positions=self.state_0.particle_q, measures=self.model.particle_mass
307
+ domain=domain, positions=self.state_0.particle_q, measures=self.particle_volumes
301
308
  )
302
309
 
303
310
  # Compute inverse particle volume for each grid node
@@ -318,7 +325,7 @@ class Example:
318
325
  "velocities": self.state_0.particle_qd,
319
326
  "velocity_gradients": self.state_0.particle_qd_grad,
320
327
  "dt": self.sim_dt,
321
- "gravity": self.model.gravity,
328
+ "gravity": self.gravity,
322
329
  },
323
330
  output_dtype=wp.vec3,
324
331
  )
@@ -377,16 +384,54 @@ class Example:
377
384
 
378
385
  fem.set_default_temporary_store(None)
379
386
 
387
+ @staticmethod
388
+ def _spawn_particles(res, bounds_lo, bounds_hi, packing_fraction):
389
+ Nx = res[0]
390
+ Ny = res[1]
391
+ Nz = res[2]
392
+
393
+ px = np.linspace(bounds_lo[0], bounds_hi[0], Nx + 1)
394
+ py = np.linspace(bounds_lo[1], bounds_hi[1], Ny + 1)
395
+ pz = np.linspace(bounds_lo[2], bounds_hi[2], Nz + 1)
396
+
397
+ points = np.stack(np.meshgrid(px, py, pz)).reshape(3, -1).T
398
+
399
+ cell_size = (bounds_hi - bounds_lo) / res
400
+ cell_volume = np.prod(cell_size)
401
+
402
+ radius = np.max(cell_size) * 0.5
403
+ volume = np.prod(cell_volume) * packing_fraction
404
+
405
+ rng = np.random.default_rng(42)
406
+ points += 2.0 * radius * (rng.random(points.shape) - 0.5)
407
+
408
+ volumes = wp.full(points.shape[0], volume, dtype=float)
409
+ points = wp.array(np.ascontiguousarray(points), dtype=wp.vec3)
410
+ return volumes, points
411
+
380
412
  def render(self, is_live=False):
381
- if self.renderer is None:
413
+ if self.usd_renderer is None and self.opengl_renderer is None:
382
414
  return
383
415
 
384
- with wp.ScopedTimer("render", active=True):
416
+ with wp.ScopedTimer("render", synchronize=True):
385
417
  time = self.current_frame * self.frame_dt
386
418
 
387
- self.renderer.begin_frame(time)
388
- self.renderer.render(self.state_0)
389
- self.renderer.end_frame()
419
+ if self.usd_renderer is not None:
420
+ self.usd_renderer.begin_frame(time)
421
+ self.usd_renderer.render_points(
422
+ "particles",
423
+ self.state_0.particle_q.numpy(),
424
+ radius=self.radius,
425
+ )
426
+ self.usd_renderer.end_frame()
427
+ if self.opengl_renderer is not None:
428
+ self.opengl_renderer.begin_frame(time)
429
+ self.opengl_renderer.render_points(
430
+ "particles",
431
+ self.state_0.particle_q,
432
+ radius=self.radius,
433
+ )
434
+ self.opengl_renderer.end_frame()
390
435
 
391
436
 
392
437
  if __name__ == "__main__":
@@ -404,6 +449,7 @@ if __name__ == "__main__":
404
449
  )
405
450
  parser.add_argument("--num_frames", type=int, default=250, help="Total number of frames.")
406
451
  parser.add_argument("--quiet", action="store_true")
452
+ parser.add_argument("--opengl", action="store_true")
407
453
  parser.add_argument(
408
454
  "--voxel_size",
409
455
  type=float,
@@ -413,11 +459,11 @@ if __name__ == "__main__":
413
459
  args = parser.parse_known_args()[0]
414
460
 
415
461
  with wp.ScopedDevice(args.device):
416
- example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size)
462
+ example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size, opengl=args.opengl)
417
463
 
418
464
  for _ in range(args.num_frames):
419
465
  example.step()
420
466
  example.render()
421
467
 
422
- if example.renderer:
423
- example.renderer.save()
468
+ if example.usd_renderer is not None:
469
+ example.usd_renderer.save()
@@ -116,11 +116,11 @@ class Example:
116
116
  values={"ang_vel": ang_vel},
117
117
  )
118
118
 
119
- side_test = fem.make_test(space=scalar_space, domain=sides)
119
+ self._side_test = fem.make_test(space=scalar_space, domain=sides)
120
120
  side_trial = fem.make_trial(space=scalar_space, domain=sides)
121
121
  fem.integrate(
122
122
  upwind_transport_form,
123
- fields={"phi": side_trial, "psi": side_test},
123
+ fields={"phi": side_trial, "psi": self._side_test},
124
124
  values={"ang_vel": ang_vel},
125
125
  output=matrix_transport,
126
126
  add=True,
@@ -132,7 +132,7 @@ class Example:
132
132
  )
133
133
  matrix_diffusion += fem.integrate(
134
134
  sip_diffusion_form,
135
- fields={"phi": side_trial, "psi": side_test},
135
+ fields={"phi": side_trial, "psi": self._side_test},
136
136
  )
137
137
  self._matrix = matrix_inertia + matrix_transport + viscosity * matrix_diffusion
138
138
 
@@ -140,8 +140,12 @@ class Example:
140
140
  self._phi_field = scalar_space.make_field()
141
141
  fem.interpolate(initial_condition, dest=self._phi_field)
142
142
 
143
+ self._phi_curvature_field = scalar_space.make_field()
144
+ self._compute_phi_curvature()
145
+
143
146
  self.renderer = fem_example_utils.Plot()
144
147
  self.renderer.add_field("phi", self._phi_field)
148
+ self.renderer.add_field("phi_curvature", self._phi_curvature_field)
145
149
 
146
150
  def step(self):
147
151
  self.current_frame += 1
@@ -154,12 +158,29 @@ class Example:
154
158
 
155
159
  phi = wp.zeros_like(rhs)
156
160
  fem_example_utils.bsr_cg(self._matrix, b=rhs, x=phi, method="bicgstab", quiet=self._quiet)
157
-
158
161
  wp.utils.array_cast(in_array=phi, out_array=self._phi_field.dof_values)
159
162
 
163
+ # for visualization purposes only
164
+ self._compute_phi_curvature()
165
+
166
+ def _compute_phi_curvature(self):
167
+ fem.integrate(
168
+ diffusion_form,
169
+ fields={"u": self._phi_field, "v": self._test},
170
+ output=self._phi_curvature_field.dof_values,
171
+ )
172
+ fem.integrate(
173
+ sip_diffusion_form,
174
+ fields={"phi": self._phi_field.trace(), "psi": self._side_test},
175
+ output=self._phi_curvature_field.dof_values,
176
+ add=True,
177
+ )
178
+
160
179
  def render(self):
161
180
  self.renderer.begin_frame(time=self.current_frame * self.sim_dt)
162
181
  self.renderer.add_field("phi", self._phi_field)
182
+ self.renderer.add_field("phi_curvature", self._phi_curvature_field)
183
+
163
184
  self.renderer.end_frame()
164
185
 
165
186
 
@@ -77,8 +77,7 @@ class DistributedSystem:
77
77
  stream = wp.get_stream()
78
78
 
79
79
  for mat_i, x_i, y_i, idx in zip(*self.rank_data):
80
- # WAR copy with indexed array requiring matching shape
81
- tmp_i = wp.array(ptr=tmp.ptr, device=tmp.device, capacity=tmp.capacity, dtype=tmp.dtype, shape=idx.shape)
80
+ tmp_i = tmp[: idx.size]
82
81
 
83
82
  # Compress rhs on rank 0
84
83
  x_idx = wp.indexedarray(x, idx)
@@ -161,7 +160,13 @@ class Example:
161
160
 
162
161
  with wp.ScopedDevice(main_device):
163
162
  fem_example_utils.bsr_cg(
164
- A, x=global_res, b=glob_rhs, use_diag_precond=False, quiet=self._quiet, mv_routine=A.mv_routine
163
+ A,
164
+ x=global_res,
165
+ b=glob_rhs,
166
+ use_diag_precond=False,
167
+ quiet=self._quiet,
168
+ mv_routine=A.mv_routine,
169
+ mv_routine_uses_multiple_cuda_contexts=True,
165
170
  )
166
171
 
167
172
  array_cast(in_array=global_res, out_array=self._scalar_field.dof_values)
@@ -14,12 +14,14 @@
14
14
  # limitations under the License.
15
15
 
16
16
 
17
+ import gc
17
18
  from typing import Any, Dict, Optional, Tuple
18
19
 
19
20
  import numpy as np
20
21
 
21
22
  import warp as wp
22
23
  import warp.fem as fem
24
+ from warp.context import assert_conditional_graph_support
23
25
  from warp.optim.linear import LinearOperator, aslinearoperator, preconditioner
24
26
  from warp.sparse import BsrMatrix, bsr_get_diag, bsr_mv, bsr_transposed
25
27
 
@@ -230,6 +232,7 @@ def bsr_cg(
230
232
  quiet=False,
231
233
  method: str = "cg",
232
234
  M: BsrMatrix = None,
235
+ mv_routine_uses_multiple_cuda_contexts: bool = False,
233
236
  ) -> Tuple[float, int]:
234
237
  """Solves the linear system A x = b using an iterative solver, optionally with diagonal preconditioning
235
238
 
@@ -244,6 +247,8 @@ def bsr_cg(
244
247
  mv_routine: Matrix-vector multiplication routine to use for multiplications with ``A``
245
248
  quiet: if True, do not print iteration residuals
246
249
  method: Iterative solver method to use, defaults to Conjugate Gradient
250
+ mv_routine_uses_multiple_cuda_contexts: Whether the matrix-vector multiplication routine uses multiple CUDA contexts,
251
+ which prevents the use of conditional CUDA graphs.
247
252
 
248
253
  Returns:
249
254
  Tuple (residual norm, iteration count)
@@ -260,10 +265,53 @@ def bsr_cg(
260
265
 
261
266
  func = _get_linear_solver_func(method_name=method)
262
267
 
263
- def print_callback(i, err, tol):
264
- print(f"{func.__name__}: at iteration {i} error = \t {err} \t tol: {tol}")
268
+ callback = None
265
269
 
266
- callback = None if quiet else print_callback
270
+ use_cuda_graph = A.device.is_cuda and not wp.config.verify_cuda
271
+ capturable = use_cuda_graph and not mv_routine_uses_multiple_cuda_contexts
272
+
273
+ if capturable:
274
+ try:
275
+ assert_conditional_graph_support()
276
+ except RuntimeError:
277
+ capturable = False
278
+
279
+ if not quiet:
280
+ if capturable:
281
+
282
+ @wp.func_native(snippet=f'printf("%s: ", "{func.__name__}");')
283
+ def print_method_name():
284
+ pass
285
+
286
+ @fem.cache.dynamic_kernel(suffix=f"{check_every}{func.__name__}")
287
+ def device_cg_callback(
288
+ cur_iter: wp.array(dtype=int),
289
+ err_sq: wp.array(dtype=Any),
290
+ atol_sq: wp.array(dtype=Any),
291
+ ):
292
+ if cur_iter[0] % check_every == 0:
293
+ print_method_name()
294
+ wp.printf(
295
+ "at iteration %d error = \t %f \t tol: %f\n",
296
+ cur_iter[0],
297
+ wp.sqrt(err_sq[0]),
298
+ wp.sqrt(atol_sq[0]),
299
+ )
300
+
301
+ if check_every > 0:
302
+ callback = device_cg_callback
303
+ else:
304
+
305
+ def print_callback(i, err, tol):
306
+ print(f"{func.__name__}: at iteration {i} error = \t {err} \t tol: {tol}")
307
+
308
+ callback = print_callback
309
+
310
+ if use_cuda_graph:
311
+ # Temporarily disable garbage collection
312
+ # Garbage collection of externally-allocated objects during graph capture may lead to
313
+ # invalid operations or memory access errors.
314
+ gc.disable()
267
315
 
268
316
  end_iter, err, atol = func(
269
317
  A=A,
@@ -271,12 +319,20 @@ def bsr_cg(
271
319
  x=x,
272
320
  maxiter=max_iters,
273
321
  tol=tol,
274
- check_every=check_every,
322
+ check_every=0 if capturable else check_every,
275
323
  M=M,
276
324
  callback=callback,
277
- use_cuda_graph=not wp.config.verify_cuda,
325
+ use_cuda_graph=use_cuda_graph,
278
326
  )
279
327
 
328
+ if use_cuda_graph:
329
+ gc.enable()
330
+
331
+ if isinstance(end_iter, wp.array):
332
+ end_iter = end_iter.numpy()[0]
333
+ err = np.sqrt(err.numpy()[0])
334
+ atol = np.sqrt(atol.numpy()[0])
335
+
280
336
  if not quiet:
281
337
  res_str = "OK" if err <= atol else "TRUNCATED"
282
338
  print(f"{func.__name__}: terminated after {end_iter} iterations with error = \t {err} ({res_str})")
@@ -437,28 +493,18 @@ def bsr_solve_saddle(
437
493
  wp.copy(src=b_u, dest=saddle_system.u_slice(b))
438
494
  wp.copy(src=b_p, dest=saddle_system.p_slice(b))
439
495
 
440
- func = _get_linear_solver_func(method_name=method)
441
-
442
- def print_callback(i, err, tol):
443
- print(f"{func.__name__}: at iteration {i} error = \t {err} \t tol: {tol}")
444
-
445
- callback = None if quiet else print_callback
446
-
447
- end_iter, err, atol = func(
448
- A=saddle_system,
449
- b=b,
450
- x=x,
451
- maxiter=max_iters,
496
+ err, end_iter = bsr_cg(
497
+ saddle_system,
498
+ x,
499
+ b,
500
+ max_iters=max_iters,
452
501
  tol=tol,
453
502
  check_every=check_every,
503
+ quiet=quiet,
504
+ method=method,
454
505
  M=saddle_system.preconditioner,
455
- callback=callback,
456
506
  )
457
507
 
458
- if not quiet:
459
- res_str = "OK" if err <= atol else "TRUNCATED"
460
- print(f"{func.__name__}: terminated after {end_iter} iterations with absolute error = \t {err} ({res_str})")
461
-
462
508
  wp.copy(dest=x_u, src=saddle_system.u_slice(x))
463
509
  wp.copy(dest=x_p, src=saddle_system.p_slice(x))
464
510
 
@@ -45,7 +45,8 @@ def sincos_kernel(angle: wp.array(dtype=float), sin_out: wp.array(dtype=float),
45
45
  @wp.kernel
46
46
  def diagonal_kernel(output: wp.array(dtype=wp.mat33)):
47
47
  tid = wp.tid()
48
- output[tid] = wp.mat33(1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0)
48
+ d = float(tid + 1)
49
+ output[tid] = wp.mat33(d, 0.0, 0.0, 0.0, d * 2.0, 0.0, 0.0, 0.0, d * 3.0)
49
50
 
50
51
 
51
52
  @wp.kernel
warp/fabric.py CHANGED
@@ -211,7 +211,7 @@ class fabricarray(noncontiguous_array_base[T]):
211
211
  allocator = self.device.get_allocator()
212
212
  buckets_ptr = allocator.alloc(buckets_size)
213
213
  cuda_stream = self.device.stream.cuda_stream
214
- runtime.core.memcpy_h2d(
214
+ runtime.core.wp_memcpy_h2d(
215
215
  self.device.context, buckets_ptr, ctypes.addressof(buckets), buckets_size, cuda_stream
216
216
  )
217
217
  self.deleter = allocator.deleter