warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (191)
  1. warp/__init__.py +7 -1
  2. warp/autograd.py +12 -2
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +410 -0
  6. warp/build_dll.py +6 -14
  7. warp/builtins.py +463 -372
  8. warp/codegen.py +196 -124
  9. warp/config.py +42 -6
  10. warp/context.py +496 -271
  11. warp/dlpack.py +8 -6
  12. warp/examples/assets/nonuniform.usd +0 -0
  13. warp/examples/assets/nvidia_logo.png +0 -0
  14. warp/examples/benchmarks/benchmark_cloth.py +1 -1
  15. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  16. warp/examples/core/example_sample_mesh.py +300 -0
  17. warp/examples/distributed/example_jacobi_mpi.py +507 -0
  18. warp/examples/fem/example_apic_fluid.py +1 -1
  19. warp/examples/fem/example_burgers.py +2 -2
  20. warp/examples/fem/example_deformed_geometry.py +1 -1
  21. warp/examples/fem/example_distortion_energy.py +1 -1
  22. warp/examples/fem/example_magnetostatics.py +6 -6
  23. warp/examples/fem/utils.py +9 -3
  24. warp/examples/interop/example_jax_callable.py +116 -0
  25. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  26. warp/examples/interop/example_jax_kernel.py +205 -0
  27. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  28. warp/examples/tile/example_tile_matmul.py +2 -4
  29. warp/fem/__init__.py +11 -1
  30. warp/fem/adaptivity.py +4 -4
  31. warp/fem/field/field.py +11 -1
  32. warp/fem/field/nodal_field.py +56 -88
  33. warp/fem/field/virtual.py +62 -23
  34. warp/fem/geometry/adaptive_nanogrid.py +16 -13
  35. warp/fem/geometry/closest_point.py +1 -1
  36. warp/fem/geometry/deformed_geometry.py +5 -2
  37. warp/fem/geometry/geometry.py +5 -0
  38. warp/fem/geometry/grid_2d.py +12 -12
  39. warp/fem/geometry/grid_3d.py +12 -15
  40. warp/fem/geometry/hexmesh.py +5 -7
  41. warp/fem/geometry/nanogrid.py +9 -11
  42. warp/fem/geometry/quadmesh.py +13 -13
  43. warp/fem/geometry/tetmesh.py +3 -4
  44. warp/fem/geometry/trimesh.py +7 -20
  45. warp/fem/integrate.py +262 -93
  46. warp/fem/linalg.py +5 -5
  47. warp/fem/quadrature/pic_quadrature.py +37 -22
  48. warp/fem/quadrature/quadrature.py +194 -25
  49. warp/fem/space/__init__.py +1 -1
  50. warp/fem/space/basis_function_space.py +4 -2
  51. warp/fem/space/basis_space.py +25 -18
  52. warp/fem/space/hexmesh_function_space.py +2 -2
  53. warp/fem/space/partition.py +6 -2
  54. warp/fem/space/quadmesh_function_space.py +8 -8
  55. warp/fem/space/shape/cube_shape_function.py +23 -23
  56. warp/fem/space/shape/square_shape_function.py +12 -12
  57. warp/fem/space/shape/triangle_shape_function.py +1 -1
  58. warp/fem/space/tetmesh_function_space.py +3 -3
  59. warp/fem/space/trimesh_function_space.py +2 -2
  60. warp/fem/utils.py +12 -6
  61. warp/jax.py +14 -1
  62. warp/jax_experimental/__init__.py +16 -0
  63. warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -29
  64. warp/jax_experimental/ffi.py +702 -0
  65. warp/jax_experimental/xla_ffi.py +602 -0
  66. warp/math.py +89 -0
  67. warp/native/array.h +13 -0
  68. warp/native/builtin.h +29 -3
  69. warp/native/bvh.cpp +3 -1
  70. warp/native/bvh.cu +42 -14
  71. warp/native/bvh.h +2 -1
  72. warp/native/clang/clang.cpp +30 -3
  73. warp/native/cuda_util.cpp +14 -0
  74. warp/native/cuda_util.h +2 -0
  75. warp/native/exports.h +68 -63
  76. warp/native/intersect.h +26 -26
  77. warp/native/intersect_adj.h +33 -33
  78. warp/native/marching.cu +1 -1
  79. warp/native/mat.h +513 -9
  80. warp/native/mesh.h +10 -10
  81. warp/native/quat.h +99 -11
  82. warp/native/rand.h +6 -0
  83. warp/native/sort.cpp +122 -59
  84. warp/native/sort.cu +152 -15
  85. warp/native/sort.h +8 -1
  86. warp/native/sparse.cpp +43 -22
  87. warp/native/sparse.cu +52 -17
  88. warp/native/svd.h +116 -0
  89. warp/native/tile.h +312 -116
  90. warp/native/tile_reduce.h +46 -3
  91. warp/native/vec.h +68 -7
  92. warp/native/volume.cpp +85 -113
  93. warp/native/volume_builder.cu +25 -10
  94. warp/native/volume_builder.h +6 -0
  95. warp/native/warp.cpp +5 -6
  96. warp/native/warp.cu +100 -11
  97. warp/native/warp.h +19 -10
  98. warp/optim/linear.py +10 -10
  99. warp/render/render_opengl.py +19 -17
  100. warp/render/render_usd.py +93 -3
  101. warp/sim/articulation.py +4 -4
  102. warp/sim/collide.py +32 -19
  103. warp/sim/import_mjcf.py +449 -155
  104. warp/sim/import_urdf.py +32 -12
  105. warp/sim/inertia.py +189 -156
  106. warp/sim/integrator_euler.py +8 -5
  107. warp/sim/integrator_featherstone.py +3 -10
  108. warp/sim/integrator_vbd.py +207 -2
  109. warp/sim/integrator_xpbd.py +8 -5
  110. warp/sim/model.py +71 -25
  111. warp/sim/render.py +4 -0
  112. warp/sim/utils.py +2 -2
  113. warp/sparse.py +642 -555
  114. warp/stubs.py +217 -20
  115. warp/tests/__main__.py +0 -15
  116. warp/tests/assets/torus.usda +1 -1
  117. warp/tests/cuda/__init__.py +0 -0
  118. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  119. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  120. warp/tests/geometry/__init__.py +0 -0
  121. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  122. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  123. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  124. warp/tests/interop/__init__.py +0 -0
  125. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  126. warp/tests/sim/__init__.py +0 -0
  127. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  128. warp/tests/{test_collision.py → sim/test_collision.py} +236 -205
  129. warp/tests/sim/test_inertia.py +161 -0
  130. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  131. warp/tests/{flaky_test_sim_grad.py → sim/test_sim_grad.py} +4 -0
  132. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  133. warp/tests/sim/test_vbd.py +597 -0
  134. warp/tests/sim/test_xpbd.py +399 -0
  135. warp/tests/test_bool.py +1 -1
  136. warp/tests/test_codegen.py +24 -3
  137. warp/tests/test_examples.py +40 -38
  138. warp/tests/test_fem.py +98 -14
  139. warp/tests/test_linear_solvers.py +0 -11
  140. warp/tests/test_mat.py +577 -156
  141. warp/tests/test_mat_scalar_ops.py +4 -4
  142. warp/tests/test_overwrite.py +0 -60
  143. warp/tests/test_quat.py +356 -151
  144. warp/tests/test_rand.py +44 -37
  145. warp/tests/test_sparse.py +47 -6
  146. warp/tests/test_spatial.py +75 -0
  147. warp/tests/test_static.py +1 -1
  148. warp/tests/test_utils.py +84 -4
  149. warp/tests/test_vec.py +336 -178
  150. warp/tests/tile/__init__.py +0 -0
  151. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  152. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +98 -1
  153. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  154. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  155. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  156. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  157. warp/tests/unittest_serial.py +1 -0
  158. warp/tests/unittest_suites.py +45 -62
  159. warp/tests/unittest_utils.py +2 -1
  160. warp/thirdparty/unittest_parallel.py +3 -1
  161. warp/types.py +175 -666
  162. warp/utils.py +137 -72
  163. {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/METADATA +46 -12
  164. {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/RECORD +184 -171
  165. {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/WHEEL +1 -1
  166. {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info/licenses}/LICENSE.md +0 -26
  167. warp/examples/optim/example_walker.py +0 -317
  168. warp/native/cutlass_gemm.cpp +0 -43
  169. warp/native/cutlass_gemm.cu +0 -382
  170. warp/tests/test_matmul.py +0 -511
  171. warp/tests/test_matmul_lite.py +0 -411
  172. warp/tests/test_vbd.py +0 -386
  173. warp/tests/unused_test_misc.py +0 -77
  174. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  175. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  176. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  177. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  178. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  179. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  180. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  181. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  182. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  183. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  184. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  185. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  186. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  187. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  188. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  189. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  190. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  191. {warp_lang-1.6.2.dist-info → warp_lang-1.7.1.dist-info}/top_level.txt +0 -0
warp/examples/optim/example_fluid_checkpoint.py ADDED
@@ -0,0 +1,497 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ ###########################################################################
17
+ # Example Fluid Checkpoint
18
+ #
19
+ # Shows how to implement a differentiable 2D stable-fluids solver and
20
+ # optimize the initial velocity field to form the NVIDIA logo at the end
21
+ # of the simulation. Gradient checkpointing to reduce memory usage
22
+ # is manually implemented.
23
+ #
24
+ # References:
25
+ # https://github.com/HIPS/autograd/blob/master/examples/fluidsim/fluidsim.py
26
+ #
27
+ ###########################################################################
28
+
29
+ import math
30
+ import os
31
+
32
+ import numpy as np
33
+
34
+ import warp as wp
35
+ import warp.examples
36
+ import warp.optim
37
+
38
+ try:
39
+ from PIL import Image
40
+ except ImportError as err:
41
+ raise ImportError("This example requires the Pillow package. Please install it with 'pip install Pillow'.") from err
42
+
43
+
44
+ N_GRID = wp.constant(512)
45
+ DH = 1.0 / N_GRID # Grid spacing
46
+ FLUID_COLUMN_WIDTH = N_GRID / 10.0
47
+
48
+
49
+ @wp.func
50
+ def cyclic_index(idx: wp.int32):
51
+ """Helper function to index with periodic boundary conditions."""
52
+ ret_idx = idx % N_GRID
53
+ if ret_idx < 0:
54
+ ret_idx += N_GRID
55
+ return ret_idx
56
+
57
+
58
+ @wp.kernel
59
+ def fill_initial_density(density: wp.array2d(dtype=wp.float32)):
60
+ """Initialize the density array with three bands of fluid."""
61
+ i, j = wp.tid()
62
+
63
+ y_pos = wp.float32(i)
64
+
65
+ if FLUID_COLUMN_WIDTH <= y_pos < 2.0 * FLUID_COLUMN_WIDTH:
66
+ density[i, j] = 1.0
67
+ elif 4.5 * FLUID_COLUMN_WIDTH <= y_pos < 5.5 * FLUID_COLUMN_WIDTH:
68
+ density[i, j] = 1.0
69
+ elif 8.0 * FLUID_COLUMN_WIDTH <= y_pos < 9.0 * FLUID_COLUMN_WIDTH:
70
+ density[i, j] = 1.0
71
+ else:
72
+ density[i, j] = 0.0
73
+
74
+
75
+ @wp.kernel
76
+ def advect(
77
+ dt: float,
78
+ vx: wp.array2d(dtype=float),
79
+ vy: wp.array2d(dtype=float),
80
+ f0: wp.array2d(dtype=float),
81
+ f1: wp.array2d(dtype=float),
82
+ ):
83
+ """Move field f0 according to vx and vy velocities using an implicit Euler integrator."""
84
+
85
+ i, j = wp.tid()
86
+
87
+ center_xs = wp.float32(i) - vx[i, j] * dt
88
+ center_ys = wp.float32(j) - vy[i, j] * dt
89
+
90
+ # Compute indices of source cells.
91
+ left_idx = wp.int32(wp.floor(center_xs))
92
+ bot_idx = wp.int32(wp.floor(center_ys))
93
+
94
+ s1 = center_xs - wp.float32(left_idx) # Relative weight of right cell
95
+ s0 = 1.0 - s1
96
+ t1 = center_ys - wp.float32(bot_idx) # Relative weight of top cell
97
+ t0 = 1.0 - t1
98
+
99
+ i0 = cyclic_index(left_idx)
100
+ i1 = cyclic_index(left_idx + 1)
101
+ j0 = cyclic_index(bot_idx)
102
+ j1 = cyclic_index(bot_idx + 1)
103
+
104
+ # Perform bilinear interpolation of the four cells bounding the back-in-time position
105
+ f1[i, j] = s0 * (t0 * f0[i0, j0] + t1 * f0[i0, j1]) + s1 * (t0 * f0[i1, j0] + t1 * f0[i1, j1])
106
+
107
+
108
+ @wp.kernel
109
+ def divergence(wx: wp.array2d(dtype=float), wy: wp.array2d(dtype=float), div: wp.array2d(dtype=float)):
110
+ """Compute div(w)."""
111
+
112
+ i, j = wp.tid()
113
+
114
+ div[i, j] = (
115
+ 0.5
116
+ * (
117
+ wx[cyclic_index(i + 1), j]
118
+ - wx[cyclic_index(i - 1), j]
119
+ + wy[i, cyclic_index(j + 1)]
120
+ - wy[i, cyclic_index(j - 1)]
121
+ )
122
+ / DH
123
+ )
124
+
125
+
126
+ @wp.kernel
127
+ def jacobi_iter(div: wp.array2d(dtype=float), p0: wp.array2d(dtype=float), p1: wp.array2d(dtype=float)):
128
+ """Calculate a single Jacobi iteration for solving the pressure Poisson equation."""
129
+
130
+ i, j = wp.tid()
131
+
132
+ p1[i, j] = 0.25 * (
133
+ -DH * DH * div[i, j]
134
+ + p0[cyclic_index(i - 1), j]
135
+ + p0[cyclic_index(i + 1), j]
136
+ + p0[i, cyclic_index(j - 1)]
137
+ + p0[i, cyclic_index(j + 1)]
138
+ )
139
+
140
+
141
+ @wp.kernel
142
+ def update_velocities(
143
+ p: wp.array2d(dtype=float),
144
+ wx: wp.array2d(dtype=float),
145
+ wy: wp.array2d(dtype=float),
146
+ vx: wp.array2d(dtype=float),
147
+ vy: wp.array2d(dtype=float),
148
+ ):
149
+ """Given p and (wx, wy), compute an 'incompressible' velocity field (vx, vy)."""
150
+
151
+ i, j = wp.tid()
152
+
153
+ vx[i, j] = wx[i, j] - 0.5 * (p[cyclic_index(i + 1), j] - p[cyclic_index(i - 1), j]) / DH
154
+ vy[i, j] = wy[i, j] - 0.5 * (p[i, cyclic_index(j + 1)] - p[i, cyclic_index(j - 1)]) / DH
155
+
156
+
157
+ @wp.kernel
158
+ def compute_loss(
159
+ actual_state: wp.array2d(dtype=float), target_state: wp.array2d(dtype=float), loss: wp.array(dtype=float)
160
+ ):
161
+ i, j = wp.tid()
162
+
163
+ loss_value = (
164
+ (actual_state[i, j] - target_state[i, j])
165
+ * (actual_state[i, j] - target_state[i, j])
166
+ / wp.float32(N_GRID * N_GRID)
167
+ )
168
+
169
+ wp.atomic_add(loss, 0, loss_value)
170
+
171
+
172
+ class Example:
173
+ def __init__(self, sim_steps=1000):
174
+ self.pressure_arrays = []
175
+ self.wx_arrays = []
176
+ self.wy_arrays = []
177
+ self.vx_arrays = []
178
+ self.vy_arrays = []
179
+ self.density_arrays = []
180
+ self.div_arrays = []
181
+
182
+ # Memory usage is minimized when the segment size is approx. sqrt(sim_steps)
183
+ self.segment_size = math.ceil(math.sqrt(sim_steps))
184
+
185
+ # TODO: For now, let's just round up sim_steps so each segment is the same size
186
+ self.num_segments = math.ceil(sim_steps / self.segment_size)
187
+ self.sim_steps = self.segment_size * self.num_segments
188
+
189
+ self.pressure_iterations = 50
190
+ self.dt = 1.0
191
+
192
+ # Store enough arrays to step through a segment without overwriting arrays
193
+ # NOTE: Need an extra array to store the final time-advanced velocities and densities
194
+ for _step in range(self.segment_size + 1):
195
+ self.vx_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
196
+ self.vy_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
197
+ self.density_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
198
+
199
+ for _step in range(self.segment_size):
200
+ self.wx_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
201
+ self.wy_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
202
+ self.div_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
203
+
204
+ for _iter in range(self.pressure_iterations):
205
+ self.pressure_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
206
+
207
+ # Allocate one more pressure array for the final time step
208
+ self.pressure_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float, requires_grad=True))
209
+
210
+ # Allocate memory to save the fluid state at the start of each segment
211
+ self.segment_start_vx_arrays = []
212
+ self.segment_start_vy_arrays = []
213
+ self.segment_start_density_arrays = []
214
+ self.segment_start_pressure_arrays = []
215
+
216
+ for _segment_index in range(self.num_segments):
217
+ self.segment_start_vx_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float))
218
+ self.segment_start_vy_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float))
219
+ self.segment_start_density_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float))
220
+ self.segment_start_pressure_arrays.append(wp.zeros((N_GRID, N_GRID), dtype=float))
221
+
222
+ # To restore previously computed gradients before calling tape.backward()
223
+ self.vx_array_grad_saved = wp.zeros((N_GRID, N_GRID), dtype=float)
224
+ self.vy_array_grad_saved = wp.zeros((N_GRID, N_GRID), dtype=float)
225
+ self.density_array_grad_saved = wp.zeros((N_GRID, N_GRID), dtype=float)
226
+ self.pressure_array_grad_saved = wp.zeros((N_GRID, N_GRID), dtype=float)
227
+
228
+ wp.launch(fill_initial_density, (N_GRID, N_GRID), inputs=[self.density_arrays[0]])
229
+
230
+ target_base = Image.open(os.path.join(warp.examples.get_asset_directory(), "nvidia_logo.png"))
231
+ target_resized = target_base.resize((N_GRID, N_GRID))
232
+
233
+ target_np = np.array(target_resized)[:, :, 0] / 255.0
234
+ self.target_wp = wp.array(target_np, dtype=float)
235
+
236
+ self.loss = wp.zeros((1,), dtype=float, requires_grad=True)
237
+
238
+ self.train_rate = 0.01
239
+ self.optimizer = warp.optim.Adam([self.vx_arrays[0].flatten(), self.vy_arrays[0].flatten()], lr=self.train_rate)
240
+
241
+ # Capture forward/backward passes and tape.zero()
242
+ self.use_cuda_graph = wp.get_device().is_cuda
243
+ self.forward_graph = None
244
+ self.backward_graph = None
245
+ self.zero_tape_graph = None
246
+
247
+ if self.use_cuda_graph:
248
+ with wp.ScopedCapture() as capture:
249
+ self.forward()
250
+ self.forward_graph = capture.graph
251
+
252
+ with wp.ScopedCapture() as capture:
253
+ self.backward()
254
+ self.backward_graph = capture.graph
255
+
256
+ # tape.zero() launches many memsets, which can be a significant overhead for smaller problems
257
+ with wp.ScopedCapture() as capture:
258
+ self.tape.zero()
259
+ self.zero_tape_graph = capture.graph
260
+
261
+ def step(self, step_index) -> None:
262
+ """Perform a single time step from t=step_index-1 to t=step_index.
263
+
264
+ 1. Self-advection of velocity components (store output in wx_arrays and wy_arrays)
265
+ 2. Incompressibility constraint (store output in vx_arrays and vy_arrays)
266
+ 3. Advection of density using velocities (vx_arrays, vy_arrays)
267
+ """
268
+
269
+ wp.launch(
270
+ advect,
271
+ (N_GRID, N_GRID),
272
+ inputs=[
273
+ self.dt,
274
+ self.vx_arrays[step_index - 1],
275
+ self.vy_arrays[step_index - 1],
276
+ self.vx_arrays[step_index - 1],
277
+ ],
278
+ outputs=[self.wx_arrays[step_index - 1]],
279
+ )
280
+ wp.launch(
281
+ advect,
282
+ (N_GRID, N_GRID),
283
+ inputs=[
284
+ self.dt,
285
+ self.vx_arrays[step_index - 1],
286
+ self.vy_arrays[step_index - 1],
287
+ self.vy_arrays[step_index - 1],
288
+ ],
289
+ outputs=[self.wy_arrays[step_index - 1]],
290
+ )
291
+
292
+ # Pressure projection using a few Jacobi iterations
293
+ wp.launch(
294
+ divergence,
295
+ (N_GRID, N_GRID),
296
+ inputs=[self.wx_arrays[step_index - 1], self.wy_arrays[step_index - 1]],
297
+ outputs=[self.div_arrays[step_index - 1]],
298
+ )
299
+
300
+ # NOTE: Uses previous step's final pressure as the initial guess
301
+ for k in range(self.pressure_iterations):
302
+ input_index = self.pressure_iterations * (step_index - 1) + k
303
+ output_index = input_index + 1
304
+
305
+ wp.launch(
306
+ jacobi_iter,
307
+ (N_GRID, N_GRID),
308
+ inputs=[self.div_arrays[step_index - 1], self.pressure_arrays[input_index]],
309
+ outputs=[self.pressure_arrays[output_index]],
310
+ )
311
+
312
+ # NOTE: output_index should be self.pressure_iterations*step_index at this point
313
+ wp.launch(
314
+ update_velocities,
315
+ (N_GRID, N_GRID),
316
+ inputs=[self.pressure_arrays[output_index], self.wx_arrays[step_index - 1], self.wy_arrays[step_index - 1]],
317
+ outputs=[self.vx_arrays[step_index], self.vy_arrays[step_index]],
318
+ )
319
+
320
+ wp.launch(
321
+ advect,
322
+ (N_GRID, N_GRID),
323
+ inputs=[
324
+ self.dt,
325
+ self.vx_arrays[step_index],
326
+ self.vy_arrays[step_index],
327
+ self.density_arrays[step_index - 1],
328
+ ],
329
+ outputs=[self.density_arrays[step_index]],
330
+ )
331
+
332
+ def forward(self) -> None:
333
+ """Advance the simulation forward in segments, storing the fluid state at the start of each segment.
334
+
335
+ The loss function is also evaluated at the end of the function.
336
+ """
337
+ self.loss.zero_()
338
+
339
+ for segment_index in range(self.num_segments):
340
+ # Save start-of-segment values
341
+ wp.copy(self.segment_start_vx_arrays[segment_index], self.vx_arrays[0])
342
+ wp.copy(self.segment_start_vy_arrays[segment_index], self.vy_arrays[0])
343
+ wp.copy(self.segment_start_density_arrays[segment_index], self.density_arrays[0])
344
+ wp.copy(self.segment_start_pressure_arrays[segment_index], self.pressure_arrays[0])
345
+
346
+ for t in range(1, self.segment_size + 1):
347
+ # sim_t = (segment_index - 1) * self.segment_size + t
348
+ self.step(t)
349
+
350
+ # Set the initial conditions for the next segment
351
+ if segment_index < self.num_segments - 1:
352
+ wp.copy(self.vx_arrays[0], self.vx_arrays[-1])
353
+ wp.copy(self.vy_arrays[0], self.vy_arrays[-1])
354
+ wp.copy(self.density_arrays[0], self.density_arrays[-1])
355
+ wp.copy(self.pressure_arrays[0], self.pressure_arrays[-1])
356
+
357
+ wp.launch(
358
+ compute_loss,
359
+ (N_GRID, N_GRID),
360
+ inputs=[self.density_arrays[self.segment_size], self.target_wp],
361
+ outputs=[self.loss],
362
+ )
363
+
364
+ def backward(self) -> None:
365
+ """Compute the adjoints using a checkpointing approach.
366
+
367
+ Starting from the final segment, the forward pass for the segment is
368
+ repeated, this time recording the kernel launches onto a tape. Any
369
+ previously computed adjoints are restored prior to evaluating the
370
+ backward pass for the segment. This process is repeated until the
371
+ adjoints of the initial state have been calculated.
372
+ """
373
+
374
+ for segment_index in range(self.num_segments - 1, -1, -1):
375
+ # Restore state at the start of the segment
376
+ wp.copy(self.vx_arrays[0], self.segment_start_vx_arrays[segment_index])
377
+ wp.copy(self.vy_arrays[0], self.segment_start_vy_arrays[segment_index])
378
+ wp.copy(self.density_arrays[0], self.segment_start_density_arrays[segment_index])
379
+ wp.copy(self.pressure_arrays[0], self.segment_start_pressure_arrays[segment_index])
380
+
381
+ # Record operations on tape
382
+ with wp.Tape() as self.tape:
383
+ for t in range(1, self.segment_size + 1):
384
+ self.step(t)
385
+
386
+ if segment_index == self.num_segments - 1:
387
+ self.loss.grad.fill_(1.0)
388
+
389
+ wp.launch(
390
+ compute_loss,
391
+ (N_GRID, N_GRID),
392
+ inputs=[self.density_arrays[self.segment_size], self.target_wp],
393
+ outputs=[self.loss],
394
+ adj_inputs=[self.density_arrays[self.segment_size].grad, None],
395
+ adj_outputs=[self.loss.grad],
396
+ adjoint=True,
397
+ )
398
+ else:
399
+ # Fill in previously computed gradients from the last segment
400
+ wp.copy(self.vx_arrays[-1].grad, self.vx_array_grad_saved)
401
+ wp.copy(self.vy_arrays[-1].grad, self.vy_array_grad_saved)
402
+ wp.copy(self.density_arrays[-1].grad, self.density_array_grad_saved)
403
+ wp.copy(self.pressure_arrays[-1].grad, self.pressure_array_grad_saved)
404
+
405
+ self.tape.backward()
406
+
407
+ if segment_index > 0:
408
+ # Save the gradients to variables and zero-out the gradients for the next segment
409
+ wp.copy(self.vx_array_grad_saved, self.vx_arrays[0].grad)
410
+ wp.copy(self.vy_array_grad_saved, self.vy_arrays[0].grad)
411
+ wp.copy(self.density_array_grad_saved, self.density_arrays[0].grad)
412
+ wp.copy(self.pressure_array_grad_saved, self.pressure_arrays[0].grad)
413
+
414
+ self.tape.zero()
415
+
416
+ # Done with backward pass, we're interested in self.vx_arrays[0].grad and self.vy_arrays[0].grad
417
+
418
+
419
+ if __name__ == "__main__":
420
+ import argparse
421
+
422
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
423
+ parser.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
424
+ parser.add_argument(
425
+ "--num_frames", type=int, default=1000, help="Number of frames to simulate before computing loss."
426
+ )
427
+ parser.add_argument("--train_iters", type=int, default=50, help="Total number of training iterations.")
428
+ parser.add_argument(
429
+ "--headless",
430
+ action="store_true",
431
+ help="Run in headless mode, suppressing the opening of any graphical windows.",
432
+ )
433
+
434
+ args = parser.parse_known_args()[0]
435
+
436
+ with wp.ScopedDevice(args.device):
437
+ example = Example(sim_steps=args.num_frames)
438
+
439
+ wp.synchronize_device()
440
+
441
+ if (device := wp.get_device()).is_cuda:
442
+ print(f"Current memory usage: {wp.get_mempool_used_mem_current(device) / (1024 * 1024 * 1024):.4f} GiB")
443
+
444
+ # Main training loop
445
+ for train_iter in range(args.train_iters):
446
+ if example.forward_graph:
447
+ wp.capture_launch(example.forward_graph)
448
+ else:
449
+ example.forward()
450
+
451
+ if example.backward_graph:
452
+ wp.capture_launch(example.backward_graph)
453
+ else:
454
+ example.backward()
455
+
456
+ example.optimizer.step([example.vx_arrays[0].grad.flatten(), example.vy_arrays[0].grad.flatten()])
457
+
458
+ # Clear grad arrays for next iteration
459
+ if example.zero_tape_graph:
460
+ wp.capture_launch(example.zero_tape_graph)
461
+ else:
462
+ example.tape.zero()
463
+
464
+ print(f"Iteration {train_iter:05d} loss: {example.loss.numpy()[0]:.6f}")
465
+
466
+ if not args.headless:
467
+ import matplotlib
468
+ import matplotlib.pyplot as plt
469
+
470
+ if matplotlib.rcParams["figure.raise_window"]:
471
+ matplotlib.rcParams["figure.raise_window"] = False
472
+
473
+ fig, ax = plt.subplots()
474
+ image = ax.imshow(example.density_arrays[-1].numpy(), cmap="viridis", origin="lower", vmin=0, vmax=1)
475
+ ax.set_xticks([])
476
+ ax.set_yticks([])
477
+ ax.set_title("Fluid Density")
478
+
479
+ # Run the final simulation to the stop time
480
+ for _ in range(args.num_frames):
481
+ example.step(1)
482
+ # Swap pointers
483
+ (example.vx_arrays[0], example.vx_arrays[1]) = (example.vx_arrays[1], example.vx_arrays[0])
484
+ (example.vy_arrays[0], example.vy_arrays[1]) = (example.vy_arrays[1], example.vy_arrays[0])
485
+ (example.density_arrays[0], example.density_arrays[1]) = (
486
+ example.density_arrays[1],
487
+ example.density_arrays[0],
488
+ )
489
+ (example.pressure_arrays[0], example.pressure_arrays[example.pressure_iterations]) = (
490
+ example.pressure_arrays[example.pressure_iterations],
491
+ example.pressure_arrays[0],
492
+ )
493
+
494
+ image.set_data(example.density_arrays[0].numpy())
495
+ plt.pause(0.001)
496
+
497
+ plt.show()
warp/examples/tile/example_tile_matmul.py CHANGED
@@ -57,8 +57,6 @@ def tile_gemm(A: wp.array2d(dtype=wp.float32), B: wp.array2d(dtype=wp.float16),
57
57
 
58
58
 
59
59
  if __name__ == "__main__":
60
- wp.set_device("cuda:0")
61
-
62
60
  # generate some tile aligned matrix dimensions
63
61
  M = TILE_M * 7
64
62
  K = TILE_K * 6
@@ -76,12 +74,12 @@ if __name__ == "__main__":
76
74
  with wp.Tape() as tape:
77
75
  wp.launch_tiled(
78
76
  tile_gemm,
79
- dim=(int(M / TILE_M), int(N / TILE_N)),
77
+ dim=(M // TILE_M, N // TILE_N),
80
78
  inputs=[A_wp, B_wp],
81
79
  outputs=[C_wp],
82
80
  block_dim=TILE_THREADS,
83
81
  )
84
82
 
85
- assert np.allclose(C_wp.numpy(), A @ B)
83
+ assert np.allclose(C_wp.numpy(), A @ B, atol=1.0e-4)
86
84
 
87
85
  print("Example matrix multiplication passed")
warp/fem/__init__.py CHANGED
@@ -93,4 +93,14 @@ from .space import (
93
93
  make_space_partition,
94
94
  make_space_restriction,
95
95
  )
96
- from .types import NULL_ELEMENT_INDEX, Coords, Domain, ElementIndex, Field, Sample, make_free_sample
96
+ from .types import (
97
+ NULL_ELEMENT_INDEX,
98
+ NULL_QP_INDEX,
99
+ Coords,
100
+ Domain,
101
+ ElementIndex,
102
+ Field,
103
+ QuadraturePointIndex,
104
+ Sample,
105
+ make_free_sample,
106
+ )
warp/fem/adaptivity.py CHANGED
@@ -368,7 +368,7 @@ def _fill_graded_cells(
368
368
  level = int(coarse_level[cell])
369
369
  refinement = wp.min(1, coarse_refinement[cell])
370
370
 
371
- count = wp.select(refinement > 0, 1, 8)
371
+ count = wp.where(refinement > 0, 8, 1)
372
372
  offset = wp.atomic_sub(fine_count, 0, count) - count
373
373
 
374
374
  f_level = level - refinement
@@ -403,7 +403,7 @@ def _sample_refinement(
403
403
  if sampled_level >= 0:
404
404
  min_level = wp.min(sampled_level, min_level)
405
405
 
406
- return wp.select(min_level < level_count, -1, cur_level - wp.clamp(min_level, 0, cur_level))
406
+ return wp.where(min_level < level_count, cur_level - wp.clamp(min_level, 0, cur_level), -1)
407
407
 
408
408
 
409
409
  @integrand
@@ -431,7 +431,7 @@ def _count_refined_voxels(
431
431
 
432
432
  coarse_refinement[cell] = wp.int8(refinement)
433
433
  if refinement >= 0:
434
- wp.atomic_add(fine_count, 0, wp.select(refinement > 0, 1, 8))
434
+ wp.atomic_add(fine_count, 0, wp.where(refinement > 0, 8, 1))
435
435
 
436
436
 
437
437
  @wp.kernel
@@ -449,7 +449,7 @@ def _fill_refined_voxels(
449
449
  refinement = wp.min(1, int(coarse_refinement[cell]))
450
450
 
451
451
  if refinement >= 0:
452
- count = wp.select(refinement > 0, 1, 8)
452
+ count = wp.where(refinement > 0, 8, 1)
453
453
  offset = wp.atomic_sub(fine_count, 0, count) - count
454
454
 
455
455
  f_level = level - refinement
warp/fem/field/field.py CHANGED
@@ -180,7 +180,7 @@ class SpaceField(GeometryField):
180
180
 
181
181
  @property
182
182
  def gradient_dtype(self):
183
- """Return type of the gradient operator. Assumes self.gradient_valid()"""
183
+ """Return type of the (world space) gradient operator. Assumes self.gradient_valid()"""
184
184
  if wp.types.type_is_vector(self.dtype):
185
185
  return cache.cached_mat_type(
186
186
  shape=(wp.types.type_length(self.dtype), self.geometry.dimension),
@@ -188,6 +188,16 @@ class SpaceField(GeometryField):
188
188
  )
189
189
  return cache.cached_vec_type(length=self.geometry.dimension, dtype=wp.types.type_scalar_type(self.dtype))
190
190
 
191
+ @property
192
+ def reference_gradient_dtype(self):
193
+ """Return type of the reference space gradient operator. Assumes self.gradient_valid()"""
194
+ if wp.types.type_is_vector(self.dtype):
195
+ return cache.cached_mat_type(
196
+ shape=(wp.types.type_length(self.dtype), self.geometry.cell_dimension),
197
+ dtype=wp.types.type_scalar_type(self.dtype),
198
+ )
199
+ return cache.cached_vec_type(length=self.geometry.cell_dimension, dtype=wp.types.type_scalar_type(self.dtype))
200
+
191
201
  @property
192
202
  def divergence_dtype(self):
193
203
  """Return type of the divergence operator. Assumes self.gradient_valid()"""