tide-GPR 0.0.9__py3-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tide/__init__.py +65 -0
- tide/autograd_utils.py +26 -0
- tide/backend_utils.py +536 -0
- tide/callbacks.py +348 -0
- tide/cfl.py +64 -0
- tide/csrc/CMakeLists.txt +263 -0
- tide/csrc/common_cpu.h +31 -0
- tide/csrc/common_gpu.h +56 -0
- tide/csrc/maxwell.c +2133 -0
- tide/csrc/maxwell.cu +2297 -0
- tide/csrc/maxwell_born.cu +0 -0
- tide/csrc/staggered_grid.h +175 -0
- tide/csrc/staggered_grid_3d.h +124 -0
- tide/csrc/storage_utils.c +78 -0
- tide/csrc/storage_utils.cu +135 -0
- tide/csrc/storage_utils.h +36 -0
- tide/grid_utils.py +31 -0
- tide/maxwell.py +2651 -0
- tide/padding.py +139 -0
- tide/resampling.py +246 -0
- tide/staggered.py +567 -0
- tide/storage.py +131 -0
- tide/tide/libtide_C.so +0 -0
- tide/utils.py +274 -0
- tide/validation.py +71 -0
- tide/wavelets.py +72 -0
- tide_gpr-0.0.9.dist-info/METADATA +256 -0
- tide_gpr-0.0.9.dist-info/RECORD +31 -0
- tide_gpr-0.0.9.dist-info/WHEEL +5 -0
- tide_gpr-0.0.9.dist-info/licenses/LICENSE +46 -0
- tide_gpr.libs/libgomp-24e2ab19.so.1.0.0 +0 -0
tide/callbacks.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
1
|
+
"""Callback state and helpers for TIDE propagators."""
|
|
2
|
+
|
|
3
|
+
from typing import (
|
|
4
|
+
TYPE_CHECKING,
|
|
5
|
+
Callable,
|
|
6
|
+
Dict,
|
|
7
|
+
Optional,
|
|
8
|
+
Union,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from types import EllipsisType
|
|
13
|
+
|
|
14
|
+
import torch
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CallbackState:
    """Snapshot of the simulation handed to user callbacks while propagating.

    The state bundles the wavefields, model parameters and (during the
    backward pass) gradients of a single time step, and lets the callback
    read each tensor through one of three views:

    - 'full': the tensor exactly as stored, including FD padding
    - 'pml': the tensor with the FD padding trimmed off
    - 'inner': the tensor with both FD padding and PML layers trimmed off

    Example:
        >>> def my_callback(state: CallbackState):
        ...     # Get the Ey field in the inner (physical) region
        ...     ey = state.get_wavefield("Ey", view="inner")
        ...     print(f"Step {state.step}, max |Ey| = {ey.abs().max():.6e}")
        ...
        ...     # Get the permittivity model
        ...     eps = state.get_model("epsilon", view="inner")
        ...
        ...     # During backward pass, get gradients
        ...     if state.is_backward:
        ...         grad_eps = state.get_gradient("epsilon", view="inner")
        >>>
        >>> # Use with maxwell propagator
        >>> result = maxwell.maxwelltm(..., forward_callback=my_callback)

    Attributes:
        dt: The time step size in seconds.
        step: The current time step number (0-indexed).
        nt: Total number of time steps.
        is_backward: Whether this is during backward (adjoint) propagation.
    """

    def __init__(
        self,
        dt: float,
        step: int,
        nt: int,
        wavefields: Dict[str, torch.Tensor],
        models: Dict[str, torch.Tensor],
        gradients: Optional[Dict[str, torch.Tensor]] = None,
        fd_pad: Optional[list[int]] = None,
        pml_width: Optional[list[int]] = None,
        is_backward: bool = False,
        grid_spacing: Optional[list[float]] = None,
    ) -> None:
        """Initialize the callback state.

        Args:
            dt: The time step size in seconds.
            step: The current time step number.
            nt: Total number of time steps.
            wavefields: A dictionary mapping wavefield names to tensors.
                For Maxwell TM: {"Ey", "Hx", "Hz", "m_Ey_x", "m_Ey_z", ...}
            models: A dictionary mapping model names to tensors.
                For Maxwell TM: {"epsilon", "sigma", "mu", "ca", "cb", "cq"}
            gradients: A dictionary mapping gradient names to tensors.
                Only available during backward pass.
            fd_pad: Finite-difference padding per spatial dimension, in
                [d0_low, d0_high, d1_low, d1_high, ...] order (4 entries in
                2D, 6 in 3D). If None, no padding is assumed.
            pml_width: PML width per spatial dimension, same layout as
                ``fd_pad``. If None, no PML is assumed.
            is_backward: Whether this is during backward propagation.
            grid_spacing: Grid spacing [dy, dx] in meters.
        """
        self.dt = dt
        self.step = step
        self.nt = nt
        self.is_backward = is_backward
        self._wavefields = wavefields
        self._models = models
        self._gradients = gradients if gradients is not None else {}
        self._grid_spacing = grid_spacing

        self._ndim = self._infer_spatial_ndim(fd_pad, pml_width, models)

        # The zero defaults must have one (low, high) pair per spatial
        # dimension. A fixed 4-entry default would make _get_view index out
        # of range whenever the other list establishes a 3D domain.
        no_trim = [0] * (2 * self._ndim)
        self._fd_pad = fd_pad if fd_pad is not None else list(no_trim)
        self._pml_width = pml_width if pml_width is not None else list(no_trim)

    @staticmethod
    def _infer_spatial_ndim(
        fd_pad: Optional[list[int]],
        pml_width: Optional[list[int]],
        models: Dict[str, torch.Tensor],
    ) -> int:
        """Return the number of spatial dimensions (2 or 3).

        Padding lists are preferred because they are unambiguous: 4 entries
        mean 2D, 6 entries mean 3D. ``fd_pad`` is consulted before
        ``pml_width``. Without a usable list, the rank of the first model
        tensor is used as a heuristic:

        - rank 2 ([ny, nx]) -> 2
        - rank 4 ([n_shots, nz, ny, nx]) -> 3
        - rank 3 is ambiguous (2D batched vs. 3D unbatched) and is treated as
          2D; callers should pass fd_pad/pml_width to disambiguate.

        Everything else, including an empty ``models`` dict, defaults to 2.
        """
        for pad in (fd_pad, pml_width):
            if pad is not None and len(pad) in {4, 6}:
                return len(pad) // 2
        if models:
            first_model = next(iter(models.values()))
            if first_model.ndim == 4:
                return 3
        return 2

    @property
    def time(self) -> float:
        """Current simulation time in seconds."""
        return self.step * self.dt

    @property
    def progress(self) -> float:
        """Simulation progress as a fraction [0, 1]."""
        # max(..., 1) avoids dividing by zero when nt is 1 (or smaller).
        return self.step / max(self.nt - 1, 1)

    @property
    def wavefield_names(self) -> list[str]:
        """list of available wavefield names."""
        return list(self._wavefields.keys())

    @property
    def model_names(self) -> list[str]:
        """list of available model names."""
        return list(self._models.keys())

    @property
    def gradient_names(self) -> list[str]:
        """list of available gradient names."""
        return list(self._gradients.keys())

    def get_wavefield(self, name: str, view: str = "inner") -> torch.Tensor:
        """Get a wavefield tensor.

        Args:
            name: The name of the wavefield. For Maxwell TM mode:
                - "Ey": Electric field (y-component)
                - "Hx": Magnetic field (x-component)
                - "Hz": Magnetic field (z-component)
                - "m_Ey_x", "m_Ey_z", "m_Hx_z", "m_Hz_x": CPML auxiliary fields
                - During backward: "lambda_Ey", "lambda_Hx", "lambda_Hz"
            view: The part of the wavefield to return:
                - 'inner': The physical model region (default)
                - 'pml': Model region plus PML layers
                - 'full': Entire domain including FD padding

        Returns:
            The specified part of the wavefield tensor.
            Shape depends on view and whether batched: [n_shots, ny, nx] or [ny, nx]

        Raises:
            KeyError: If the wavefield name is not found.
            ValueError: If view is not valid.
        """
        if name not in self._wavefields:
            available = ", ".join(self._wavefields.keys())
            raise KeyError(f"Wavefield '{name}' not found. Available: {available}")
        return self._get_view(self._wavefields[name], view)

    def get_model(self, name: str, view: str = "inner") -> torch.Tensor:
        """Get a model parameter tensor.

        Args:
            name: The name of the model parameter. For Maxwell TM:
                - "epsilon": Relative permittivity
                - "sigma": Electrical conductivity (S/m)
                - "mu": Relative permeability
                - "ca", "cb", "cq": Update coefficients
            view: The part of the model to return:
                - 'inner': The physical model region (default)
                - 'pml': Model region plus PML layers
                - 'full': Entire domain including FD padding

        Returns:
            The specified part of the model tensor.

        Raises:
            KeyError: If the model name is not found.
            ValueError: If view is not valid.
        """
        if name not in self._models:
            available = ", ".join(self._models.keys())
            raise KeyError(f"Model '{name}' not found. Available: {available}")
        return self._get_view(self._models[name], view)

    def get_gradient(self, name: str, view: str = "inner") -> torch.Tensor:
        """Get a gradient tensor (only available during backward pass).

        Args:
            name: The name of the gradient. For Maxwell TM:
                - "epsilon" or "ca": Gradient w.r.t. permittivity/Ca
                - "sigma" or "cb": Gradient w.r.t. conductivity/Cb
            view: The part of the gradient to return:
                - 'inner': The physical model region (default)
                - 'pml': Model region plus PML layers
                - 'full': Entire domain including FD padding

        Returns:
            The specified part of the gradient tensor.

        Raises:
            KeyError: If the gradient name is not found.
            ValueError: If view is not valid.
            RuntimeError: If no gradients were supplied to this state
                (e.g. when called from a forward callback).
        """
        if not self._gradients:
            raise RuntimeError(
                "Gradients are only available during backward propagation. "
                "Use backward_callback instead of forward_callback."
            )
        if name not in self._gradients:
            available = ", ".join(self._gradients.keys())
            raise KeyError(f"Gradient '{name}' not found. Available: {available}")
        return self._get_view(self._gradients[name], view)

    def _get_view(self, x: torch.Tensor, view: str) -> torch.Tensor:
        """Extract a view of a tensor based on the specified region.

        Only the trailing ``self._ndim`` dimensions are sliced; any leading
        (batch) dimensions are passed through untouched.

        Args:
            x: The tensor to extract a view from.
            view: One of 'full', 'pml', or 'inner'.

        Returns:
            ``x`` itself for 'full'; otherwise a slice of ``x`` with the FD
            padding ('pml') or the FD padding plus PML width ('inner')
            removed from both ends of every spatial dimension.

        Raises:
            ValueError: If ``view`` is not a known view name or the spatial
                ndim is neither 2 nor 3.
        """
        if view == "full":
            return x

        if view not in {"pml", "inner"}:
            raise ValueError(
                f"view must be 'full', 'pml', or 'inner', but got '{view}'"
            )

        spatial_ndim = self._ndim
        if spatial_ndim not in {2, 3}:
            raise ValueError(f"Unsupported spatial ndim {spatial_ndim}.")

        # Amount to trim at the low / high end of each spatial dimension.
        starts = [self._fd_pad[2 * i] for i in range(spatial_ndim)]
        ends = [self._fd_pad[2 * i + 1] for i in range(spatial_ndim)]
        if view == "inner":
            starts = [
                start + self._pml_width[2 * i] for i, start in enumerate(starts)
            ]
            ends = [
                end + self._pml_width[2 * i + 1] for i, end in enumerate(ends)
            ]

        def _slice(dim_size: int, start: int, end: int) -> slice:
            # A high-side trim of 0 must map to an open stop; `dim_size - 0`
            # would also work, but None keeps the slice independent of size.
            stop = dim_size - end if end > 0 else None
            return slice(start, stop)

        # The leading Ellipsis makes one index expression serve both the
        # unbatched ([ny, nx] / [nz, ny, nx]) and batched ([..., ny, nx] /
        # [..., nz, ny, nx]) layouts.
        idx: tuple[Union["EllipsisType", slice], ...] = (
            ...,
            *(
                _slice(x.shape[-spatial_ndim + i], starts[i], ends[i])
                for i in range(spatial_ndim)
            ),
        )
        return x[idx]

    def __repr__(self) -> str:
        """Return a string representation of the callback state."""
        return (
            f"CallbackState(step={self.step}/{self.nt}, "
            f"time={self.time:.2e}s, "
            f"is_backward={self.is_backward}, "
            f"wavefields={self.wavefield_names}, "
            f"models={self.model_names})"
        )
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
# Type alias for callback functions
|
|
301
|
+
Callback = Callable[[CallbackState], None]
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def create_callback_state(
    dt: float,
    step: int,
    nt: int,
    wavefields: Dict[str, torch.Tensor],
    models: Dict[str, torch.Tensor],
    gradients: Optional[Dict[str, torch.Tensor]] = None,
    fd_pad: Optional[list[int]] = None,
    pml_width: Optional[list[int]] = None,
    is_backward: bool = False,
    grid_spacing: Optional[list[float]] = None,
) -> CallbackState:
    """Build a CallbackState from the given simulation data.

    A thin convenience wrapper: every argument is forwarded unchanged, by
    keyword, to the ``CallbackState`` constructor.

    Args:
        dt: The time step size in seconds.
        step: The current time step number.
        nt: Total number of time steps.
        wavefields: A dictionary mapping wavefield names to tensors.
        models: A dictionary mapping model names to tensors.
        gradients: A dictionary mapping gradient names to tensors (backward only).
        fd_pad: Padding for finite difference stencil [y0, y1, x0, x1].
        pml_width: Width of PML layers [top, bottom, left, right].
        is_backward: Whether this is during backward propagation.
        grid_spacing: Grid spacing [dy, dx] in meters.

    Returns:
        A new CallbackState instance.
    """
    state = CallbackState(
        dt=dt,
        step=step,
        nt=nt,
        wavefields=wavefields,
        models=models,
        gradients=gradients,
        fd_pad=fd_pad,
        pml_width=pml_width,
        is_backward=is_backward,
        grid_spacing=grid_spacing,
    )
    return state
|
tide/cfl.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""CFL condition helpers for stable time stepping."""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def cfl_condition(
    grid_spacing: Union[float, list[float]],
    dt: float,
    max_vel: float,
    c_max: float = 1,
    eps: float = 1e-15,
) -> tuple[float, int]:
    """Calculate time step interval to satisfy CFL condition.

    The CFL (Courant-Friedrichs-Lewy) condition ensures numerical stability
    for explicit FDTD schemes. If the user-provided dt is too large, this
    function computes a smaller internal dt and the ratio between them.

    Args:
        grid_spacing: Grid spacing per dimension (e.g. [dy, dx]). A single
            number is expanded to two equal spacings.
        dt: User-provided time step. Its sign is preserved in the returned
            inner time step; only its magnitude enters the stability check.
        max_vel: Maximum wave velocity in the model. Must be positive.
        c_max: Maximum Courant number (default 1). Pass a smaller value to
            keep a stability margin.
        eps: Small value to prevent division by zero.

    Returns:
        Tuple of (inner_dt, step_ratio) where:
            - inner_dt: Time step satisfying CFL condition
            - step_ratio: Integer ratio dt / inner_dt, always >= 1

    Raises:
        ValueError: If ``max_vel`` is not positive.

    Example:
        >>> # Check if dt=1e-9 is stable for v=3e8 m/s, dx=1e-3 m
        >>> inner_dt, ratio = cfl_condition([1e-3, 1e-3], 1e-9, 3e8)
        >>> print(f"Need {ratio}x smaller time step")
    """
    # Normalize grid_spacing to a list without rebinding the parameter.
    if isinstance(grid_spacing, (int, float)):
        spacings = [float(grid_spacing), float(grid_spacing)]
    else:
        spacings = list(grid_spacing)

    if max_vel <= 0:
        raise ValueError("max_vel must be positive")

    # Maximum stable dt from CFL condition
    max_dt = (
        c_max / math.sqrt(sum(1 / dx**2 for dx in spacings)) / (max_vel**2 + eps)
    ) * max_vel

    # ceil() yields 0 for dt == 0, which would make the division below raise
    # ZeroDivisionError; a ratio of at least 1 keeps dt == 0 well-defined.
    step_ratio = max(1, math.ceil(abs(dt) / max_dt))
    inner_dt = dt / step_ratio

    if step_ratio >= 2:
        import warnings

        warnings.warn(
            f"CFL condition requires {step_ratio} internal time steps per "
            f"user time step (dt={dt}, inner_dt={inner_dt}). Consider using "
            "a smaller dt or coarser grid.",
            stacklevel=2,
        )

    return inner_dt, step_ratio
|
tide/csrc/CMakeLists.txt
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# TIDE Backend CMakelists.txt

cmake_minimum_required(VERSION 3.18)
project(tide_backend LANGUAGES C CXX)

option(TIDE_ENABLE_CUDA "Enable CUDA backend" ON)

# CUDA is optional: the CUDA language is enabled only when the option is ON
# and a toolkit is found. Later sections key off CUDAToolkit_FOUND.
if(NOT TIDE_ENABLE_CUDA)
    message(STATUS "CUDA disabled (TIDE_ENABLE_CUDA=OFF).")
    set(CUDAToolkit_FOUND FALSE)
else()
    find_package(CUDAToolkit)
    if(CUDAToolkit_FOUND)
        enable_language(CUDA)
    else()
        message(WARNING "CUDA not found. Building without CUDA support.")
    endif()
endif()

# Fall back to a Release build when no build type was requested.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
endif()

# --- OpenMP Configuration (aligned with deepwave) ---
# Targets link against this interface library; it stays empty when OpenMP is
# unavailable, and OPENMP_CONFIGURED records whether it was populated.
add_library(Tide_OpenMP_Interface INTERFACE)
set(OPENMP_CONFIGURED FALSE)

if(WIN32 AND CMAKE_C_COMPILER_ID MATCHES "Clang")
    # Clang on Windows: look for libomp only in the LLVM installation
    # directory and wire it up manually instead of using find_package.
    find_library(LIBOMP_LIB NAMES libomp omp PATHS "C:/Program Files/LLVM/lib" NO_DEFAULT_PATH)
    if(LIBOMP_LIB)
        target_link_libraries(Tide_OpenMP_Interface INTERFACE "${LIBOMP_LIB}")
        target_compile_options(Tide_OpenMP_Interface INTERFACE -fopenmp)
        set(OPENMP_CONFIGURED TRUE)
        message(STATUS "OpenMP enabled (Clang/LLVM on Windows).")
    else()
        message(STATUS "OpenMP not found (libomp.lib not in LLVM/lib).")
    endif()
else()
    find_package(OpenMP QUIET)
    if(OpenMP_C_FOUND)
        target_link_libraries(Tide_OpenMP_Interface INTERFACE OpenMP::OpenMP_C)
        set(OPENMP_CONFIGURED TRUE)
        message(STATUS "OpenMP enabled.")
    else()
        message(STATUS "OpenMP not found.")
    endif()
endif()

# --- Compiler Feature Detection and Flags ---
include(CheckCSourceCompiles)

# Minimal program that only compiles when AVX2 intrinsics are usable.
set(AVX2_TEST_CODE "
#include <immintrin.h>
int main() {
    __m256 vec = _mm256_set1_ps(42.0f);
    return 0;
}")

# Pick the AVX2 switch for the compiler family; it stays unset for other
# compilers, which disables the probe below.
if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|Intel")
    if(WIN32)
        set(C_AVX2_FLAG "/arch:AVX2") # Clang-cl uses MSVC-style flags
    else()
        set(C_AVX2_FLAG "-mavx2")
    endif()
endif()

if(C_AVX2_FLAG)
    # Compile the probe with the candidate flag, then restore the check state.
    set(CMAKE_REQUIRED_FLAGS "${C_AVX2_FLAG}")
    check_c_source_compiles("${AVX2_TEST_CODE}" HAVE_AVX2)
    unset(CMAKE_REQUIRED_FLAGS)
else()
    set(HAVE_AVX2 FALSE)
endif()

if(HAVE_AVX2)
    message(STATUS "AVX2 is supported.")
else()
    message(STATUS "AVX2 is not supported.")
endif()

# Release flags (aligned with deepwave)
if(CMAKE_BUILD_TYPE MATCHES Release AND CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|Intel")
    if(WIN32)
        # Clang-cl on Windows uses MSVC-style flags
        set(C_RELEASE_FLAGS "/O2" "/fp:fast")
    else()
        set(C_RELEASE_FLAGS "-Ofast")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Ofast")
    endif()
endif()
|
|
98
|
+
|
|
99
|
+
# --- Helper Macros for Object Libraries ---
|
|
100
|
+
# Define one CPU object library that compiles <BASENAME>.c for a single
# (stencil accuracy, dtype) combination.
# This is a macro, not a function: TARGET_NAME and the TIDE_OBJECTS /
# CPU_TARGETS lists are modified in the caller's scope.
macro(add_tide_cpu_object_library BASENAME ACCURACY DTYPE)
    set(TARGET_NAME "${BASENAME}_${ACCURACY}_${DTYPE}_cpu_obj")
    add_library(${TARGET_NAME} OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/${BASENAME}.c)

    # Set PIC for shared library objects
    set_target_properties(${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)

    target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
    # The same source is specialised per variant through these definitions.
    target_compile_definitions(${TARGET_NAME} PRIVATE
        TIDE_STENCIL=${ACCURACY}
        TIDE_DTYPE=${DTYPE}
        TIDE_DEVICE=cpu
    )

    # Optional flags: applied only when the configuration step set them.
    if(C_RELEASE_FLAGS)
        target_compile_options(${TARGET_NAME} PRIVATE ${C_RELEASE_FLAGS})
    endif()
    if(HAVE_AVX2 AND C_AVX2_FLAG)
        target_compile_options(${TARGET_NAME} PRIVATE ${C_AVX2_FLAG})
    endif()

    # Record the objects for the final shared library and the target name so
    # the caller can apply further settings to every CPU target.
    list(APPEND TIDE_OBJECTS $<TARGET_OBJECTS:${TARGET_NAME}>)
    list(APPEND CPU_TARGETS ${TARGET_NAME})
endmacro()
|
|
123
|
+
|
|
124
|
+
if(CUDAToolkit_FOUND)
    # Define one CUDA object library that compiles <BASENAME>.cu for a single
    # (stencil accuracy, dtype) combination.
    # This is a macro, not a function: TARGET_NAME, IS_FLOAT and the
    # TIDE_OBJECTS list are modified in the caller's scope.
    macro(add_tide_cuda_object_library BASENAME ACCURACY DTYPE)
        set(TARGET_NAME "${BASENAME}_${ACCURACY}_${DTYPE}_cuda_obj")
        add_library(${TARGET_NAME} OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/${BASENAME}.cu)

        # Set TIDE_DTYPE_FLOAT based on DTYPE (optimization 2.1).
        # Quoted so the value is compared as a string and never re-read as
        # the name of a variable.
        if("${DTYPE}" STREQUAL "float")
            set(IS_FLOAT 1)
        else()
            set(IS_FLOAT 0)
        endif()
        target_compile_definitions(${TARGET_NAME} PRIVATE
            TIDE_STENCIL=${ACCURACY}
            TIDE_DTYPE=${DTYPE}
            TIDE_DEVICE=cuda
            TIDE_DTYPE_FLOAT=${IS_FLOAT}
        )
        target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
        list(APPEND TIDE_OBJECTS $<TARGET_OBJECTS:${TARGET_NAME}>)

        # These objects end up in a shared library, so request PIC on the
        # target itself (as the CPU macro does) rather than depending only on
        # a global compiler flag.
        set_target_properties(${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)

        # Use the configured architectures, or "89" when none are defined.
        if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
            set_target_properties(${TARGET_NAME} PROPERTIES CUDA_ARCHITECTURES "89")
        else()
            set_target_properties(${TARGET_NAME} PROPERTIES CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
        endif()

        if(CMAKE_BUILD_TYPE MATCHES Release)
            # Release-only options, restricted to CUDA sources.
            target_compile_options(${TARGET_NAME} PRIVATE
                $<$<COMPILE_LANGUAGE:CUDA>:
                --use_fast_math
                -O3
                --restrict
                --maxrregcount=64
                -Xptxas=-dlcm=ca
                >)
        endif()
    endmacro()
endif()
|
|
162
|
+
|
|
163
|
+
# Prepare lists that will collect the object files
set(TIDE_OBJECTS)
set(CPU_TARGETS)

# Build matrix: one object library per (accuracy, dtype) pair. Each accuracy
# value is passed to the sources as TIDE_STENCIL, each dtype as TIDE_DTYPE.
set(ACCURACIES 2 4 6 8)
set(DTYPES float double)

# --- Storage utilities ---
# The CUDA source stays empty unless the CUDA section fills it in.
set(STORAGE_UTILS_CPU_SRC ${CMAKE_CURRENT_SOURCE_DIR}/storage_utils.c)
set(STORAGE_UTILS_CUDA_SRC "")

# --- CPU object libraries ---
foreach(ACCURACY IN LISTS ACCURACIES)
    foreach(DTYPE IN LISTS DTYPES)
        add_tide_cpu_object_library(maxwell ${ACCURACY} ${DTYPE})
    endforeach()
endforeach()

# Attach OpenMP to every CPU object library when it was configured.
if(OPENMP_CONFIGURED)
    foreach(CPU_TARGET IN LISTS CPU_TARGETS)
        target_link_libraries(${CPU_TARGET} PRIVATE Tide_OpenMP_Interface)
    endforeach()
endif()
|
|
185
|
+
|
|
186
|
+
# --- CUDA object libraries ---
if(CUDAToolkit_FOUND)
    if(CMAKE_BUILD_TYPE MATCHES Release)
        # NOTE(review): this variable is not referenced anywhere in this
        # file; the CUDA macro spells the same options out itself.
        set(CUDA_RELEASE_OPTIONS --use_fast_math -O3 --restrict --maxrregcount=64 -Xptxas=-dlcm=ca)
    endif()
    if(NOT WIN32)
        # CMAKE_CUDA_FLAGS is a space-separated string, not a list:
        # list(APPEND) would insert a ';' whenever the variable is already
        # non-empty and corrupt the compiler command line.
        string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler=-fPIC")
    endif()

    # One CUDA object library per (accuracy, dtype) pair.
    foreach(ACCURACY ${ACCURACIES})
        foreach(DTYPE ${DTYPES})
            add_tide_cuda_object_library(maxwell ${ACCURACY} ${DTYPE})
        endforeach()
    endforeach()

    # --- Storage utilities ---
    set(STORAGE_UTILS_CUDA_SRC ${CMAKE_CURRENT_SOURCE_DIR}/storage_utils.cu)
    # NOTE(review): CUDA_ARCHITECTURES is documented as a target property;
    # setting it on a source file presumably has no effect - confirm, and if
    # so set it on the target that compiles this file instead.
    set_source_files_properties(${STORAGE_UTILS_CUDA_SRC} PROPERTIES CUDA_ARCHITECTURES "89")
endif()
|
|
205
|
+
|
|
206
|
+
# --- Final Library Build ---
# Combine all objects into a single shared library
add_library(tide_C SHARED ${TIDE_OBJECTS} ${STORAGE_UTILS_CUDA_SRC} ${STORAGE_UTILS_CPU_SRC})

# On Windows the output file is named explicitly with the "lib" prefix.
if(WIN32)
    set_target_properties(tide_C PROPERTIES OUTPUT_NAME "libtide_C")
endif()

# Export every symbol and place the built library in the tide package
# directory (the parent of this source directory).
set_target_properties(tide_C PROPERTIES
    C_VISIBILITY_PRESET default
    CUDA_VISIBILITY_PRESET default
    POSITION_INDEPENDENT_CODE ON
    WINDOWS_EXPORT_ALL_SYMBOLS ON
    LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
)

if(HAVE_AVX2)
    target_compile_definitions(tide_C PRIVATE HAVE_AVX2)
endif()

if(OPENMP_CONFIGURED)
    target_link_libraries(tide_C PRIVATE Tide_OpenMP_Interface)
endif()

# --- Print Configuration Summary ---
message(STATUS "")
message(STATUS "TIDE Backend Configuration:")
message(STATUS " Build Type: ${CMAKE_BUILD_TYPE}")
message(STATUS " OpenMP: ${OPENMP_CONFIGURED}")
message(STATUS " AVX2: ${HAVE_AVX2}")
if(CUDAToolkit_FOUND)
    message(STATUS " CUDA: ON")
    message(STATUS " CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
else()
    message(STATUS " CUDA: OFF")
endif()
message(STATUS " Output Directory: ${CMAKE_CURRENT_SOURCE_DIR}/..")
message(STATUS "")

# Install target
install(TARGETS tide_C
    LIBRARY DESTINATION tide
    ARCHIVE DESTINATION tide
    RUNTIME DESTINATION tide
)

# Bundle OpenMP runtime on Windows (Clang's libomp.dll); installed only when
# the DLL is already present next to the package sources.
if(WIN32)
    set(TIDE_LIBOMP_DLL "${CMAKE_CURRENT_SOURCE_DIR}/../libomp.dll")
    if(EXISTS "${TIDE_LIBOMP_DLL}")
        install(FILES "${TIDE_LIBOMP_DLL}" DESTINATION tide)
    endif()
endif()
|
tide/csrc/common_cpu.h
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#ifndef COMMON_CPU_H
#define COMMON_CPU_H

#include <stdint.h>
#include <stdbool.h>

/* Fallbacks used when the build does not define the element type or the
 * stencil accuracy on the command line. */
#ifndef TIDE_DTYPE
#define TIDE_DTYPE float
#endif

#ifndef TIDE_STENCIL
#define TIDE_STENCIL 4
#endif

/* Loop index type; the same with and without OpenMP. */
#define TIDE_OMP_INDEX int64_t

/* Loop annotations: they expand to OpenMP pragmas when the compiler has
 * OpenMP enabled and to nothing otherwise, so annotated loops compile
 * either way. */
#ifdef _OPENMP
#define TIDE_OMP_PARALLEL_FOR _Pragma("omp parallel for")
#define TIDE_OMP_PARALLEL_FOR_COLLAPSE2 _Pragma("omp parallel for collapse(2)")
#define TIDE_OMP_PARALLEL_FOR_COLLAPSE4 _Pragma("omp parallel for collapse(4)")
#define TIDE_OMP_SIMD _Pragma("omp simd")
#define TIDE_OMP_SIMD_COLLAPSE2 _Pragma("omp simd collapse(2)")
#else
#define TIDE_OMP_PARALLEL_FOR
#define TIDE_OMP_PARALLEL_FOR_COLLAPSE2
#define TIDE_OMP_PARALLEL_FOR_COLLAPSE4
#define TIDE_OMP_SIMD
#define TIDE_OMP_SIMD_COLLAPSE2
#endif

#endif
|