tide_GPR-0.0.9-py3-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tide/callbacks.py ADDED
@@ -0,0 +1,348 @@
+ """Callback state and helpers for TIDE propagators."""
+ 
+ from typing import (
+     TYPE_CHECKING,
+     Callable,
+     Dict,
+     Optional,
+     Union,
+ )
+ 
+ if TYPE_CHECKING:
+     from types import EllipsisType
+ 
+ import torch
+ 
+ 
+ class CallbackState:
+     """State provided to user callbacks during wave propagation.
+ 
+     This class encapsulates the simulation state at a given time step,
+     providing convenient access to wavefields, model parameters, and
+     gradients with different views (full, pml, inner).
+ 
+     The three views correspond to different regions of the computational domain:
+     - 'full': The entire padded domain, including FD padding
+     - 'pml': The model region plus PML absorbing layers
+     - 'inner': Only the physical model region (excluding PML)
+ 
+     Example:
+         >>> def my_callback(state: CallbackState):
+         ...     # Get the Ey field in the inner (physical) region
+         ...     ey = state.get_wavefield("Ey", view="inner")
+         ...     print(f"Step {state.step}, max |Ey| = {ey.abs().max():.6e}")
+         ...
+         ...     # Get the permittivity model
+         ...     eps = state.get_model("epsilon", view="inner")
+         ...
+         ...     # During backward pass, get gradients
+         ...     if state.is_backward:
+         ...         grad_eps = state.get_gradient("epsilon", view="inner")
+         >>>
+         >>> # Use with the maxwell propagator
+         >>> result = maxwell.maxwelltm(..., forward_callback=my_callback)
+ 
+     Attributes:
+         dt: The time step size in seconds.
+         step: The current time step number (0-indexed).
+         nt: Total number of time steps.
+         is_backward: Whether this is during backward (adjoint) propagation.
+     """
+ 
+     def __init__(
+         self,
+         dt: float,
+         step: int,
+         nt: int,
+         wavefields: Dict[str, torch.Tensor],
+         models: Dict[str, torch.Tensor],
+         gradients: Optional[Dict[str, torch.Tensor]] = None,
+         fd_pad: Optional[list[int]] = None,
+         pml_width: Optional[list[int]] = None,
+         is_backward: bool = False,
+         grid_spacing: Optional[list[float]] = None,
+     ) -> None:
+         """Initialize the callback state.
+ 
+         Args:
+             dt: The time step size in seconds.
+             step: The current time step number.
+             nt: Total number of time steps.
+             wavefields: A dictionary mapping wavefield names to tensors.
+                 For Maxwell TM: {"Ey", "Hx", "Hz", "m_Ey_x", "m_Ey_z", ...}
+             models: A dictionary mapping model names to tensors.
+                 For Maxwell TM: {"epsilon", "sigma", "mu", "ca", "cb", "cq"}
+             gradients: A dictionary mapping gradient names to tensors.
+                 Only available during the backward pass.
+             fd_pad: Padding for the finite difference stencil [y0, y1, x0, x1].
+                 If None, assumes no padding.
+             pml_width: Width of the PML layers [top, bottom, left, right].
+                 If None, assumes no PML.
+             is_backward: Whether this is during backward propagation.
+             grid_spacing: Grid spacing [dy, dx] in meters.
+         """
+         self.dt = dt
+         self.step = step
+         self.nt = nt
+         self.is_backward = is_backward
+         self._wavefields = wavefields
+         self._models = models
+         self._gradients = gradients if gradients is not None else {}
+         self._fd_pad = fd_pad if fd_pad is not None else [0, 0, 0, 0]
+         self._pml_width = pml_width if pml_width is not None else [0, 0, 0, 0]
+         self._grid_spacing = grid_spacing
+ 
+         # Determine spatial ndim from padding (preferred) or model tensors.
+         # Padding lists are in [d0_low, d0_high, d1_low, d1_high, ...] format.
+         if fd_pad is not None and len(fd_pad) in {4, 6}:
+             self._ndim = len(fd_pad) // 2
+         elif pml_width is not None and len(pml_width) in {4, 6}:
+             self._ndim = len(pml_width) // 2
+         elif models:
+             first_model = next(iter(models.values()))
+             # Heuristic:
+             # - 2D unbatched: [ny, nx] -> 2
+             # - 2D batched: [n_shots, ny, nx] -> 2
+             # - 3D unbatched: [nz, ny, nx] -> ambiguous with 2D batched; callers
+             #   should pass fd_pad/pml_width to disambiguate.
+             # - 3D batched: [n_shots, nz, ny, nx] -> 3
+             if first_model.ndim == 2:
+                 self._ndim = 2
+             elif first_model.ndim == 4:
+                 self._ndim = 3
+             else:
+                 # Preserve existing behavior (Maxwell TM callbacks) as default.
+                 self._ndim = 2
+         else:
+             # Default to 2D when no other information is available.
+             self._ndim = 2
+ 
+     @property
+     def time(self) -> float:
+         """Current simulation time in seconds."""
+         return self.step * self.dt
+ 
+     @property
+     def progress(self) -> float:
+         """Simulation progress as a fraction [0, 1]."""
+         return self.step / max(self.nt - 1, 1)
+ 
+     @property
+     def wavefield_names(self) -> list[str]:
+         """List of available wavefield names."""
+         return list(self._wavefields.keys())
+ 
+     @property
+     def model_names(self) -> list[str]:
+         """List of available model names."""
+         return list(self._models.keys())
+ 
+     @property
+     def gradient_names(self) -> list[str]:
+         """List of available gradient names."""
+         return list(self._gradients.keys())
+ 
+     def get_wavefield(self, name: str, view: str = "inner") -> torch.Tensor:
+         """Get a wavefield tensor.
+ 
+         Args:
+             name: The name of the wavefield. For Maxwell TM mode:
+                 - "Ey": Electric field (y-component)
+                 - "Hx": Magnetic field (x-component)
+                 - "Hz": Magnetic field (z-component)
+                 - "m_Ey_x", "m_Ey_z", "m_Hx_z", "m_Hz_x": CPML auxiliary fields
+                 - During backward: "lambda_Ey", "lambda_Hx", "lambda_Hz"
+             view: The part of the wavefield to return:
+                 - 'inner': The physical model region (default)
+                 - 'pml': Model region plus PML layers
+                 - 'full': Entire domain including FD padding
+ 
+         Returns:
+             The specified part of the wavefield tensor.
+             Shape depends on view and whether batched: [n_shots, ny, nx] or [ny, nx].
+ 
+         Raises:
+             KeyError: If the wavefield name is not found.
+             ValueError: If view is not valid.
+         """
+         if name not in self._wavefields:
+             available = ", ".join(self._wavefields.keys())
+             raise KeyError(f"Wavefield '{name}' not found. Available: {available}")
+         return self._get_view(self._wavefields[name], view)
+ 
+     def get_model(self, name: str, view: str = "inner") -> torch.Tensor:
+         """Get a model parameter tensor.
+ 
+         Args:
+             name: The name of the model parameter. For Maxwell TM:
+                 - "epsilon": Relative permittivity
+                 - "sigma": Electrical conductivity (S/m)
+                 - "mu": Relative permeability
+                 - "ca", "cb", "cq": Update coefficients
+             view: The part of the model to return:
+                 - 'inner': The physical model region (default)
+                 - 'pml': Model region plus PML layers
+                 - 'full': Entire domain including FD padding
+ 
+         Returns:
+             The specified part of the model tensor.
+ 
+         Raises:
+             KeyError: If the model name is not found.
+             ValueError: If view is not valid.
+         """
+         if name not in self._models:
+             available = ", ".join(self._models.keys())
+             raise KeyError(f"Model '{name}' not found. Available: {available}")
+         return self._get_view(self._models[name], view)
+ 
+     def get_gradient(self, name: str, view: str = "inner") -> torch.Tensor:
+         """Get a gradient tensor (only available during the backward pass).
+ 
+         Args:
+             name: The name of the gradient. For Maxwell TM:
+                 - "epsilon" or "ca": Gradient w.r.t. permittivity/Ca
+                 - "sigma" or "cb": Gradient w.r.t. conductivity/Cb
+             view: The part of the gradient to return:
+                 - 'inner': The physical model region (default)
+                 - 'pml': Model region plus PML layers
+                 - 'full': Entire domain including FD padding
+ 
+         Returns:
+             The specified part of the gradient tensor.
+ 
+         Raises:
+             KeyError: If the gradient name is not found.
+             ValueError: If view is not valid.
+             RuntimeError: If called during the forward pass (no gradients available).
+         """
+         if not self._gradients:
+             raise RuntimeError(
+                 "Gradients are only available during backward propagation. "
+                 "Use backward_callback instead of forward_callback."
+             )
+         if name not in self._gradients:
+             available = ", ".join(self._gradients.keys())
+             raise KeyError(f"Gradient '{name}' not found. Available: {available}")
+         return self._get_view(self._gradients[name], view)
+ 
+     def _get_view(self, x: torch.Tensor, view: str) -> torch.Tensor:
+         """Extract a view of a tensor based on the specified region.
+ 
+         Args:
+             x: The tensor to extract a view from.
+             view: One of 'full', 'pml', or 'inner'.
+ 
+         Returns:
+             A view of the tensor corresponding to the specified region.
+         """
+         if view == "full":
+             return x
+ 
+         if view not in {"pml", "inner"}:
+             raise ValueError(
+                 f"view must be 'full', 'pml', or 'inner', but got '{view}'"
+             )
+ 
+         spatial_ndim = self._ndim
+         if spatial_ndim not in {2, 3}:
+             raise ValueError(f"Unsupported spatial ndim {spatial_ndim}.")
+ 
+         if view == "pml":
+             starts = [self._fd_pad[2 * i] for i in range(spatial_ndim)]
+             ends = [self._fd_pad[2 * i + 1] for i in range(spatial_ndim)]
+         else:
+             starts = [
+                 self._fd_pad[2 * i] + self._pml_width[2 * i]
+                 for i in range(spatial_ndim)
+             ]
+             ends = [
+                 self._fd_pad[2 * i + 1] + self._pml_width[2 * i + 1]
+                 for i in range(spatial_ndim)
+             ]
+ 
+         def _slice(dim_size: int, start: int, end: int) -> slice:
+             stop = dim_size - end if end > 0 else None
+             return slice(start, stop)
+ 
+         if x.ndim == spatial_ndim:
+             # Non-batched: [ny, nx] or [nz, ny, nx]
+             idx = tuple(
+                 _slice(x.shape[i], starts[i], ends[i]) for i in range(spatial_ndim)
+             )
+             return x[idx]
+ 
+         # Batched: [..., ny, nx] or [..., nz, ny, nx]
+         idx_batched: tuple[Union["EllipsisType", slice], ...] = (
+             ...,
+             *(
+                 _slice(
+                     x.shape[-spatial_ndim + i],
+                     starts[i],
+                     ends[i],
+                 )
+                 for i in range(spatial_ndim)
+             ),
+         )
+         return x[idx_batched]
+ 
+     def __repr__(self) -> str:
+         """Return a string representation of the callback state."""
+         return (
+             f"CallbackState(step={self.step}/{self.nt}, "
+             f"time={self.time:.2e}s, "
+             f"is_backward={self.is_backward}, "
+             f"wavefields={self.wavefield_names}, "
+             f"models={self.model_names})"
+         )
+ 
+ 
+ # Type alias for callback functions
+ Callback = Callable[[CallbackState], None]
+ 
+ 
+ def create_callback_state(
+     dt: float,
+     step: int,
+     nt: int,
+     wavefields: Dict[str, torch.Tensor],
+     models: Dict[str, torch.Tensor],
+     gradients: Optional[Dict[str, torch.Tensor]] = None,
+     fd_pad: Optional[list[int]] = None,
+     pml_width: Optional[list[int]] = None,
+     is_backward: bool = False,
+     grid_spacing: Optional[list[float]] = None,
+ ) -> CallbackState:
+     """Factory function to create a CallbackState.
+ 
+     This is a convenience function that creates a CallbackState with
+     the given parameters. It is equivalent to calling the CallbackState
+     constructor directly.
+ 
+     Args:
+         dt: The time step size in seconds.
+         step: The current time step number.
+         nt: Total number of time steps.
+         wavefields: A dictionary mapping wavefield names to tensors.
+         models: A dictionary mapping model names to tensors.
+         gradients: A dictionary mapping gradient names to tensors (backward only).
+         fd_pad: Padding for the finite difference stencil [y0, y1, x0, x1].
+         pml_width: Width of the PML layers [top, bottom, left, right].
+         is_backward: Whether this is during backward propagation.
+         grid_spacing: Grid spacing [dy, dx] in meters.
+ 
+     Returns:
+         A new CallbackState instance.
+     """
+     return CallbackState(
+         dt=dt,
+         step=step,
+         nt=nt,
+         wavefields=wavefields,
+         models=models,
+         gradients=gradients,
+         fd_pad=fd_pad,
+         pml_width=pml_width,
+         is_backward=is_backward,
+         grid_spacing=grid_spacing,
+     )
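
Taken together, the accessors above support lightweight monitoring hooks. Below is a minimal sketch of a forward callback that snapshots the Ey field; the "Ey" name, the view semantics, and the `forward_callback` hook come from the docstrings in this file, while the snapshot list and the 50-step interval are purely illustrative.

```python
import torch

from tide.callbacks import CallbackState

snapshots: list[torch.Tensor] = []

def snapshot_callback(state: CallbackState) -> None:
    # 'inner' strips both the FD padding and the PML layers, leaving
    # only the physical model region.
    if state.step % 50 == 0 and not state.is_backward:
        ey = state.get_wavefield("Ey", view="inner")
        snapshots.append(ey.detach().clone())
        print(f"{state.progress:.0%} done, t = {state.time:.3e} s")
```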
tide/cfl.py ADDED
@@ -0,0 +1,64 @@
+ """CFL condition helpers for stable time stepping."""
+ 
+ import math
+ import warnings
+ from typing import Union
+ 
+ 
+ def cfl_condition(
+     grid_spacing: Union[float, list[float]],
+     dt: float,
+     max_vel: float,
+     c_max: float = 1,
+     eps: float = 1e-15,
+ ) -> tuple[float, int]:
+     """Calculate the time step interval that satisfies the CFL condition.
+ 
+     The CFL (Courant-Friedrichs-Lewy) condition ensures numerical stability
+     for explicit FDTD schemes. If the user-provided dt is too large, this
+     function computes a smaller internal dt and the integer ratio between them.
+ 
+     Args:
+         grid_spacing: Grid spacing [dy, dx], or a single value for an
+             isotropic grid.
+         dt: User-provided time step.
+         max_vel: Maximum wave velocity in the model.
+         c_max: Maximum Courant number (default 1, the stability limit of
+             this formulation; pass a smaller value, e.g. 0.6, for a
+             stability margin).
+         eps: Small value to prevent division by zero.
+ 
+     Returns:
+         Tuple of (inner_dt, step_ratio) where:
+         - inner_dt: Time step satisfying the CFL condition
+         - step_ratio: Integer ratio dt / inner_dt
+ 
+     Example:
+         >>> # Check if dt=1e-9 is stable for v=3e8 m/s, dx=1e-3 m
+         >>> inner_dt, ratio = cfl_condition([1e-3, 1e-3], 1e-9, 3e8)
+         >>> print(f"Need {ratio}x smaller time step")
+     """
+     # Normalize grid_spacing to a list
+     if isinstance(grid_spacing, (int, float)):
+         grid_spacing = [float(grid_spacing), float(grid_spacing)]
+     else:
+         grid_spacing = list(grid_spacing)
+ 
+     if max_vel <= 0:
+         raise ValueError("max_vel must be positive")
+ 
+     # Maximum stable dt from the CFL condition:
+     #     max_dt = c_max / (max_vel * sqrt(sum_i 1 / dx_i**2))
+     # max_vel / (max_vel**2 + eps) is a division-safe form of 1 / max_vel.
+     max_dt = (
+         c_max
+         * max_vel
+         / ((max_vel**2 + eps) * math.sqrt(sum(1 / dx**2 for dx in grid_spacing)))
+     )
+ 
+     step_ratio = max(math.ceil(abs(dt) / max_dt), 1)
+     inner_dt = dt / step_ratio
+ 
+     if step_ratio >= 2:
+         warnings.warn(
+             f"CFL condition requires {step_ratio} internal time steps per "
+             f"user time step (dt={dt}, inner_dt={inner_dt}). Consider using "
+             "a smaller dt or a coarser grid.",
+             stacklevel=2,
+         )
+ 
+     return inner_dt, step_ratio
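
A quick worked instance of the bound implemented above, using the same numbers as the docstring example (illustrative values only):

```python
import math

# dt_max = c_max / (v_max * sqrt(1/dy**2 + 1/dx**2))
dy = dx = 1e-3   # 1 mm grid
v_max = 3e8      # speed of light in vacuum, m/s
dt_max = 1.0 / (v_max * math.sqrt(1 / dy**2 + 1 / dx**2))
print(dt_max)    # ~2.36e-12 s

# A user dt of 1e-9 s is therefore far too large: cfl_condition would
# return step_ratio = ceil(dt / dt_max) = 425 internal steps.
```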
tide/csrc/CMakeLists.txt ADDED
@@ -0,0 +1,263 @@
+ # TIDE Backend CMakeLists.txt
+ 
+ cmake_minimum_required(VERSION 3.18)
+ project(tide_backend LANGUAGES C CXX)
+ 
+ option(TIDE_ENABLE_CUDA "Enable CUDA backend" ON)
+ 
+ # Detect and enable CUDA if present and enabled
+ if(TIDE_ENABLE_CUDA)
+     find_package(CUDAToolkit)
+     if(CUDAToolkit_FOUND)
+         enable_language(CUDA)
+     else()
+         message(WARNING "CUDA not found. Building without CUDA support.")
+     endif()
+ else()
+     message(STATUS "CUDA disabled (TIDE_ENABLE_CUDA=OFF).")
+     set(CUDAToolkit_FOUND FALSE)
+ endif()
+ 
+ # Default build type
+ if(NOT CMAKE_BUILD_TYPE)
+     set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE)
+ endif()
+ 
+ # --- OpenMP Configuration (aligned with deepwave) ---
+ add_library(Tide_OpenMP_Interface INTERFACE)
+ set(OPENMP_CONFIGURED FALSE)
+ 
+ # On Windows with Clang, OpenMP must be configured manually
+ if(WIN32 AND CMAKE_C_COMPILER_ID MATCHES "Clang")
+     # Find libomp from the LLVM installation
+     find_library(LIBOMP_LIB NAMES libomp omp PATHS "C:/Program Files/LLVM/lib" NO_DEFAULT_PATH)
+     if(LIBOMP_LIB)
+         target_link_libraries(Tide_OpenMP_Interface INTERFACE "${LIBOMP_LIB}")
+         target_compile_options(Tide_OpenMP_Interface INTERFACE -fopenmp)
+         set(OPENMP_CONFIGURED TRUE)
+         message(STATUS "OpenMP enabled (Clang/LLVM on Windows).")
+     else()
+         message(STATUS "OpenMP not found (libomp.lib not in LLVM/lib).")
+     endif()
+ else()
+     find_package(OpenMP QUIET)
+     if(OpenMP_C_FOUND)
+         target_link_libraries(Tide_OpenMP_Interface INTERFACE OpenMP::OpenMP_C)
+         set(OPENMP_CONFIGURED TRUE)
+         message(STATUS "OpenMP enabled.")
+     else()
+         message(STATUS "OpenMP not found.")
+     endif()
+ endif()
+ 
+ # --- Compiler Feature Detection and Flags ---
+ include(CheckCSourceCompiles)
+ 
+ # AVX2 detection
+ set(AVX2_TEST_CODE "
+ #include <immintrin.h>
+ int main() {
+     __m256 vec = _mm256_set1_ps(42.0f);
+     return 0;
+ }")
+ 
+ if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|Intel")
+     if(WIN32)
+         set(C_AVX2_FLAG "/arch:AVX2") # Clang-cl uses MSVC-style flags
+     else()
+         set(C_AVX2_FLAG "-mavx2")
+     endif()
+ endif()
+ 
+ if(C_AVX2_FLAG)
+     set(CMAKE_REQUIRED_FLAGS "${C_AVX2_FLAG}")
+     check_c_source_compiles("${AVX2_TEST_CODE}" HAVE_AVX2)
+     unset(CMAKE_REQUIRED_FLAGS)
+ else()
+     set(HAVE_AVX2 FALSE)
+ endif()
+ 
+ if(HAVE_AVX2)
+     message(STATUS "AVX2 is supported.")
+ else()
+     message(STATUS "AVX2 is not supported.")
+ endif()
+ 
+ # Release flags (aligned with deepwave)
+ if(CMAKE_BUILD_TYPE MATCHES Release)
+     if(CMAKE_C_COMPILER_ID MATCHES "GNU|Clang|Intel")
+         if(WIN32)
+             # Clang-cl on Windows uses MSVC-style flags
+             set(C_RELEASE_FLAGS "/O2" "/fp:fast")
+         else()
+             set(C_RELEASE_FLAGS "-Ofast")
+             set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Ofast")
+         endif()
+     endif()
+ endif()
+ 
+ # --- Helper Macros for Object Libraries ---
+ macro(add_tide_cpu_object_library BASENAME ACCURACY DTYPE)
+     set(TARGET_NAME "${BASENAME}_${ACCURACY}_${DTYPE}_cpu_obj")
+     add_library(${TARGET_NAME} OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/${BASENAME}.c)
+     target_compile_definitions(${TARGET_NAME} PRIVATE
+         TIDE_STENCIL=${ACCURACY}
+         TIDE_DTYPE=${DTYPE}
+         TIDE_DEVICE=cpu
+     )
+     target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+     list(APPEND TIDE_OBJECTS $<TARGET_OBJECTS:${TARGET_NAME}>)
+     list(APPEND CPU_TARGETS ${TARGET_NAME})
+ 
+     # Set PIC for shared library objects
+     set_target_properties(${TARGET_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ 
+     if(C_RELEASE_FLAGS)
+         target_compile_options(${TARGET_NAME} PRIVATE ${C_RELEASE_FLAGS})
+     endif()
+ 
+     if(HAVE_AVX2 AND C_AVX2_FLAG)
+         target_compile_options(${TARGET_NAME} PRIVATE ${C_AVX2_FLAG})
+     endif()
+ endmacro()
+ 
+ if(CUDAToolkit_FOUND)
+     macro(add_tide_cuda_object_library BASENAME ACCURACY DTYPE)
+         set(TARGET_NAME "${BASENAME}_${ACCURACY}_${DTYPE}_cuda_obj")
+         add_library(${TARGET_NAME} OBJECT ${CMAKE_CURRENT_SOURCE_DIR}/${BASENAME}.cu)
+         # Set TIDE_DTYPE_FLOAT based on DTYPE (optimization 2.1)
+         if(${DTYPE} STREQUAL "float")
+             set(IS_FLOAT 1)
+         else()
+             set(IS_FLOAT 0)
+         endif()
+         target_compile_definitions(${TARGET_NAME} PRIVATE
+             TIDE_STENCIL=${ACCURACY}
+             TIDE_DTYPE=${DTYPE}
+             TIDE_DEVICE=cuda
+             TIDE_DTYPE_FLOAT=${IS_FLOAT}
+         )
+         target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+         list(APPEND TIDE_OBJECTS $<TARGET_OBJECTS:${TARGET_NAME}>)
+ 
+         # Use "all" or specific architectures
+         if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+             set_target_properties(${TARGET_NAME} PROPERTIES CUDA_ARCHITECTURES "89")
+         else()
+             set_target_properties(${TARGET_NAME} PROPERTIES CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES}")
+         endif()
+ 
+         if(CMAKE_BUILD_TYPE MATCHES Release)
+             target_compile_options(${TARGET_NAME} PRIVATE
+                 $<$<COMPILE_LANGUAGE:CUDA>:
+                 --use_fast_math
+                 -O3
+                 --restrict
+                 --maxrregcount=64
+                 -Xptxas=-dlcm=ca
+                 >)
+         endif()
+     endmacro()
+ endif()
+
163
+ # Prepare lists that will collect the object files
164
+ set(TIDE_OBJECTS)
165
+ set(CPU_TARGETS)
166
+ set(ACCURACIES 2 4 6 8)
167
+ set(DTYPES float double)
168
+
169
+ # --- Storage utilities ---
170
+ set(STORAGE_UTILS_CPU_SRC ${CMAKE_CURRENT_SOURCE_DIR}/storage_utils.c)
171
+ set(STORAGE_UTILS_CUDA_SRC "")
172
+
173
+ # --- CPU object libraries ---
174
+ foreach(ACCURACY ${ACCURACIES})
175
+ foreach(DTYPE ${DTYPES})
176
+ add_tide_cpu_object_library(maxwell ${ACCURACY} ${DTYPE})
177
+ endforeach()
178
+ endforeach()
179
+
180
+ if(OPENMP_CONFIGURED)
181
+ foreach(CPU_TARGET ${CPU_TARGETS})
182
+ target_link_libraries(${CPU_TARGET} PRIVATE Tide_OpenMP_Interface)
183
+ endforeach()
184
+ endif()
185
+
186
+ # --- CUDA object libraries ---
187
+ if(CUDAToolkit_FOUND)
188
+ if(CMAKE_BUILD_TYPE MATCHES Release)
189
+ set(CUDA_RELEASE_OPTIONS --use_fast_math -O3 --restrict --maxrregcount=64 -Xptxas=-dlcm=ca)
190
+ endif()
191
+ if(NOT WIN32)
192
+ list(APPEND CMAKE_CUDA_FLAGS -Xcompiler=-fPIC)
193
+ endif()
194
+
195
+ foreach(ACCURACY ${ACCURACIES})
196
+ foreach(DTYPE ${DTYPES})
197
+ add_tide_cuda_object_library(maxwell ${ACCURACY} ${DTYPE})
198
+ endforeach()
199
+ endforeach()
200
+
201
+ # --- Storage utilities ---
202
+ set(STORAGE_UTILS_CUDA_SRC ${CMAKE_CURRENT_SOURCE_DIR}/storage_utils.cu)
203
+ set_source_files_properties(${STORAGE_UTILS_CUDA_SRC} PROPERTIES CUDA_ARCHITECTURES "89")
204
+ endif()
205
+
206
+ # --- Final Library Build ---
207
+ # Combine all objects into a single shared library
208
+ add_library(tide_C SHARED ${TIDE_OBJECTS} ${STORAGE_UTILS_CUDA_SRC} ${STORAGE_UTILS_CPU_SRC})
209
+
210
+ if(WIN32)
211
+ set_target_properties(tide_C PROPERTIES OUTPUT_NAME "libtide_C")
212
+ endif()
213
+
214
+ set_target_properties(tide_C PROPERTIES
215
+ C_VISIBILITY_PRESET default
216
+ CUDA_VISIBILITY_PRESET default
217
+ POSITION_INDEPENDENT_CODE ON
218
+ WINDOWS_EXPORT_ALL_SYMBOLS ON
219
+ )
220
+
221
+ if(OPENMP_CONFIGURED)
222
+ target_link_libraries(tide_C PRIVATE Tide_OpenMP_Interface)
223
+ endif()
224
+
225
+ if(HAVE_AVX2)
226
+ target_compile_definitions(tide_C PRIVATE HAVE_AVX2)
227
+ endif()
228
+
229
+ # Set output directory to the tide package directory
230
+ set_target_properties(tide_C PROPERTIES
231
+ LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
232
+ RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/..
233
+ )
234
+
235
+ # --- Print Configuration Summary ---
236
+ message(STATUS "")
237
+ message(STATUS "TIDE Backend Configuration:")
238
+ message(STATUS " Build Type: ${CMAKE_BUILD_TYPE}")
239
+ message(STATUS " OpenMP: ${OPENMP_CONFIGURED}")
240
+ message(STATUS " AVX2: ${HAVE_AVX2}")
241
+ if(CUDAToolkit_FOUND)
242
+ message(STATUS " CUDA: ON")
243
+ message(STATUS " CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
244
+ else()
245
+ message(STATUS " CUDA: OFF")
246
+ endif()
247
+ message(STATUS " Output Directory: ${CMAKE_CURRENT_SOURCE_DIR}/..")
248
+ message(STATUS "")
249
+
250
+ # Install target
251
+ install(TARGETS tide_C
252
+ LIBRARY DESTINATION tide
253
+ ARCHIVE DESTINATION tide
254
+ RUNTIME DESTINATION tide
255
+ )
256
+
257
+ # Bundle OpenMP runtime on Windows (Clang's libomp.dll)
258
+ if(WIN32)
259
+ set(TIDE_LIBOMP_DLL "${CMAKE_CURRENT_SOURCE_DIR}/../libomp.dll")
260
+ if(EXISTS "${TIDE_LIBOMP_DLL}")
261
+ install(FILES "${TIDE_LIBOMP_DLL}" DESTINATION tide)
262
+ endif()
263
+ endif()
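
For orientation, here is a minimal sketch (not the package's own loader, which is not shown in this diff) of how the `tide_C` shared library that this build places in the tide package directory could be opened from Python. The filename follows from the CMake rules above: Unix adds the `lib` prefix automatically, and on Windows `OUTPUT_NAME` is set to `libtide_C` to match.

```python
import ctypes
import pathlib
import sys

import tide  # assumes the built wheel is installed

# Pick the platform's shared-library suffix.
suffix = {"win32": ".dll", "darwin": ".dylib"}.get(sys.platform, ".so")
lib_path = pathlib.Path(tide.__file__).parent / f"libtide_C{suffix}"
tide_C = ctypes.CDLL(str(lib_path))
```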
tide/csrc/common_cpu.h ADDED
@@ -0,0 +1,31 @@
+ #ifndef COMMON_CPU_H
+ #define COMMON_CPU_H
+ 
+ #include <stdint.h>
+ #include <stdbool.h>
+ 
+ #ifndef TIDE_DTYPE
+ #define TIDE_DTYPE float
+ #endif
+ 
+ #ifndef TIDE_STENCIL
+ #define TIDE_STENCIL 4
+ #endif
+ 
+ #if defined(_OPENMP)
+ #define TIDE_OMP_INDEX int64_t
+ #define TIDE_OMP_PARALLEL_FOR _Pragma("omp parallel for")
+ #define TIDE_OMP_PARALLEL_FOR_COLLAPSE2 _Pragma("omp parallel for collapse(2)")
+ #define TIDE_OMP_PARALLEL_FOR_COLLAPSE4 _Pragma("omp parallel for collapse(4)")
+ #define TIDE_OMP_SIMD _Pragma("omp simd")
+ #define TIDE_OMP_SIMD_COLLAPSE2 _Pragma("omp simd collapse(2)")
+ #else
+ #define TIDE_OMP_INDEX int64_t
+ #define TIDE_OMP_PARALLEL_FOR
+ #define TIDE_OMP_PARALLEL_FOR_COLLAPSE2
+ #define TIDE_OMP_PARALLEL_FOR_COLLAPSE4
+ #define TIDE_OMP_SIMD
+ #define TIDE_OMP_SIMD_COLLAPSE2
+ #endif
+ 
+ #endif /* COMMON_CPU_H */
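
The `TIDE_DTYPE` and `TIDE_STENCIL` defaults in this header are only fallbacks: the CMakeLists.txt shown earlier always defines both macros explicitly for each object target. A small illustrative Python loop makes the resulting compile matrix explicit (the target names follow the `add_tide_cpu_object_library` macro above; nothing in this snippet is part of the package itself):

```python
# Each (stencil accuracy, dtype) pair becomes one separately compiled object.
for stencil in (2, 4, 6, 8):
    for dtype in ("float", "double"):
        target = f"maxwell_{stencil}_{dtype}_cpu_obj"
        defines = f"-DTIDE_STENCIL={stencil} -DTIDE_DTYPE={dtype} -DTIDE_DEVICE=cpu"
        print(target, defines)
```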