tide-GPR 0.0.9__py3-none-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,175 @@
1
#ifndef STAGGERED_GRID_H
#define STAGGERED_GRID_H

/*
 * Staggered-grid finite-difference operators for 2D fields.
 *
 * The macros expand in the caller's scope and rely on names that must already
 * exist there:
 *   TIDE_STENCIL  2, 4, 6 or 8; selects which set of coefficients is compiled.
 *   TIDE_DTYPE    type each coefficient is cast to (stencils 4, 6 and 8 only).
 *   rdy, rdx      factors the difference sums are multiplied by (presumably
 *                 the reciprocal grid spacings -- confirm against the caller).
 *   F(dy, dx)     accessor for the field value at an offset from the current
 *                 point; the first argument moves along y, the second along x.
 *   C(dy, dx)     accessor with the same offset convention; multiplied
 *                 point-wise with F inside the adjoint operators.
 *
 * Every operator expands to ONE fully parenthesised expression, so it can be
 * used safely as an operand (e.g. `a / DIFFY1(F)`) for every stencil order.
 */

// FD_PAD is half the stencil width
#if TIDE_STENCIL == 2
#define FD_PAD 1
#elif TIDE_STENCIL == 4
#define FD_PAD 2
#elif TIDE_STENCIL == 6
#define FD_PAD 3
#elif TIDE_STENCIL == 8
#define FD_PAD 4
#else
#error "TIDE_STENCIL must be 2, 4, 6, or 8"
#endif

/*
 * Forward operators.
 * DIFFY1 / DIFFX1   use offsets -FD_PAD .. FD_PAD-1 (nearest pair is 0 and -1).
 * DIFFYH1 / DIFFXH1 use offsets -FD_PAD+1 .. FD_PAD (nearest pair is 1 and 0).
 */
#if TIDE_STENCIL == 2
// 2nd order accuracy
#define DIFFY1(F) ((F(0, 0) - F(-1, 0)) * rdy)
#define DIFFX1(F) ((F(0, 0) - F(0, -1)) * rdx)
#define DIFFYH1(F) ((F(1, 0) - F(0, 0)) * rdy)
#define DIFFXH1(F) ((F(0, 1) - F(0, 0)) * rdx)

#elif TIDE_STENCIL == 4
// 4th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(1, 0) - F(-2, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 1) - F(0, -2))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(2, 0) - F(-1, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 2) - F(0, -1))) * rdx)

#elif TIDE_STENCIL == 6
// 6th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(1, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(2, 0) - F(-3, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 1) - F(0, -2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 2) - F(0, -3))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(2, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(3, 0) - F(-2, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 2) - F(0, -1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 3) - F(0, -2))) * rdx)

#elif TIDE_STENCIL == 8
// 8th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(1, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(2, 0) - F(-3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(3, 0) - F(-4, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 1) - F(0, -2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 2) - F(0, -3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 3) - F(0, -4))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(2, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(3, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(4, 0) - F(-3, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 2) - F(0, -1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 3) - F(0, -2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 4) - F(0, -3))) * rdx)

#endif

/*
 * Adjoint derivative operators for backward pass
 * These compute the transpose of the forward derivative operators.
 * Each term is the point-wise product C(.) * F(.) taken at the mirrored
 * offsets of the matching forward operator, with the sign of the difference
 * reversed.
 */

#if TIDE_STENCIL == 2
#define DIFFY1_ADJ(C, F) \
  ((C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) * rdy)

#define DIFFX1_ADJ(C, F) \
  ((C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) * rdx)

#define DIFFYH1_ADJ(C, F) \
  ((C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) * rdy)

#define DIFFXH1_ADJ(C, F) \
  ((C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) * rdx)

#elif TIDE_STENCIL == 4
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1))) * rdx)

#elif TIDE_STENCIL == 6
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(-2, 0) * F(-2, 0) - C(3, 0) * F(3, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(0, -2) * F(0, -2) - C(0, 3) * F(0, 3))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(-3, 0) * F(-3, 0) - C(2, 0) * F(2, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(0, -3) * F(0, -3) - C(0, 2) * F(0, 2))) * rdx)

#elif TIDE_STENCIL == 8
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(-2, 0) * F(-2, 0) - C(3, 0) * F(3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(-3, 0) * F(-3, 0) - C(4, 0) * F(4, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(0, -2) * F(0, -2) - C(0, 3) * F(0, 3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(0, -3) * F(0, -3) - C(0, 4) * F(0, 4))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(-3, 0) * F(-3, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(-4, 0) * F(-4, 0) - C(3, 0) * F(3, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(0, -3) * F(0, -3) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(0, -4) * F(0, -4) - C(0, 3) * F(0, 3))) * rdx)

#endif

#endif // STAGGERED_GRID_H
@@ -0,0 +1,124 @@
1
#ifndef STAGGERED_GRID_3D_H
#define STAGGERED_GRID_3D_H

/*
 * Staggered-grid finite-difference operators for 3D fields.
 *
 * The macros expand in the caller's scope and rely on names that must already
 * exist there:
 *   TIDE_STENCIL    2, 4, 6 or 8; selects which set of coefficients is compiled.
 *   TIDE_DTYPE      type each coefficient is cast to (stencils 4, 6 and 8 only).
 *   rdz, rdy, rdx   factors the difference sums are multiplied by (presumably
 *                   the reciprocal grid spacings -- confirm against the caller).
 *   F(dz, dy, dx)   accessor for the field value at an offset from the current
 *                   point; arguments move along z, y and x in that order.
 *
 * Every operator expands to ONE fully parenthesised expression, so it can be
 * used safely as an operand (e.g. `a / DIFFZ1(F)`) for every stencil order.
 */

// FD_PAD is half the stencil width
#if TIDE_STENCIL == 2
#define FD_PAD 1
#elif TIDE_STENCIL == 4
#define FD_PAD 2
#elif TIDE_STENCIL == 6
#define FD_PAD 3
#elif TIDE_STENCIL == 8
#define FD_PAD 4
#else
#error "TIDE_STENCIL must be 2, 4, 6, or 8"
#endif

/*
 * DIFF?1  use offsets -FD_PAD .. FD_PAD-1 (nearest pair is 0 and -1).
 * DIFF?H1 use offsets -FD_PAD+1 .. FD_PAD (nearest pair is 1 and 0).
 */
#if TIDE_STENCIL == 2
// 2nd order accuracy
#define DIFFZ1(F) ((F(0, 0, 0) - F(-1, 0, 0)) * rdz)
#define DIFFY1(F) ((F(0, 0, 0) - F(0, -1, 0)) * rdy)
#define DIFFX1(F) ((F(0, 0, 0) - F(0, 0, -1)) * rdx)
#define DIFFZH1(F) ((F(1, 0, 0) - F(0, 0, 0)) * rdz)
#define DIFFYH1(F) ((F(0, 1, 0) - F(0, 0, 0)) * rdy)
#define DIFFXH1(F) ((F(0, 0, 1) - F(0, 0, 0)) * rdx)

#elif TIDE_STENCIL == 4
// 4th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(1, 0, 0) - F(-2, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 1, 0) - F(0, -2, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 0, 1) - F(0, 0, -2))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(2, 0, 0) - F(-1, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 2, 0) - F(0, -1, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 0, 2) - F(0, 0, -1))) * rdx)

#elif TIDE_STENCIL == 6
// 6th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(1, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(2, 0, 0) - F(-3, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 1, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 2, 0) - F(0, -3, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 0, 1) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 0, 2) - F(0, 0, -3))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(2, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(3, 0, 0) - F(-2, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 2, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 3, 0) - F(0, -2, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 0, 2) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 0, 3) - F(0, 0, -2))) * rdx)

#elif TIDE_STENCIL == 8
// 8th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(1, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(2, 0, 0) - F(-3, 0, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(3, 0, 0) - F(-4, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 1, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 2, 0) - F(0, -3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 3, 0) - F(0, -4, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 0, 1) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 0, 2) - F(0, 0, -3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 0, 3) - F(0, 0, -4))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(2, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(3, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(4, 0, 0) - F(-3, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 2, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 3, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 4, 0) - F(0, -3, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 0, 2) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 0, 3) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 0, 4) - F(0, 0, -3))) * rdx)

#endif

#endif // STAGGERED_GRID_3D_H
@@ -0,0 +1,78 @@
1
+ #include "storage_utils.h"
2
+
3
+ #include <errno.h>
4
+ #include <string.h>
5
+
6
/*
 * Fill `dst` with exactly `nbytes` bytes read from `fp`.
 *
 * fread() may deliver fewer bytes than requested, so the read is repeated on
 * the unread tail until the request is satisfied.
 *
 * Returns true when all `nbytes` bytes were read (trivially true for 0 bytes),
 * false as soon as a call to fread() delivers nothing. The caller inspects
 * feof()/errno to find out why.
 */
static bool read_exact(FILE* fp, void* dst, size_t nbytes) {
  char* cursor = (char*)dst;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t got = fread(cursor, 1, remaining, fp);
    if (got == 0) {
      return false;
    }
    cursor += got;
    remaining -= got;
  }
  return true;
}
17
+
18
/*
 * Write exactly `nbytes` bytes from `src` to `fp`.
 *
 * fwrite() may accept fewer bytes than requested, so the write is repeated on
 * the unwritten tail until everything has been handed to the stream.
 *
 * Returns true when all `nbytes` bytes were written (trivially true for 0
 * bytes), false as soon as a call to fwrite() accepts nothing.
 */
static bool write_exact(FILE* fp, const void* src, size_t nbytes) {
  const char* cursor = (const char*)src;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t put = fwrite(cursor, 1, remaining, fp);
    if (put == 0) {
      return false;
    }
    cursor += put;
    remaining -= put;
  }
  return true;
}
29
+
30
/*
 * Print a diagnostic for a failed stdio operation to stderr.
 *
 * `op` names the operation and `step_idx` the snapshot step it was working
 * on. When errno is non-zero its strerror() text is appended to the message.
 */
static void report_io_error(const char* op, int64_t step_idx) {
  const long long step = (long long)step_idx;
  if (errno == 0) {
    fprintf(stderr, "storage_utils: %s failed at step %lld\n", op, step);
    return;
  }
  fprintf(stderr, "storage_utils: %s failed at step %lld: %s\n",
          op, step, strerror(errno));
}
39
+
40
+ void storage_save_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
41
+ int64_t step_idx, size_t step_bytes_uncomp) {
42
+ if (storage_mode == STORAGE_NONE) return;
43
+ if (storage_mode == STORAGE_DISK) {
44
+ int64_t offset = step_idx * (int64_t)step_bytes_uncomp;
45
+ errno = 0;
46
+ if (fseek(fp, offset, SEEK_SET) != 0) {
47
+ report_io_error("fseek(write)", step_idx);
48
+ return;
49
+ }
50
+ if (!write_exact(fp, store_1, step_bytes_uncomp)) {
51
+ report_io_error("fwrite", step_idx);
52
+ }
53
+ }
54
+ }
55
+
56
+ void storage_load_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
57
+ int64_t step_idx, size_t step_bytes_uncomp) {
58
+ if (storage_mode == STORAGE_NONE) return;
59
+ if (storage_mode == STORAGE_DISK) {
60
+ int64_t offset = step_idx * (int64_t)step_bytes_uncomp;
61
+ errno = 0;
62
+ if (fseek(fp, offset, SEEK_SET) != 0) {
63
+ report_io_error("fseek(read)", step_idx);
64
+ memset(store_1, 0, step_bytes_uncomp);
65
+ return;
66
+ }
67
+ if (!read_exact(fp, store_1, step_bytes_uncomp)) {
68
+ if (feof(fp)) {
69
+ fprintf(stderr, "storage_utils: unexpected EOF at step %lld\n",
70
+ (long long)step_idx);
71
+ } else {
72
+ report_io_error("fread", step_idx);
73
+ }
74
+ clearerr(fp);
75
+ memset(store_1, 0, step_bytes_uncomp);
76
+ }
77
+ }
78
+ }
@@ -0,0 +1,135 @@
1
+ #include <cuda_runtime.h>
2
+ #include <errno.h>
3
+ #include <stdint.h>
4
+ #include <stdio.h>
5
+ #include <string.h>
6
+
7
+ #include "storage_utils.h"
8
+
9
// Fill `dst` with exactly `nbytes` bytes read from `fp`.
//
// fread() may deliver fewer bytes than requested, so the read is repeated on
// the unread tail until the request is satisfied.
//
// Returns true when all `nbytes` bytes were read (trivially true for 0 bytes),
// false as soon as a call to fread() delivers nothing. The caller inspects
// feof()/errno to find out why.
static bool read_exact(FILE* fp, void* dst, size_t nbytes) {
  char* cursor = (char*)dst;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t got = fread(cursor, 1, remaining, fp);
    if (got == 0) {
      return false;
    }
    cursor += got;
    remaining -= got;
  }
  return true;
}
20
+
21
// Write exactly `nbytes` bytes from `src` to `fp`.
//
// fwrite() may accept fewer bytes than requested, so the write is repeated on
// the unwritten tail until everything has been handed to the stream.
//
// Returns true when all `nbytes` bytes were written (trivially true for 0
// bytes), false as soon as a call to fwrite() accepts nothing.
static bool write_exact(FILE* fp, const void* src, size_t nbytes) {
  const char* cursor = (const char*)src;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t put = fwrite(cursor, 1, remaining, fp);
    if (put == 0) {
      return false;
    }
    cursor += put;
    remaining -= put;
  }
  return true;
}
32
+
33
// Print a diagnostic for a failed stdio operation to stderr.
//
// `op` names the operation and `step_idx` the snapshot step it was working
// on. When errno is non-zero its strerror() text is appended to the message.
static void report_io_error(const char* op, int64_t step_idx) {
  const long long step = (long long)step_idx;
  if (errno == 0) {
    fprintf(stderr, "storage_utils: %s failed at step %lld\n", op, step);
    return;
  }
  fprintf(stderr, "storage_utils: %s failed at step %lld: %s\n",
          op, step, strerror(errno));
}
42
+
43
// Print a diagnostic for a failed CUDA runtime call to stderr.
//
// `op` names the call; `err` is the status it returned, rendered with
// cudaGetErrorString().
static void report_cuda_error(const char* op, cudaError_t err) {
  const char* reason = cudaGetErrorString(err);
  fprintf(stderr, "storage_utils: %s failed: %s\n", op, reason);
}
47
+
48
+ extern "C" {
49
+
50
// Save one snapshot (`shot_bytes_uncomp * n_shots` bytes) from device memory.
//
//   store_1  source of the device-to-host copy (device buffer).
//   store_3  destination of that copy (host buffer); in disk mode it is then
//            written to `fp`.
//   fp       backing file, used only in STORAGE_DISK mode.
//
// STORAGE_CPU  enqueues an asynchronous copy on stream 0 and returns.
// STORAGE_DISK copies synchronously, then writes the host buffer to `fp` at
//              byte offset `step_idx * bytes`.
// Any other mode (including STORAGE_NONE) does nothing.
//
// Failures are reported on stderr and the function returns; nothing is
// signalled to the caller (the interface is void).
void storage_save_snapshot_gpu(
    void* store_1, void* store_3, FILE* fp, int64_t storage_mode,
    int64_t step_idx, size_t shot_bytes_uncomp, size_t n_shots) {
  if (storage_mode != STORAGE_CPU && storage_mode != STORAGE_DISK) {
    return;
  }
  const size_t bytes_to_store = shot_bytes_uncomp * n_shots;

  if (storage_mode == STORAGE_CPU) {
    // CPU mode: avoid blocking the host thread on every step.
    // Copies are enqueued on the current (default) stream and will be ordered
    // with subsequent CUDA work in the same stream.
    cudaError_t err = cudaMemcpyAsync(
        store_3, store_1, bytes_to_store, cudaMemcpyDeviceToHost, 0);
    if (err != cudaSuccess) {
      report_cuda_error("cudaMemcpyAsync(D2H)", err);
    }
    return;
  }

  // Disk mode needs host-visible data immediately for fwrite.
  cudaError_t err =
      cudaMemcpy(store_3, store_1, bytes_to_store, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    report_cuda_error("cudaMemcpy(D2H)", err);
    return;
  }

  // Validate before seeking: the product must not overflow int64_t, and the
  // result must survive the conversion to `long` that fseek() requires
  // (long is 32 bits on some platforms, which would silently seek elsewhere).
  const uint64_t step_bytes = (uint64_t)bytes_to_store;
  const bool in_range =
      fp != NULL && step_idx >= 0 &&
      (step_bytes == 0 ||
       (uint64_t)step_idx <= (uint64_t)INT64_MAX / step_bytes);
  const int64_t offset =
      in_range ? (int64_t)((uint64_t)step_idx * step_bytes) : 0;
  const long seek_pos = (long)offset;
  if (!in_range || (int64_t)seek_pos != offset) {
    fprintf(stderr, "storage_utils: invalid file or offset at step %lld\n",
            (long long)step_idx);
    return;
  }

  errno = 0;  // so report_io_error() only shows errors raised below
  if (fseek(fp, seek_pos, SEEK_SET) != 0) {
    report_io_error("fseek(write)", step_idx);
    return;
  }
  if (!write_exact(fp, store_3, bytes_to_store)) {
    report_io_error("fwrite", step_idx);
  }
}
89
+
90
// Load one snapshot (`shot_bytes_uncomp * n_shots` bytes) into device memory.
//
//   store_1  destination of the host-to-device copy (device buffer).
//   store_3  source of that copy (host buffer); in disk mode it is first
//            filled from `fp`.
//   fp       backing file, used only in STORAGE_DISK mode.
//
// STORAGE_CPU  enqueues an asynchronous copy on stream 0 and returns.
// STORAGE_DISK reads the snapshot at byte offset `step_idx * bytes` into
//              `store_3`, then copies it to the device synchronously.
// Any other mode (including STORAGE_NONE) does nothing.
//
// If the disk read fails for any reason a message is printed to stderr and
// `store_3` is zero-filled, so zeros (never stale data) reach the device.
void storage_load_snapshot_gpu(void* store_1, void* store_3, FILE* fp,
                               int64_t storage_mode, int64_t step_idx,
                               size_t shot_bytes_uncomp, size_t n_shots) {
  if (storage_mode != STORAGE_CPU && storage_mode != STORAGE_DISK) {
    return;
  }
  const size_t bytes_to_load = shot_bytes_uncomp * n_shots;

  if (storage_mode == STORAGE_CPU) {
    cudaError_t err = cudaMemcpyAsync(
        store_1, store_3, bytes_to_load, cudaMemcpyHostToDevice, 0);
    if (err != cudaSuccess) {
      report_cuda_error("cudaMemcpyAsync(H2D)", err);
    }
    return;
  }

  // Validate before seeking: the product must not overflow int64_t, and the
  // result must survive the conversion to `long` that fseek() requires
  // (long is 32 bits on some platforms, which would silently seek elsewhere).
  const uint64_t step_bytes = (uint64_t)bytes_to_load;
  const bool in_range =
      fp != NULL && step_idx >= 0 &&
      (step_bytes == 0 ||
       (uint64_t)step_idx <= (uint64_t)INT64_MAX / step_bytes);
  const int64_t offset =
      in_range ? (int64_t)((uint64_t)step_idx * step_bytes) : 0;
  const long seek_pos = (long)offset;

  bool loaded = false;
  if (!in_range || (int64_t)seek_pos != offset) {
    fprintf(stderr, "storage_utils: invalid file or offset at step %lld\n",
            (long long)step_idx);
  } else {
    errno = 0;  // so report_io_error() only shows errors raised below
    if (fseek(fp, seek_pos, SEEK_SET) != 0) {
      report_io_error("fseek(read)", step_idx);
    } else if (read_exact(fp, store_3, bytes_to_load)) {
      loaded = true;
    } else {
      if (feof(fp)) {
        fprintf(stderr, "storage_utils: unexpected EOF at step %lld\n",
                (long long)step_idx);
      } else {
        report_io_error("fread", step_idx);
      }
      // Reset the EOF/error indicators so later operations on fp can proceed.
      clearerr(fp);
    }
  }
  if (!loaded) {
    memset(store_3, 0, bytes_to_load);
  }

  // Disk mode reuses the same pinned host buffer (store_3) for many steps.
  // Use a synchronous copy to avoid the host overwriting store_3 (fread)
  // before the device copy has consumed it.
  cudaError_t err =
      cudaMemcpy(store_1, store_3, bytes_to_load, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    report_cuda_error("cudaMemcpy(H2D)", err);
  }
}
135
+ }
@@ -0,0 +1,36 @@
1
#ifndef STORAGE_UTILS_H
#define STORAGE_UTILS_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Values accepted for the `storage_mode` argument of the functions below.
 *
 *   STORAGE_DEVICE  the save/load functions perform no copy and no file I/O
 *   STORAGE_CPU     GPU variants copy between the device and a host buffer
 *   STORAGE_DISK    snapshots are written to / read from a FILE*
 *   STORAGE_NONE    the save/load functions return immediately
 */
#define STORAGE_DEVICE 0
#define STORAGE_CPU 1
#define STORAGE_DISK 2
#define STORAGE_NONE 3

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Host-memory snapshots.
 *
 * In STORAGE_DISK mode `step_bytes_uncomp` bytes are transferred between
 * `store_1` and `fp` at byte offset `step_idx * step_bytes_uncomp`.
 * A failed load zero-fills `store_1`.
 */
void storage_save_snapshot_cpu(
    void* store_1,
    FILE* fp,
    int64_t storage_mode,
    int64_t step_idx,
    size_t step_bytes_uncomp);

void storage_load_snapshot_cpu(
    void* store_1,
    FILE* fp,
    int64_t storage_mode,
    int64_t step_idx,
    size_t step_bytes_uncomp);

/*
 * Device-memory snapshots of `shot_bytes_uncomp * n_shots` bytes.
 *
 * `store_1` is the device buffer and `store_3` the host buffer the data is
 * staged through; in STORAGE_DISK mode the host buffer is additionally
 * written to / read from `fp`.
 */
void storage_save_snapshot_gpu(
    void* store_1,
    void* store_3,
    FILE* fp,
    int64_t storage_mode,
    int64_t step_idx,
    size_t shot_bytes_uncomp,
    size_t n_shots);

void storage_load_snapshot_gpu(
    void* store_1,
    void* store_3,
    FILE* fp,
    int64_t storage_mode,
    int64_t step_idx,
    size_t shot_bytes_uncomp,
    size_t n_shots);

#ifdef __cplusplus
}
#endif

#endif /* STORAGE_UTILS_H */
tide/grid_utils.py ADDED
@@ -0,0 +1,31 @@
1
+ """Grid-related helpers for padding and boundary bookkeeping."""
2
+
3
+ from typing import Sequence, Union
4
+
5
+
6
+ def _normalize_grid_spacing_2d(
7
+ grid_spacing: Union[float, Sequence[float]],
8
+ ) -> list[float]:
9
+ """Normalize 2D grid spacing to [dy, dx]."""
10
+ if isinstance(grid_spacing, (int, float)):
11
+ return [float(grid_spacing), float(grid_spacing)]
12
+ return list(grid_spacing)
13
+
14
+
15
+ def _normalize_pml_width_2d(
16
+ pml_width: Union[int, Sequence[int]],
17
+ ) -> list[int]:
18
+ """Normalize 2D PML width to [top, bottom, left, right]."""
19
+ if isinstance(pml_width, int):
20
+ return [pml_width] * 4
21
+ pml_width_list = list(pml_width)
22
+ if len(pml_width_list) == 1:
23
+ return pml_width_list * 4
24
+ if len(pml_width_list) == 2:
25
+ return [
26
+ pml_width_list[0],
27
+ pml_width_list[0],
28
+ pml_width_list[1],
29
+ pml_width_list[1],
30
+ ]
31
+ return pml_width_list