tide-GPR 0.0.9__py3-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tide/__init__.py +65 -0
- tide/autograd_utils.py +26 -0
- tide/backend_utils.py +536 -0
- tide/callbacks.py +348 -0
- tide/cfl.py +64 -0
- tide/csrc/CMakeLists.txt +263 -0
- tide/csrc/common_cpu.h +31 -0
- tide/csrc/common_gpu.h +56 -0
- tide/csrc/maxwell.c +2133 -0
- tide/csrc/maxwell.cu +2297 -0
- tide/csrc/maxwell_born.cu +0 -0
- tide/csrc/staggered_grid.h +175 -0
- tide/csrc/staggered_grid_3d.h +124 -0
- tide/csrc/storage_utils.c +78 -0
- tide/csrc/storage_utils.cu +135 -0
- tide/csrc/storage_utils.h +36 -0
- tide/grid_utils.py +31 -0
- tide/maxwell.py +2651 -0
- tide/padding.py +139 -0
- tide/resampling.py +246 -0
- tide/staggered.py +567 -0
- tide/storage.py +131 -0
- tide/tide/libtide_C.so +0 -0
- tide/utils.py +274 -0
- tide/validation.py +71 -0
- tide/wavelets.py +72 -0
- tide_gpr-0.0.9.dist-info/METADATA +256 -0
- tide_gpr-0.0.9.dist-info/RECORD +31 -0
- tide_gpr-0.0.9.dist-info/WHEEL +5 -0
- tide_gpr-0.0.9.dist-info/licenses/LICENSE +46 -0
- tide_gpr.libs/libgomp-24e2ab19.so.1.0.0 +0 -0
|
File without changes
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#ifndef STAGGERED_GRID_H
|
|
2
|
+
#define STAGGERED_GRID_H
|
|
3
|
+
|
|
4
|
+
// FD_PAD: half of the finite-difference stencil width chosen via TIDE_STENCIL.
// The build is stopped for any TIDE_STENCIL other than 2, 4, 6 or 8
// (an undefined TIDE_STENCIL evaluates to 0 in #if and is rejected as well).
#if TIDE_STENCIL == 8
#define FD_PAD 4
#elif TIDE_STENCIL == 6
#define FD_PAD 3
#elif TIDE_STENCIL == 4
#define FD_PAD 2
#elif TIDE_STENCIL == 2
#define FD_PAD 1
#else
#error "TIDE_STENCIL must be 2, 4, 6, or 8"
#endif
|
|
16
|
+
|
|
17
|
+
/*
 * First-derivative staggered-grid operators (2D).
 *
 * F is a function-like accessor invoked as F(dy, dx) with integer offsets
 * relative to the current grid point.  DIFFY1/DIFFX1 difference F(0) against
 * F(-1) (and wider symmetric pairs around that midpoint); DIFFYH1/DIFFXH1
 * difference F(1) against F(0).  The expansion site must provide `rdy`/`rdx`
 * (presumably the reciprocal grid spacings -- confirm against the kernels)
 * and, for the 4/6/8-point variants, TIDE_DTYPE.
 *
 * Every replacement list is wrapped in one outer pair of parentheses so the
 * macros behave as a single operand in any surrounding expression
 * (e.g. `a / DIFFY1(F)`), matching the 2-point variants.
 */
#if TIDE_STENCIL == 2
// 2nd order accuracy
#define DIFFY1(F) ((F(0, 0) - F(-1, 0)) * rdy)
#define DIFFX1(F) ((F(0, 0) - F(0, -1)) * rdx)
#define DIFFYH1(F) ((F(1, 0) - F(0, 0)) * rdy)
#define DIFFXH1(F) ((F(0, 1) - F(0, 0)) * rdx)

#elif TIDE_STENCIL == 4
// 4th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(1, 0) - F(-2, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 1) - F(0, -2))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(2, 0) - F(-1, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 2) - F(0, -1))) * rdx)

#elif TIDE_STENCIL == 6
// 6th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(1, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(2, 0) - F(-3, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 1) - F(0, -2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 2) - F(0, -3))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(2, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(3, 0) - F(-2, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 2) - F(0, -1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 3) - F(0, -2))) * rdx)

#elif TIDE_STENCIL == 8
// 8th order accuracy
#define DIFFY1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(1, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(2, 0) - F(-3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(3, 0) - F(-4, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0) - F(0, -1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 1) - F(0, -2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 2) - F(0, -3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 3) - F(0, -4))) * rdx)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(1, 0) - F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(2, 0) - F(-1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(3, 0) - F(-2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(4, 0) - F(-3, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 1) - F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 2) - F(0, -1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 3) - F(0, -2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 4) - F(0, -3))) * rdx)

#endif
|
|
91
|
+
|
|
92
|
+
/*
 * Adjoint derivative operators for backward pass
 * These compute the transpose of the forward derivative operators.
 *
 * C and F are function-like accessors invoked as C(dy, dx) / F(dy, dx); each
 * term uses the product C(k) * F(k) taken at the same offset.  Relative to
 * the forward operators the offsets are mirrored: the *1_ADJ forms pair
 * offset 0 with +1 (then -1 with +2, ...), the *H1_ADJ forms pair -1 with 0
 * (then -2 with +1, ...).  `rdy`/`rdx` and, for the 4/6/8-point variants,
 * TIDE_DTYPE must be in scope where the macro is expanded.
 *
 * Every replacement list is wrapped in one outer pair of parentheses so the
 * macros expand to a single operand regardless of the surrounding expression,
 * matching the 2-point variants.
 */

#if TIDE_STENCIL == 2
#define DIFFY1_ADJ(C, F) \
  ((C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) * rdy)

#define DIFFX1_ADJ(C, F) \
  ((C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) * rdx)

#define DIFFYH1_ADJ(C, F) \
  ((C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) * rdy)

#define DIFFXH1_ADJ(C, F) \
  ((C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) * rdx)

#elif TIDE_STENCIL == 4
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(9.0/8.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1))) * rdx)

#elif TIDE_STENCIL == 6
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(-2, 0) * F(-2, 0) - C(3, 0) * F(3, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(0, -2) * F(0, -2) - C(0, 3) * F(0, 3))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(-3, 0) * F(-3, 0) - C(2, 0) * F(2, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(75.0/64.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (C(0, -3) * F(0, -3) - C(0, 2) * F(0, 2))) * rdx)

#elif TIDE_STENCIL == 8
#define DIFFY1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, 0) * F(0, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(-1, 0) * F(-1, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(-2, 0) * F(-2, 0) - C(3, 0) * F(3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(-3, 0) * F(-3, 0) - C(4, 0) * F(4, 0))) * rdy)

#define DIFFX1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, 0) * F(0, 0) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(0, -1) * F(0, -1) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(0, -2) * F(0, -2) - C(0, 3) * F(0, 3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(0, -3) * F(0, -3) - C(0, 4) * F(0, 4))) * rdx)

#define DIFFYH1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(-1, 0) * F(-1, 0) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(-2, 0) * F(-2, 0) - C(1, 0) * F(1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(-3, 0) * F(-3, 0) - C(2, 0) * F(2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(-4, 0) * F(-4, 0) - C(3, 0) * F(3, 0))) * rdy)

#define DIFFXH1_ADJ(C, F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (C(0, -1) * F(0, -1) - C(0, 0) * F(0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (C(0, -2) * F(0, -2) - C(0, 1) * F(0, 1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (C(0, -3) * F(0, -3) - C(0, 2) * F(0, 2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (C(0, -4) * F(0, -4) - C(0, 3) * F(0, 3))) * rdx)

#endif
|
|
174
|
+
|
|
175
|
+
#endif // STAGGERED_GRID_H
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
#ifndef STAGGERED_GRID_3D_H
|
|
2
|
+
#define STAGGERED_GRID_3D_H
|
|
3
|
+
|
|
4
|
+
// FD_PAD: half of the finite-difference stencil width chosen via TIDE_STENCIL.
// The build is stopped for any TIDE_STENCIL other than 2, 4, 6 or 8
// (an undefined TIDE_STENCIL evaluates to 0 in #if and is rejected as well).
#if TIDE_STENCIL == 8
#define FD_PAD 4
#elif TIDE_STENCIL == 6
#define FD_PAD 3
#elif TIDE_STENCIL == 4
#define FD_PAD 2
#elif TIDE_STENCIL == 2
#define FD_PAD 1
#else
#error "TIDE_STENCIL must be 2, 4, 6, or 8"
#endif
|
|
16
|
+
|
|
17
|
+
/*
 * First-derivative staggered-grid operators (3D).
 *
 * F is a function-like accessor invoked as F(dz, dy, dx) with integer offsets
 * relative to the current grid point.  DIFFZ1/DIFFY1/DIFFX1 difference F(0)
 * against F(-1) along their axis (and wider symmetric pairs around that
 * midpoint); DIFFZH1/DIFFYH1/DIFFXH1 difference F(1) against F(0).  The
 * expansion site must provide `rdz`/`rdy`/`rdx` (presumably the reciprocal
 * grid spacings -- confirm against the kernels) and, for the 4/6/8-point
 * variants, TIDE_DTYPE.
 *
 * Every replacement list is wrapped in one outer pair of parentheses so the
 * macros behave as a single operand in any surrounding expression, matching
 * the 2-point variants.
 */
#if TIDE_STENCIL == 2
// 2nd order accuracy
#define DIFFZ1(F) ((F(0, 0, 0) - F(-1, 0, 0)) * rdz)
#define DIFFY1(F) ((F(0, 0, 0) - F(0, -1, 0)) * rdy)
#define DIFFX1(F) ((F(0, 0, 0) - F(0, 0, -1)) * rdx)
#define DIFFZH1(F) ((F(1, 0, 0) - F(0, 0, 0)) * rdz)
#define DIFFYH1(F) ((F(0, 1, 0) - F(0, 0, 0)) * rdy)
#define DIFFXH1(F) ((F(0, 0, 1) - F(0, 0, 0)) * rdx)

#elif TIDE_STENCIL == 4
// 4th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(1, 0, 0) - F(-2, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 1, 0) - F(0, -2, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 0, 1) - F(0, 0, -2))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(2, 0, 0) - F(-1, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 2, 0) - F(0, -1, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(9.0/8.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-1.0/24.0) * (F(0, 0, 2) - F(0, 0, -1))) * rdx)

#elif TIDE_STENCIL == 6
// 6th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(1, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(2, 0, 0) - F(-3, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 1, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 2, 0) - F(0, -3, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 0, 1) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 0, 2) - F(0, 0, -3))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(2, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(3, 0, 0) - F(-2, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 2, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 3, 0) - F(0, -2, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(75.0/64.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-25.0/384.0) * (F(0, 0, 2) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(3.0/640.0) * (F(0, 0, 3) - F(0, 0, -2))) * rdx)

#elif TIDE_STENCIL == 8
// 8th order accuracy
#define DIFFZ1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(1, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(2, 0, 0) - F(-3, 0, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(3, 0, 0) - F(-4, 0, 0))) * rdz)

#define DIFFY1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 1, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 2, 0) - F(0, -3, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 3, 0) - F(0, -4, 0))) * rdy)

#define DIFFX1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 0) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 0, 1) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 0, 2) - F(0, 0, -3)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 0, 3) - F(0, 0, -4))) * rdx)

#define DIFFZH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(1, 0, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(2, 0, 0) - F(-1, 0, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(3, 0, 0) - F(-2, 0, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(4, 0, 0) - F(-3, 0, 0))) * rdz)

#define DIFFYH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 1, 0) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 2, 0) - F(0, -1, 0)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 3, 0) - F(0, -2, 0)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 4, 0) - F(0, -3, 0))) * rdy)

#define DIFFXH1(F) \
  (((TIDE_DTYPE)(1225.0/1024.0) * (F(0, 0, 1) - F(0, 0, 0)) \
  + (TIDE_DTYPE)(-245.0/3072.0) * (F(0, 0, 2) - F(0, 0, -1)) \
  + (TIDE_DTYPE)(49.0/5120.0) * (F(0, 0, 3) - F(0, 0, -2)) \
  + (TIDE_DTYPE)(-5.0/7168.0) * (F(0, 0, 4) - F(0, 0, -3))) * rdx)

#endif
|
|
123
|
+
|
|
124
|
+
#endif // STAGGERED_GRID_3D_H
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
#include "storage_utils.h"
|
|
2
|
+
|
|
3
|
+
#include <errno.h>
|
|
4
|
+
#include <string.h>
|
|
5
|
+
|
|
6
|
+
// Read exactly `nbytes` bytes from `fp` into `dst`.
// fread may hand back fewer bytes than requested, so it is called repeatedly
// on the unread tail.  Returns true once everything has been read and false
// as soon as one fread call yields zero bytes.
static bool read_exact(FILE* fp, void* dst, size_t nbytes) {
  char* cursor = (char*)dst;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t got = fread(cursor, 1, remaining, fp);
    if (got == 0) {
      return false;
    }
    cursor += got;
    remaining -= got;
  }
  return true;
}
|
|
17
|
+
|
|
18
|
+
// Write exactly `nbytes` bytes from `src` to `fp`.
// fwrite is called repeatedly on the unwritten tail.  Returns true once
// everything has been written and false as soon as one fwrite call writes
// zero bytes.
static bool write_exact(FILE* fp, const void* src, size_t nbytes) {
  const char* cursor = (const char*)src;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t put = fwrite(cursor, 1, remaining, fp);
    if (put == 0) {
      return false;
    }
    cursor += put;
    remaining -= put;
  }
  return true;
}
|
|
29
|
+
|
|
30
|
+
// Print a diagnostic for the failed stdio operation `op` at snapshot
// `step_idx` to stderr.  The strerror text for the current errno is appended
// only when errno is non-zero.
static void report_io_error(const char* op, int64_t step_idx) {
  const long long step = (long long)step_idx;
  if (errno == 0) {
    fprintf(stderr, "storage_utils: %s failed at step %lld\n", op, step);
    return;
  }
  fprintf(stderr, "storage_utils: %s failed at step %lld: %s\n",
          op, step, strerror(errno));
}
|
|
39
|
+
|
|
40
|
+
void storage_save_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
|
|
41
|
+
int64_t step_idx, size_t step_bytes_uncomp) {
|
|
42
|
+
if (storage_mode == STORAGE_NONE) return;
|
|
43
|
+
if (storage_mode == STORAGE_DISK) {
|
|
44
|
+
int64_t offset = step_idx * (int64_t)step_bytes_uncomp;
|
|
45
|
+
errno = 0;
|
|
46
|
+
if (fseek(fp, offset, SEEK_SET) != 0) {
|
|
47
|
+
report_io_error("fseek(write)", step_idx);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
if (!write_exact(fp, store_1, step_bytes_uncomp)) {
|
|
51
|
+
report_io_error("fwrite", step_idx);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
void storage_load_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
|
|
57
|
+
int64_t step_idx, size_t step_bytes_uncomp) {
|
|
58
|
+
if (storage_mode == STORAGE_NONE) return;
|
|
59
|
+
if (storage_mode == STORAGE_DISK) {
|
|
60
|
+
int64_t offset = step_idx * (int64_t)step_bytes_uncomp;
|
|
61
|
+
errno = 0;
|
|
62
|
+
if (fseek(fp, offset, SEEK_SET) != 0) {
|
|
63
|
+
report_io_error("fseek(read)", step_idx);
|
|
64
|
+
memset(store_1, 0, step_bytes_uncomp);
|
|
65
|
+
return;
|
|
66
|
+
}
|
|
67
|
+
if (!read_exact(fp, store_1, step_bytes_uncomp)) {
|
|
68
|
+
if (feof(fp)) {
|
|
69
|
+
fprintf(stderr, "storage_utils: unexpected EOF at step %lld\n",
|
|
70
|
+
(long long)step_idx);
|
|
71
|
+
} else {
|
|
72
|
+
report_io_error("fread", step_idx);
|
|
73
|
+
}
|
|
74
|
+
clearerr(fp);
|
|
75
|
+
memset(store_1, 0, step_bytes_uncomp);
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
#include <cuda_runtime.h>
|
|
2
|
+
#include <errno.h>
|
|
3
|
+
#include <stdint.h>
|
|
4
|
+
#include <stdio.h>
|
|
5
|
+
#include <string.h>
|
|
6
|
+
|
|
7
|
+
#include "storage_utils.h"
|
|
8
|
+
|
|
9
|
+
// Read exactly `nbytes` bytes from `fp` into `dst`.
// fread may hand back fewer bytes than requested, so it is called repeatedly
// on the unread tail.  Returns true once everything has been read and false
// as soon as one fread call yields zero bytes.
static bool read_exact(FILE* fp, void* dst, size_t nbytes) {
  char* cursor = (char*)dst;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t got = fread(cursor, 1, remaining, fp);
    if (got == 0) {
      return false;
    }
    cursor += got;
    remaining -= got;
  }
  return true;
}
|
|
20
|
+
|
|
21
|
+
// Write exactly `nbytes` bytes from `src` to `fp`.
// fwrite is called repeatedly on the unwritten tail.  Returns true once
// everything has been written and false as soon as one fwrite call writes
// zero bytes.
static bool write_exact(FILE* fp, const void* src, size_t nbytes) {
  const char* cursor = (const char*)src;
  size_t remaining = nbytes;
  while (remaining > 0) {
    const size_t put = fwrite(cursor, 1, remaining, fp);
    if (put == 0) {
      return false;
    }
    cursor += put;
    remaining -= put;
  }
  return true;
}
|
|
32
|
+
|
|
33
|
+
// Print a diagnostic for the failed stdio operation `op` at snapshot
// `step_idx` to stderr.  The strerror text for the current errno is appended
// only when errno is non-zero.
static void report_io_error(const char* op, int64_t step_idx) {
  const long long step = (long long)step_idx;
  if (errno == 0) {
    fprintf(stderr, "storage_utils: %s failed at step %lld\n", op, step);
    return;
  }
  fprintf(stderr, "storage_utils: %s failed at step %lld: %s\n",
          op, step, strerror(errno));
}
|
|
42
|
+
|
|
43
|
+
// Print a diagnostic for the failed CUDA call `op` to stderr, using the text
// cudaGetErrorString returns for `err`.
static void report_cuda_error(const char* op, cudaError_t err) {
  const char* reason = cudaGetErrorString(err);
  fprintf(stderr, "storage_utils: %s failed: %s\n", op, reason);
}
|
|
47
|
+
|
|
48
|
+
extern "C" {
|
|
49
|
+
|
|
50
|
+
// Save one snapshot that currently lives in device memory.
//
//   store_1            device buffer holding the snapshot
//   store_3            host staging buffer the snapshot is copied into
//   fp                 snapshot file, used only in STORAGE_DISK mode
//   storage_mode       one of the STORAGE_* constants
//   step_idx           snapshot index; the file offset is
//                      step_idx * shot_bytes_uncomp * n_shots
//   shot_bytes_uncomp  bytes per shot
//   n_shots            number of shots in the snapshot
//
// STORAGE_CPU enqueues an asynchronous device-to-host copy; STORAGE_DISK
// copies synchronously and then writes the staging buffer to `fp`.  Every
// other mode returns without doing anything.  Failures are reported on
// stderr and abort the save.
void storage_save_snapshot_gpu(
    void* store_1, void* store_3, FILE* fp, int64_t storage_mode,
    int64_t step_idx, size_t shot_bytes_uncomp, size_t n_shots) {
  if (storage_mode != STORAGE_CPU && storage_mode != STORAGE_DISK) return;
  const size_t bytes_to_store = shot_bytes_uncomp * n_shots;

  if (storage_mode == STORAGE_CPU) {
    // CPU mode: avoid blocking the host thread on every step.
    // Copies are enqueued on the current (default) stream and will be ordered
    // with subsequent CUDA work in the same stream.
    cudaError_t err = cudaMemcpyAsync(
        store_3, store_1, bytes_to_store, cudaMemcpyDeviceToHost, 0);
    if (err != cudaSuccess) {
      report_cuda_error("cudaMemcpyAsync(D2H)", err);
    }
    return;
  }

  // Disk mode needs host-visible data immediately for fwrite.
  cudaError_t err =
      cudaMemcpy(store_3, store_1, bytes_to_store, cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    report_cuda_error("cudaMemcpy(D2H)", err);
    return;
  }

  // fseek takes a `long`, which is only 32 bits wide on some platforms, and
  // the step * size product can overflow.  Compute the offset in unsigned
  // 64-bit arithmetic and make sure it survives the round trip through `long`
  // instead of silently seeking to a truncated position.
  const uint64_t bytes = (uint64_t)bytes_to_store;
  const uint64_t wide_offset = (uint64_t)step_idx * bytes;
  const long seek_pos = (long)wide_offset;
  const bool offset_ok =
      step_idx >= 0 &&
      (bytes == 0 || wide_offset / bytes == (uint64_t)step_idx) &&
      seek_pos >= 0 && (uint64_t)seek_pos == wide_offset;
  if (!offset_ok) {
    errno = ERANGE;
    report_io_error("fseek(write)", step_idx);
    return;
  }

  errno = 0;
  if (fseek(fp, seek_pos, SEEK_SET) != 0) {
    report_io_error("fseek(write)", step_idx);
    return;
  }
  if (!write_exact(fp, store_3, bytes_to_store)) {
    report_io_error("fwrite", step_idx);
  }
}
|
|
89
|
+
|
|
90
|
+
// Load one snapshot back into device memory.
//
//   store_1            device buffer that receives the snapshot
//   store_3            host staging buffer the snapshot is copied from
//   fp                 snapshot file, used only in STORAGE_DISK mode
//   storage_mode       one of the STORAGE_* constants
//   step_idx           snapshot index; the file offset is
//                      step_idx * shot_bytes_uncomp * n_shots
//   shot_bytes_uncomp  bytes per shot
//   n_shots            number of shots in the snapshot
//
// STORAGE_DISK first fills store_3 from `fp` (zero-filling it if the read
// fails) and then copies it to the device synchronously; STORAGE_CPU enqueues
// an asynchronous host-to-device copy of store_3.  Every other mode returns
// without doing anything.  Failures are reported on stderr.
void storage_load_snapshot_gpu(void* store_1, void* store_3, FILE* fp,
                               int64_t storage_mode, int64_t step_idx,
                               size_t shot_bytes_uncomp, size_t n_shots) {
  if (storage_mode != STORAGE_CPU && storage_mode != STORAGE_DISK) return;
  const size_t bytes_to_load = shot_bytes_uncomp * n_shots;

  if (storage_mode == STORAGE_CPU) {
    cudaError_t err = cudaMemcpyAsync(
        store_1, store_3, bytes_to_load, cudaMemcpyHostToDevice, 0);
    if (err != cudaSuccess) {
      report_cuda_error("cudaMemcpyAsync(H2D)", err);
    }
    return;
  }

  // fseek takes a `long`, which is only 32 bits wide on some platforms, and
  // the step * size product can overflow.  Compute the offset in unsigned
  // 64-bit arithmetic and make sure it survives the round trip through `long`
  // instead of silently reading from a truncated position.
  const uint64_t bytes = (uint64_t)bytes_to_load;
  const uint64_t wide_offset = (uint64_t)step_idx * bytes;
  const long seek_pos = (long)wide_offset;
  const bool offset_ok =
      step_idx >= 0 &&
      (bytes == 0 || wide_offset / bytes == (uint64_t)step_idx) &&
      seek_pos >= 0 && (uint64_t)seek_pos == wide_offset;

  bool loaded = false;
  errno = 0;
  if (!offset_ok) {
    errno = ERANGE;
    report_io_error("fseek(read)", step_idx);
  } else if (fseek(fp, seek_pos, SEEK_SET) != 0) {
    report_io_error("fseek(read)", step_idx);
  } else if (!read_exact(fp, store_3, bytes_to_load)) {
    // Short read: distinguish a truncated file from a genuine I/O error.
    if (feof(fp)) {
      fprintf(stderr, "storage_utils: unexpected EOF at step %lld\n",
              (long long)step_idx);
    } else {
      report_io_error("fread", step_idx);
    }
    // Reset the EOF/error indicators so later calls on this stream start clean.
    clearerr(fp);
  } else {
    loaded = true;
  }
  if (!loaded) {
    // Hand the device zeros rather than a stale or partial snapshot.
    memset(store_3, 0, bytes_to_load);
  }

  // Disk mode reuses the same pinned host buffer (store_3) for many steps.
  // Use a synchronous copy to avoid the host overwriting store_3 (fread)
  // before the device copy has consumed it.
  cudaError_t err =
      cudaMemcpy(store_1, store_3, bytes_to_load, cudaMemcpyHostToDevice);
  if (err != cudaSuccess) {
    report_cuda_error("cudaMemcpy(H2D)", err);
  }
}
|
|
135
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
#ifndef STORAGE_UTILS_H
|
|
2
|
+
#define STORAGE_UTILS_H
|
|
3
|
+
|
|
4
|
+
#include <stdbool.h>
|
|
5
|
+
#include <stddef.h>
|
|
6
|
+
#include <stdint.h>
|
|
7
|
+
#include <stdio.h>
|
|
8
|
+
|
|
9
|
+
// Snapshot storage modes, passed as `storage_mode` to the functions below.
//
// STORAGE_DEVICE: none of the save/load helpers copies or writes anything in
// this mode (presumably snapshots stay in device memory -- confirm against
// the caller).
#define STORAGE_DEVICE 0
// STORAGE_CPU: the *_gpu helpers copy between the device buffer and the host
// buffer `store_3`; the *_cpu helpers do nothing.
#define STORAGE_CPU 1
// STORAGE_DISK: snapshots are written to / read from `fp` at byte offset
// step_idx * (bytes per step).
#define STORAGE_DISK 2
// STORAGE_NONE: every helper returns immediately.
#define STORAGE_NONE 3

#ifdef __cplusplus
extern "C" {
#endif

// Write the host buffer `store_1` (step_bytes_uncomp bytes) to `fp` for
// snapshot `step_idx`.  Acts only in STORAGE_DISK mode; errors are printed to
// stderr.
void storage_save_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
                               int64_t step_idx, size_t step_bytes_uncomp);

// Copy shot_bytes_uncomp * n_shots bytes from the device buffer `store_1` to
// the host buffer `store_3` (STORAGE_CPU and STORAGE_DISK) and, in
// STORAGE_DISK mode, write `store_3` to `fp` for snapshot `step_idx`.
void storage_save_snapshot_gpu(void* store_1, void* store_3, FILE* fp,
                               int64_t storage_mode, int64_t step_idx,
                               size_t shot_bytes_uncomp, size_t n_shots);

// Read snapshot `step_idx` (step_bytes_uncomp bytes) from `fp` into the host
// buffer `store_1`.  Acts only in STORAGE_DISK mode; on failure `store_1` is
// zero-filled and a message is printed to stderr.
void storage_load_snapshot_cpu(void* store_1, FILE* fp, int64_t storage_mode,
                               int64_t step_idx, size_t step_bytes_uncomp);

// In STORAGE_DISK mode, read snapshot `step_idx` from `fp` into the host
// buffer `store_3` (zero-filled on failure); then, for STORAGE_CPU and
// STORAGE_DISK, copy shot_bytes_uncomp * n_shots bytes from `store_3` to the
// device buffer `store_1`.
void storage_load_snapshot_gpu(void* store_1, void* store_3, FILE* fp,
                               int64_t storage_mode, int64_t step_idx,
                               size_t shot_bytes_uncomp, size_t n_shots);

#ifdef __cplusplus
}
#endif
|
|
35
|
+
|
|
36
|
+
#endif
|
tide/grid_utils.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Grid-related helpers for padding and boundary bookkeeping."""
|
|
2
|
+
|
|
3
|
+
from typing import Sequence, Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _normalize_grid_spacing_2d(
    grid_spacing: Union[float, Sequence[float]],
) -> list[float]:
    """Normalize 2D grid spacing to [dy, dx].

    Args:
        grid_spacing: Either one real scalar, used for both axes, or an
            iterable of per-axis spacings.

    Returns:
        A new list of ``float`` values. A scalar ``s`` yields ``[s, s]``; an
        iterable yields its entries in order, each converted with ``float``.
        The number of entries is not validated here.
    """
    # Local import keeps the helper self-contained.
    import numbers

    # numbers.Real also matches non-builtin real scalars (e.g. Fraction),
    # which the plain (int, float) check would wrongly treat as a sequence.
    if isinstance(grid_spacing, numbers.Real):
        spacing = float(grid_spacing)
        return [spacing, spacing]
    # Convert each entry so both paths honour the list[float] return type.
    return [float(value) for value in grid_spacing]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _normalize_pml_width_2d(
    pml_width: Union[int, Sequence[int]],
) -> list[int]:
    """Normalize 2D PML width to [top, bottom, left, right].

    Args:
        pml_width: Either one integer-like scalar applied to all four sides,
            or an iterable of widths.

    Returns:
        A list of widths. A scalar or a one-entry iterable is repeated four
        times; a two-entry iterable ``[a, b]`` becomes ``[a, a, b, b]``; an
        iterable of any other length is returned as a list unchanged (its
        length is not validated here).
    """
    # Local import keeps the helper self-contained.
    import operator

    # operator.index accepts every integer-like scalar (anything providing
    # __index__), not just builtin int, and rejects sequences and floats.
    try:
        width = operator.index(pml_width)
    except TypeError:
        widths = list(pml_width)
    else:
        return [width] * 4

    if len(widths) == 1:
        return widths * 4
    if len(widths) == 2:
        vertical, horizontal = widths
        return [vertical, vertical, horizontal, horizontal]
    return widths
|