water-column-sonar-processing: 25.3.2-py3-none-any.whl → 25.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of water-column-sonar-processing has been flagged as potentially problematic; see the registry listing for details.
- water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
- water_column_sonar_processing/aws/s3_manager.py +95 -90
- water_column_sonar_processing/aws/s3fs_manager.py +5 -3
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/__init__.py +2 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
- water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -21
- water_column_sonar_processing/cruise/resample_regrid.py +57 -47
- water_column_sonar_processing/dataset/__init__.py +3 -0
- water_column_sonar_processing/dataset/dataset_manager.py +205 -0
- water_column_sonar_processing/dataset/feature_manager.py +32 -0
- water_column_sonar_processing/geometry/geometry_manager.py +11 -12
- water_column_sonar_processing/geometry/line_simplification.py +26 -1
- water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
- water_column_sonar_processing/index/index_manager.py +18 -17
- water_column_sonar_processing/model/zarr_manager.py +504 -256
- water_column_sonar_processing/processing/__init__.py +3 -2
- water_column_sonar_processing/processing/batch_downloader.py +11 -11
- water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -2
- water_column_sonar_processing/utility/constants.py +26 -7
- water_column_sonar_processing/utility/timestamp.py +1 -0
- water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
- water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
- water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
- water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
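The version bump itself is observable at runtime, since zarr_manager.py stamps it into store metadata via importlib.metadata. A quick check, using the distribution name as published on PyPI:

```python
import importlib.metadata

# Prints "25.8.0" once the new wheel is installed.
print(importlib.metadata.version("water-column-sonar-processing"))
```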
The remainder of this diff covers water_column_sonar_processing/model/zarr_manager.py (+504 -256). Several removed lines are truncated in the diff view and are shown here as extracted.

```diff
@@ -1,6 +1,5 @@
 import importlib.metadata
 
-import numcodecs
 import numpy as np
 import xarray as xr
 import zarr
```
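The numcodecs import is dropped because compression is now configured once at module scope (next hunk) instead of per ZarrManager instance. A minimal sketch of what that module-level codec does, against the zarr v2 API the module targets; the sample array and chunk sizes here are illustrative only:

```python
import numpy as np
import zarr
from numcodecs import Blosc

Blosc.use_threads = True  # allow multi-threaded (de)compression inside Blosc
compressor = Blosc(cname="zstd", clevel=9)  # zstd at the highest compression level

# Illustrative round trip: write a small compressed array and inspect the codec.
z = zarr.array(np.random.rand(512, 512), chunks=(128, 128), compressor=compressor)
print(z.info)  # reports the compressor config and the observed storage ratio
```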
```diff
@@ -9,50 +8,45 @@ from numcodecs import Blosc
 from water_column_sonar_processing.aws import S3FSManager
 from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
 
-
-
-
+Blosc.use_threads = True
+compressor = Blosc(cname="zstd", clevel=9)
 
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 
 
-# creates the latlon
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
 class ZarrManager:
     #######################################################
     def __init__(
         self,
     ):
-        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
-        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
         self.__overwrite = True
-        self.__num_threads = numcodecs.blosc.get_nthreads()
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
     def get_depth_values(
         self,
-        min_echo_range: float
-        max_echo_range: float
+        # min_echo_range: float,  # minimum depth measured (zero non-inclusive) from whole cruise
+        max_echo_range: float,  # maximum depth measured from whole cruise
         cruise_min_epsilon: float = 0.25,  # resolution between subsequent measurements
     ):
         # Gets the set of depth values that will be used when resampling and
-        # regridding the
-        # Note: returned values
+        # regridding the dataset to a cruise level model store.
+        # Note: returned values start at zero!
         # For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
-        print("
+        print("Computing depth values.")
         all_cruise_depth_values = np.linspace(  # TODO: PROBLEM HERE
-            start=
+            start=0,  # just start it at zero
             stop=max_echo_range,
-            num=int(
+            num=int(max_echo_range / cruise_min_epsilon)
+            + 1,  # int(np.ceil(max_echo_range / cruise_min_epsilon))?
             endpoint=True,
         )  # np.arange(min_echo_range, max_echo_range, step=min_echo_range) # this is worse
 
         if np.any(np.isnan(all_cruise_depth_values)):
             raise Exception("Problem depth values returned were NaN.")
 
-        print("Done
+        print("Done computing depth values.")
         return all_cruise_depth_values.round(decimals=2)
 
     #######################################################
```
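The rewritten get_depth_values pins the grid to start at zero and sizes it with int(max_echo_range / cruise_min_epsilon) + 1 points. A worked sketch with illustrative numbers (max_echo_range=10.0 m, cruise_min_epsilon=0.25 m):

```python
import numpy as np

max_echo_range = 10.0      # meters, cruise-wide maximum measured depth
cruise_min_epsilon = 0.25  # meters, finest vertical resolution

num = int(max_echo_range / cruise_min_epsilon) + 1  # 41 samples
depths = np.linspace(start=0, stop=max_echo_range, num=num, endpoint=True)

print(depths[:3])          # [0.   0.25 0.5 ]
print(depths[-1])          # 10.0
print(np.diff(depths)[0])  # 0.25 -- the range divides evenly here
```

Because int() truncates, a range that is not an exact multiple of the epsilon yields a slightly coarser step (for example, 10.3 m gives 42 points with a step of about 0.2512 m), which is presumably what the np.ceil TODO in the code is meant to address.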
```diff
@@ -64,241 +58,496 @@ class ZarrManager:
         sensor_name: str,
         frequencies: list,  # units in Hz
         width: int,  # TODO: needs better name... "ping_time"
-        min_echo_range: float,
+        # min_echo_range: float,
+        max_echo_range: float,
+        cruise_min_epsilon: float,  # smallest resolution in meters
+        calibration_status: bool = False,  # Assume uncalibrated
+    ) -> str:
+        try:
+            # TODO: problem throwing exceptions here
+            print(
+                f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+            )
+            # There can not currently be repeated frequencies
+            # TODO: eventually switch coordinate to "channel" because frequencies can repeat
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception(
+                    "Number of frequencies does not match number of channels"
+                )
+
+            zarr_path = f"{path}/{cruise_name}.zarr"
+            store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+            root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
+
+            #####################################################################
+            # --- Coordinate: Time --- #
+            # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+            root.create_dataset(
+                name=Coordinates.TIME.value,
+                data=np.repeat(0.0, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+            root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+            root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+            root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Depth --- #
+            depth_values = self.get_depth_values(
+                # min_echo_range=min_echo_range,
+                max_echo_range=max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
+            )
+
+            root.create_dataset(
+                name=Coordinates.DEPTH.value,
+                # TODO: verify that these values are correct
+                data=depth_values,
+                shape=len(depth_values),
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.DEPTH_DTYPE.value
+                ),  # float16 == 2 significant digits would be ideal
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            if np.any(np.isnan(depth_values)):
+                raise Exception("Some depth values returned were NaN.")
+
+            root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
+
+            root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+            root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+            root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Latitude --- #
+            root.create_dataset(
+                name=Coordinates.LATITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LATITUDE is indexed by TIME
+            root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+            root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+            root.latitude.attrs["standard_name"] = (
+                Coordinates.LATITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Coordinate: Longitude --- #
+            root.create_dataset(
+                name=Coordinates.LONGITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LONGITUDE is indexed by TIME
+            root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+            root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+            root.longitude.attrs["standard_name"] = (
+                Coordinates.LONGITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # TODO: verify adding this variable for where the bottom was detected
+            # --- Coordinate: Bottom --- #
+            root.create_dataset(
+                name=Coordinates.BOTTOM.value,
+                data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # BOTTOM is indexed by TIME
+            root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+            root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+            root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+
+            #####################################################################
+            # TODO: verify adding this variable with test
+            # --- Coordinate: Speed --- #
+            root.create_dataset(
+                name=Coordinates.SPEED.value,
+                data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # SPEED is indexed by TIME
+            root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+            root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+            root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Frequency --- #
+            root.create_dataset(
+                name=Coordinates.FREQUENCY.value,
+                data=frequencies,
+                shape=len(frequencies),
+                chunks=len(frequencies),
+                dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # TODO: best coordinate would be channel with str type
+            root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.FREQUENCY.value
+            ]  # TODO: is this correct
+
+            root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+            root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+            root.frequency.attrs["standard_name"] = (
+                Coordinates.FREQUENCY_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Sv Data --- #
+            root.create_dataset(
+                name=Coordinates.SV.value,
+                shape=(len(depth_values), width, len(frequencies)),
+                chunks=(
+                    Constants.TILE_SIZE.value,
+                    Constants.TILE_SIZE.value,
+                    1,
+                ),
+                dtype=np.dtype(Coordinates.SV_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.DEPTH.value,
+                Coordinates.TIME.value,
+                Coordinates.FREQUENCY.value,
+            ]
+
+            root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+            root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+            root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+            #####################################################################
+            # --- Metadata --- #
+            root.attrs["ship_name"] = ship_name
+            root.attrs["cruise_name"] = cruise_name
+            root.attrs["sensor_name"] = sensor_name
+            #
+            root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+
+            current_project_version = importlib.metadata.version(
+                "water-column-sonar-processing"
+            )
+            root.attrs["processing_software_version"] = current_project_version
+            root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+            #
+            root.attrs["calibration_status"] = calibration_status
+            root.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+            zarr.consolidate_metadata(store)
+            #####################################################################
+            """
+            # zzz = zarr.open('https://echofish-dev-master-118234403147-echofish-zarr-store.s3.us-west-2.amazonaws.com/GU1002_resample.zarr')
+            # zzz.time[0] = 1274979445.423
+            # Initialize all to origin time, will be overwritten late
+            """
+            return zarr_path
+        except Exception as err:
+            raise RuntimeError(f"Problem trying to create zarr store, {err}")
+        # finally:
+        #     cleaner = Cleaner()
+        #     cleaner.delete_local_files()
+        # TODO: should delete zarr store in temp directory too?
+
+    #######################################################
+    #
+    # LEVEL 3 - LEVEL 3 - LEVEL 3 - LEVEL 3  # TODO: move to separate project for zarr 3?
+    #
+    def create_zarr_store_level_3(
+        self,
+        path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        frequencies: list,  # units in Hz
+        width: int,  # TODO: needs better name... "ping_time"
+        min_echo_range: float,  # smallest resolution in meters --> 1.0 meters
         max_echo_range: float,
         cruise_min_epsilon: float,
         calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
-        [old lines 72-301 removed: body of the previous create_zarr_store implementation; content not preserved in this diff view]
+        compressor = Blosc(cname="zstd", clevel=9, shuffle=1)
+        TILE_SIZE = 1024
+        try:
+            # TODO: problem throwing exceptions here
+            print(
+                f"Creating level 3 local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+            )
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception(
+                    "Number of frequencies does not match number of channels"
+                )
+
+            # print(f"Debugging number of threads: {self.__num_threads}")
+
+            zarr_path = f"{path}/{cruise_name}.zarr"
+            store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+            root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
+
+            #####################################################################
+            # --- Coordinate: Time --- #
+            # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+            root.create_dataset(
+                name=Coordinates.TIME.value,
+                data=np.repeat(0.0, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                compressor=compressor,
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+            root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+            root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+            root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Depth --- #
+            depth_values = self.get_depth_values(
+                # min_echo_range=min_echo_range,
+                max_echo_range=max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
+            )
+
+            root.create_dataset(
+                name=Coordinates.DEPTH.value,
+                # TODO: verify that these values are correct
+                data=depth_values,
+                shape=len(depth_values),
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.DEPTH_DTYPE.value  # TODO: convert to integers and only get whole number depths
+                ),  # float16 == 2 significant digits would be ideal
+                compressor=compressor,
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            if np.any(np.isnan(depth_values)):
+                raise Exception("Some depth values returned were NaN.")
+
+            root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
+            root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+            root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+            root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Latitude --- #
+            root.create_dataset(
+                name=Coordinates.LATITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,  # needs to be nan to validate if any missing
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LATITUDE is indexed by TIME
+            root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+            root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+            root.latitude.attrs["standard_name"] = (
+                Coordinates.LATITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Coordinate: Longitude --- #
+            root.create_dataset(
+                name=Coordinates.LONGITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LONGITUDE is indexed by TIME
+            root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+            root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+            root.longitude.attrs["standard_name"] = (
+                Coordinates.LONGITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # TODO: verify adding this variable for where the bottom was detected
+            # --- Coordinate: Bottom --- #
+            root.create_dataset(
+                name=Coordinates.BOTTOM.value,
+                data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.BOTTOM_DTYPE.value
+                ),  # TODO: should also only be integers
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # BOTTOM is indexed by TIME
+            root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+            root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+            root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+
+            #####################################################################
+            # TODO: verify adding this variable with test
+            # --- Coordinate: Speed --- #
+            root.create_dataset(
+                name=Coordinates.SPEED.value,
+                data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),  # TODO: also round?
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # SPEED is indexed by TIME
+            root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+            root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+            root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Frequency --- #
+            root.create_dataset(
+                name=Coordinates.FREQUENCY.value,
+                data=frequencies,
+                shape=len(frequencies),
+                chunks=len(frequencies),
+                dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # TODO: best coordinate would be channel with str type
+            root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.FREQUENCY.value
+            ]  # TODO: is this correct
+            root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+            root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+            root.frequency.attrs["standard_name"] = (
+                Coordinates.FREQUENCY_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Sv Data --- #
+            root.create_dataset(
+                name=Coordinates.SV.value,
+                shape=(len(depth_values), width, len(frequencies)),
+                chunks=(
+                    TILE_SIZE,
+                    TILE_SIZE,
+                    len(frequencies),
+                ),
+                dtype=np.dtype("int8"),  # Coordinates.SV_DTYPE.value
+                compressor=compressor,  # TODO: get compression working?!
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.DEPTH.value,
+                Coordinates.TIME.value,
+                Coordinates.FREQUENCY.value,
+            ]
+            root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+            root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+            root.Sv.attrs["tile_size"] = TILE_SIZE
+
+            #####################################################################
+            # --- Metadata --- #
+            root.attrs["ship_name"] = ship_name
+            root.attrs["cruise_name"] = cruise_name
+            root.attrs["sensor_name"] = sensor_name
+            #
+            root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+
+            current_project_version = importlib.metadata.version(
+                "water_column_sonar_processing"
+            )
+            root.attrs["processing_software_version"] = current_project_version
+            root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+            #
+            # TODO: add level somewhere?
+            #
+            root.attrs["calibration_status"] = calibration_status
+            root.attrs["tile_size"] = TILE_SIZE
+
+            zarr.consolidate_metadata(store)
+            #####################################################################
+            return zarr_path
+        except Exception as err:
+            raise RuntimeError(f"Problem trying to create level 3 zarr store, {err}")
+        # finally:
+        #     cleaner = Cleaner()
+        #     cleaner.delete_local_files()
+        # TODO: should delete zarr store in temp directory too?
 
     ############################################################################
     # def update_zarr_store(
```
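Compared to the level-2 store, the new level-3 Sv array is stored as int8 with full-frequency chunks of (1024, 1024, n_freq), so one chunk holds a complete 1024x1024 tile across all channels (roughly n_freq MiB uncompressed). The diff does not show how Sv in dB is mapped onto int8; the affine quantization below is only an assumed illustration of that kind of mapping, with an assumed dynamic range:

```python
import numpy as np

# Assumed dynamic range for volume backscattering strength (dB); the actual
# scale/offset used by the package is not visible in this diff.
SV_MIN, SV_MAX = -100.0, 0.0

def quantize_sv(sv_db: np.ndarray) -> np.ndarray:
    """Map Sv in dB onto int8 via an assumed affine transform."""
    scaled = np.clip((sv_db - SV_MIN) / (SV_MAX - SV_MIN), 0.0, 1.0)
    return np.round(scaled * 254.0 - 127.0).astype(np.int8)

print(quantize_sv(np.array([-80.0, -60.0, -30.0])))  # [-76 -25  51]
```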
```diff
@@ -335,8 +584,9 @@ class ZarrManager:
             # synchronizer = model.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
             cruise_zarr = zarr.open(store=store, mode="r+")
         except Exception as err:  # Failure
-            [old lines 338-339 removed; content not preserved in this diff view]
+            raise RuntimeError(
+                f"Exception encountered opening Zarr store with Zarr, {err}"
+            )
         print("Done opening Zarr store with Zarr.")
         return cruise_zarr
 
```
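The new handlers wrap failures in RuntimeError with the original message interpolated. A variant worth knowing (not what the package does) is explicit exception chaining, which preserves the full traceback as __cause__; a minimal sketch with a hypothetical helper:

```python
import zarr

def open_cruise_zarr(store_path: str):
    """Hypothetical helper mirroring the zarr.open call in the diff."""
    try:
        return zarr.open(store=store_path, mode="r+")
    except Exception as err:
        # "from err" attaches the original exception as __cause__, so the
        # underlying traceback survives alongside the higher-level message.
        raise RuntimeError("Exception encountered opening Zarr store") from err
```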
```diff
@@ -358,12 +608,11 @@ class ZarrManager:
             s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
             ds = xr.open_dataset(filename_or_obj=store_s3_map, engine="zarr", chunks={})
+            return ds
         except Exception as err:
-            [old line 362 removed; content not preserved in this diff view]
-            raise err
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
         finally:
             print("Exiting opening Zarr store in S3 as Xarray.")
-            return ds
 
     def open_l2_zarr_store_with_xarray(
         self,
```
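The most substantive fix in this hunk is moving `return ds` out of the `finally` block: a return in `finally` overrides any exception in flight, so the old code could silently swallow errors, and even the `raise err` in the `except` block could be bypassed. A standalone demonstration of the pitfall:

```python
def broken():
    try:
        raise ValueError("boom")
    finally:
        return "ok"  # overrides the in-flight ValueError; callers never see it

def fixed():
    try:
        return "ok"  # returned only when no exception occurred
    finally:
        print("cleanup still runs on success and on failure")

print(broken())  # ok  (the ValueError silently disappears)
print(fixed())   # cleanup still runs... then: ok
```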
```diff
@@ -380,8 +629,7 @@ class ZarrManager:
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
             ds = xr.open_zarr(store=store_s3_map, consolidated=None)
         except Exception as err:
-            [old line 383 removed; content not preserved in this diff view]
-            raise err
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
         print("Done opening Zarr store in S3 as Xarray.")
         return ds
 
```
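Putting the reader-side pieces together, this is roughly how a cruise-level store would be opened through the same helpers the class uses; the endpoint URL and bucket path below are hypothetical:

```python
import xarray as xr
from water_column_sonar_processing.aws import S3FSManager

# Hypothetical endpoint/path; s3_map() mirrors the call used inside ZarrManager.
s3fs_manager = S3FSManager(endpoint_url="http://localhost:4566")
store_s3_map = s3fs_manager.s3_map(
    s3_zarr_store_path="s3://example-bucket/level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.zarr"
)
ds = xr.open_zarr(store=store_s3_map, consolidated=None)
print(ds)  # dims: depth, time, frequency; variables: Sv, latitude, longitude, ...
```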