water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +418 -0
- water_column_sonar_processing/aws/s3fs_manager.py +64 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +323 -0
- water_column_sonar_processing/geometry/__init__.py +13 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +241 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +381 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +741 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
|
@@ -0,0 +1,741 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from importlib import metadata
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import xarray as xr
|
|
7
|
+
import zarr
|
|
8
|
+
from zarr.codecs import BloscCodec, BloscShuffle
|
|
9
|
+
from zarr.core.group import Group
|
|
10
|
+
|
|
11
|
+
from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
|
|
12
|
+
|
|
13
|
+
# https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/index.html
# Shared Blosc compressor configuration applied to every array in the store:
# zstd backend at maximum compression level with bit-level shuffling.
compressors = BloscCodec(cname="zstd", clevel=9, shuffle=BloscShuffle.bitshuffle)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
|
|
22
|
+
class ZarrManager:
|
|
23
|
+
#######################################################
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
# endpoint_url: Optional[str] = None,
|
|
27
|
+
):
|
|
28
|
+
self.__overwrite = True
|
|
29
|
+
self.key = os.environ.get("OUTPUT_BUCKET_ACCESS_KEY")
|
|
30
|
+
self.secret = os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY")
|
|
31
|
+
|
|
32
|
+
#######################################################
|
|
33
|
+
@staticmethod
|
|
34
|
+
def get_depth_values(
|
|
35
|
+
max_echo_range: float, # maximum depth measured from whole cruise
|
|
36
|
+
cruise_min_epsilon: float = 0.20, # delta subsequent measurements
|
|
37
|
+
) -> np.ndarray[tuple]:
|
|
38
|
+
# Gets the set of depth values that will be used when resampling and
|
|
39
|
+
# regridding the dataset to a cruise level model store.
|
|
40
|
+
# Note: returned values start at zero!
|
|
41
|
+
# For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
|
|
42
|
+
all_cruise_depth_values = np.linspace( # TODO: PROBLEM HERE
|
|
43
|
+
start=0, # start it at zero
|
|
44
|
+
stop=np.ceil(max_echo_range), # round up
|
|
45
|
+
num=int(np.ceil(max_echo_range) / cruise_min_epsilon) + 1,
|
|
46
|
+
endpoint=True,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
if np.any(np.isnan(all_cruise_depth_values)):
|
|
50
|
+
raise Exception("Problem depth values returned were NaN.")
|
|
51
|
+
|
|
52
|
+
return all_cruise_depth_values.round(decimals=2)
|
|
53
|
+
|
|
54
|
+
#######################################################
def create_zarr_store(
    self,
    path: str,  # e.g. 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp'
    ship_name: str,
    cruise_name: str,
    sensor_name: str,
    frequencies: list,  # units in Hz
    width: int,
    max_echo_range: float,
    calibration_status: bool = False,  # Assume uncalibrated
) -> str:
    """
    Create a new cruise-level Zarr v3 store at ``{path}/{cruise_name}.zarr``.

    The store holds a depth-by-time-by-frequency Sv cube plus 1-D coordinates:
    time, depth, frequency, and five NaN-initialized arrays indexed by time
    (latitude, longitude, bottom, speed, distance). The depth grid includes
    the water_level on top of max_echo_range already; nothing extra is needed.

    Parameters
    ----------
    path : str
        Directory in which the ``.zarr`` store is created.
    ship_name, cruise_name, sensor_name : str
        Identifiers written into the store's metadata attributes.
    frequencies : list
        Channel frequencies in Hz; must not contain duplicates.
    width : int
        Number of time samples across the whole cruise.
    max_echo_range : float
        Maximum depth measured over the whole cruise, in meters.
    calibration_status : bool
        Whether the data are calibrated (defaults to False).

    Returns
    -------
    str
        Path of the created Zarr store.

    Raises
    ------
    RuntimeError
        If anything goes wrong while creating the store.
    """
    try:
        print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
        # Duplicate frequencies would collapse channels onto the same index.
        if len(frequencies) != len(set(frequencies)):
            raise Exception(
                "Number of frequencies does not match number of channels"
            )

        zarr_path = f"{path}/{cruise_name}.zarr"
        #####################################################################
        frequencies = np.array(
            frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
        )
        #####################################################################
        # Define the chunk sizes used by each array.
        depth_chunk_shape = (Constants.TILE_SIZE.value,)
        time_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
        frequency_chunk_shape = (len(frequencies),)
        spatiotemporal_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
        sv_chunk_shape = (Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1)
        #####################################################################
        root = zarr.create_group(store=zarr_path, zarr_format=3, overwrite=True)
        #####################################################################
        # --- Coordinate: Time --- #
        # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
        # Stored as numbers with CF "units"/"calendar" attributes; see below.
        # NOTE(review): the original called time_values.astype(TIME_DTYPE) but
        # discarded the result (astype returns a new array), so values were
        # always written as float64. The dead no-op is removed here; confirm
        # whether an actual cast to TIME_DTYPE was intended.
        time_values = np.repeat(0.0, width)
        root.create_array(
            name=Coordinates.TIME.value,
            data=time_values,
            chunks=time_chunk_shape,
            compressors=compressors,
            fill_value=np.nan,
            attributes=dict(
                calendar=Coordinates.TIME_CALENDAR.value,
                units=Coordinates.TIME_UNITS.value,
                long_name=Coordinates.TIME_LONG_NAME.value,
                standard_name=Coordinates.TIME_STANDARD_NAME.value,
            ),
            dimension_names=[Coordinates.TIME.value],
            overwrite=True,
        )
        #####################################################################
        # --- Coordinate: Depth --- #
        depth_data = np.array(
            self.get_depth_values(max_echo_range=max_echo_range),
            dtype=Coordinates.DEPTH_DTYPE.value,
        )
        root.create_array(
            name=Coordinates.DEPTH.value,
            data=depth_data,
            chunks=depth_chunk_shape,
            compressors=compressors,
            attributes=dict(
                units=Coordinates.DEPTH_UNITS.value,
                long_name=Coordinates.DEPTH_LONG_NAME.value,
                standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
            ),
            dimension_names=[Coordinates.DEPTH.value],
            overwrite=True,
        )
        #####################################################################
        # --- 1-D coordinates indexed by TIME --- #
        def _create_time_indexed_array(name, dtype, attrs):
            # Helper: NaN-filled 1-D array spanning the time dimension;
            # latitude/longitude/bottom/speed/distance all share this shape.
            root.create_array(
                name=name,
                shape=width,
                dtype=np.dtype(dtype),
                chunks=spatiotemporal_chunk_shape,
                compressors=compressors,
                fill_value=np.nan,
                attributes=attrs,
                dimension_names=[Coordinates.TIME.value],
                overwrite=True,
            )

        _create_time_indexed_array(
            Coordinates.LATITUDE.value,
            Coordinates.LATITUDE_DTYPE.value,
            dict(
                units=Coordinates.LATITUDE_UNITS.value,
                long_name=Coordinates.LATITUDE_LONG_NAME.value,
                standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
            ),
        )
        _create_time_indexed_array(
            Coordinates.LONGITUDE.value,
            Coordinates.LONGITUDE_DTYPE.value,
            dict(
                units=Coordinates.LONGITUDE_UNITS.value,
                long_name=Coordinates.LONGITUDE_LONG_NAME.value,
                standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
            ),
        )
        _create_time_indexed_array(
            Coordinates.BOTTOM.value,
            Coordinates.BOTTOM_DTYPE.value,
            dict(
                units=Coordinates.BOTTOM_UNITS.value,
                long_name=Coordinates.BOTTOM_LONG_NAME.value,
                standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
            ),
        )
        _create_time_indexed_array(
            Coordinates.SPEED.value,
            Coordinates.SPEED_DTYPE.value,
            dict(
                units=Coordinates.SPEED_UNITS.value,
                long_name=Coordinates.SPEED_LONG_NAME.value,
                standard_name=Coordinates.SPEED_STANDARD_NAME.value,
            ),
        )
        _create_time_indexed_array(
            Coordinates.DISTANCE.value,
            Coordinates.DISTANCE_DTYPE.value,
            dict(
                units=Coordinates.DISTANCE_UNITS.value,
                long_name=Coordinates.DISTANCE_LONG_NAME.value,
                standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
            ),
        )
        #####################################################################
        # --- Coordinate: Frequency --- #
        root.create_array(
            name=Coordinates.FREQUENCY.value,
            data=frequencies,
            chunks=frequency_chunk_shape,
            compressors=compressors,
            attributes=dict(
                units=Coordinates.FREQUENCY_UNITS.value,
                long_name=Coordinates.FREQUENCY_LONG_NAME.value,
                standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
            ),
            dimension_names=[Coordinates.FREQUENCY.value],
            overwrite=True,
        )
        #####################################################################
        # --- Sv data cube: Depth x Time x Frequency --- #
        root.create_array(
            name=Coordinates.SV.value,
            shape=(len(depth_data), width, len(frequencies)),
            dtype=np.dtype(Coordinates.SV_DTYPE.value),
            chunks=sv_chunk_shape,
            compressors=compressors,
            fill_value=np.nan,
            attributes=dict(
                units=Coordinates.SV_UNITS.value,
                long_name=Coordinates.SV_LONG_NAME.value,
                standard_name=Coordinates.SV_STANDARD_NAME.value,
            ),
            dimension_names=[
                Coordinates.DEPTH.value,
                Coordinates.TIME.value,
                Coordinates.FREQUENCY.value,
            ],
            overwrite=True,
        )
        #####################################################################
        # --- Store-level metadata --- #
        root.attrs["ship_name"] = ship_name
        root.attrs["cruise_name"] = cruise_name
        root.attrs["sensor_name"] = sensor_name
        root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
        # NOTE: for the version to be parsable you need to build the python
        # package locally first.
        root.attrs["processing_software_version"] = metadata.version(
            "water-column-sonar-processing"
        )
        root.attrs["processing_software_time"] = Timestamp.get_timestamp()
        root.attrs["calibration_status"] = calibration_status
        root.attrs["tile_size"] = Constants.TILE_SIZE.value
        return zarr_path
    except Exception as err:
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError(f"Problem trying to create zarr store, {err}") from err
|
|
306
|
+
|
|
307
|
+
# #######################################################
|
|
308
|
+
# def create_zarr_store_old(
|
|
309
|
+
# self,
|
|
310
|
+
# path: str, # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
|
|
311
|
+
# ship_name: str,
|
|
312
|
+
# cruise_name: str,
|
|
313
|
+
# sensor_name: str,
|
|
314
|
+
# frequencies: list, # units in Hz
|
|
315
|
+
# width: int,
|
|
316
|
+
# max_echo_range: float,
|
|
317
|
+
# # cruise_min_epsilon: float, # smallest resolution in meters
|
|
318
|
+
# calibration_status: bool = False, # Assume uncalibrated
|
|
319
|
+
# ) -> str:
|
|
320
|
+
# """
|
|
321
|
+
# Creates a new zarr store in a local temporary directory(?)
|
|
322
|
+
# """
|
|
323
|
+
# try:
|
|
324
|
+
# print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
|
|
325
|
+
# if len(frequencies) != len(set(frequencies)):
|
|
326
|
+
# raise Exception(
|
|
327
|
+
# "Number of frequencies does not match number of channels"
|
|
328
|
+
# )
|
|
329
|
+
#
|
|
330
|
+
# zarr_path = f"{path}/{cruise_name}.zarr"
|
|
331
|
+
# #####################################################################
|
|
332
|
+
# # Define the chunk sizes and the encoding
|
|
333
|
+
# # 1_000_000 data points for quickest download
|
|
334
|
+
# spatiotemporal_chunk_size = int(1e6)
|
|
335
|
+
# depth_chunk_shape = (512,)
|
|
336
|
+
# time_chunk_shape = (spatiotemporal_chunk_size,)
|
|
337
|
+
# frequency_chunk_shape = (len(frequencies),)
|
|
338
|
+
# latitude_chunk_shape = (spatiotemporal_chunk_size,)
|
|
339
|
+
# longitude_chunk_shape = (spatiotemporal_chunk_size,)
|
|
340
|
+
# bottom_chunk_shape = (spatiotemporal_chunk_size,)
|
|
341
|
+
# speed_chunk_shape = (spatiotemporal_chunk_size,)
|
|
342
|
+
# distance_chunk_shape = (spatiotemporal_chunk_size,)
|
|
343
|
+
# sv_chunk_shape = (512, 512, 1) # TODO: move to constants
|
|
344
|
+
#
|
|
345
|
+
# #####################################################################
|
|
346
|
+
# ##### Depth #####
|
|
347
|
+
# depth_data_values = self.get_depth_values(
|
|
348
|
+
# max_echo_range=max_echo_range,
|
|
349
|
+
# )
|
|
350
|
+
#
|
|
351
|
+
# depth_data = np.array(
|
|
352
|
+
# depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
|
|
353
|
+
# )
|
|
354
|
+
# depth_da = xr.DataArray(
|
|
355
|
+
# data=depth_data,
|
|
356
|
+
# dims=Coordinates.DEPTH.value,
|
|
357
|
+
# name=Coordinates.DEPTH.value,
|
|
358
|
+
# attrs=dict(
|
|
359
|
+
# units=Coordinates.DEPTH_UNITS.value,
|
|
360
|
+
# long_name=Coordinates.DEPTH_LONG_NAME.value,
|
|
361
|
+
# standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
|
|
362
|
+
# ),
|
|
363
|
+
# )
|
|
364
|
+
#
|
|
365
|
+
# ##### Time #####
|
|
366
|
+
# # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
|
|
367
|
+
# time_data = np.array(
|
|
368
|
+
# np.repeat(np.datetime64(0, "ns"), width),
|
|
369
|
+
# dtype="datetime64[ns]",
|
|
370
|
+
# )
|
|
371
|
+
# time_da = xr.DataArray(
|
|
372
|
+
# data=time_data,
|
|
373
|
+
# dims=Coordinates.TIME.value,
|
|
374
|
+
# name=Coordinates.TIME.value,
|
|
375
|
+
# attrs=dict(
|
|
376
|
+
# # Note: cal & units are written automatically by xarray
|
|
377
|
+
# # calendar="proleptic_gregorian",
|
|
378
|
+
# # units="seconds since 1970-01-01 00:00:00",
|
|
379
|
+
# long_name=Coordinates.TIME_LONG_NAME.value,
|
|
380
|
+
# standard_name=Coordinates.TIME_STANDARD_NAME.value,
|
|
381
|
+
# ),
|
|
382
|
+
# )
|
|
383
|
+
#
|
|
384
|
+
# ##### Frequency #####
|
|
385
|
+
# frequency_data = np.array(
|
|
386
|
+
# frequencies,
|
|
387
|
+
# dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
|
|
388
|
+
# )
|
|
389
|
+
# frequency_da = xr.DataArray(
|
|
390
|
+
# data=frequency_data,
|
|
391
|
+
# dims=Coordinates.FREQUENCY.value,
|
|
392
|
+
# name=Coordinates.FREQUENCY.value,
|
|
393
|
+
# attrs=dict(
|
|
394
|
+
# units=Coordinates.FREQUENCY_UNITS.value,
|
|
395
|
+
# long_name=Coordinates.FREQUENCY_LONG_NAME.value,
|
|
396
|
+
# standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
|
|
397
|
+
# ),
|
|
398
|
+
# )
|
|
399
|
+
#
|
|
400
|
+
# ##### Latitude #####
|
|
401
|
+
# gps_data = np.array(
|
|
402
|
+
# np.repeat(np.nan, width),
|
|
403
|
+
# dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
|
|
404
|
+
# )
|
|
405
|
+
# latitude_da = xr.DataArray(
|
|
406
|
+
# data=gps_data,
|
|
407
|
+
# coords=dict(
|
|
408
|
+
# time=time_da,
|
|
409
|
+
# ),
|
|
410
|
+
# dims=Coordinates.TIME.value, # Note: "TIME"
|
|
411
|
+
# name=Coordinates.LATITUDE.value,
|
|
412
|
+
# attrs=dict(
|
|
413
|
+
# units=Coordinates.LATITUDE_UNITS.value,
|
|
414
|
+
# long_name=Coordinates.LATITUDE_LONG_NAME.value,
|
|
415
|
+
# standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
|
|
416
|
+
# ),
|
|
417
|
+
# ) # Note: LATITUDE is indexed by TIME
|
|
418
|
+
#
|
|
419
|
+
# ##### Longitude #####
|
|
420
|
+
# longitude_da = xr.DataArray(
|
|
421
|
+
# data=gps_data,
|
|
422
|
+
# coords=dict(
|
|
423
|
+
# time=time_da,
|
|
424
|
+
# ),
|
|
425
|
+
# dims=Coordinates.TIME.value, # Note: "TIME"
|
|
426
|
+
# name=Coordinates.LONGITUDE.value,
|
|
427
|
+
# attrs=dict(
|
|
428
|
+
# units=Coordinates.LONGITUDE_UNITS.value,
|
|
429
|
+
# long_name=Coordinates.LONGITUDE_LONG_NAME.value,
|
|
430
|
+
# standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
|
|
431
|
+
# ),
|
|
432
|
+
# ) # Note: LONGITUDE is indexed by TIME
|
|
433
|
+
#
|
|
434
|
+
# ##### Bottom #####
|
|
435
|
+
# bottom_data = np.array(
|
|
436
|
+
# np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
|
|
437
|
+
# )
|
|
438
|
+
# bottom_da = xr.DataArray(
|
|
439
|
+
# data=bottom_data,
|
|
440
|
+
# coords=dict(
|
|
441
|
+
# time=time_da,
|
|
442
|
+
# ),
|
|
443
|
+
# dims=Coordinates.TIME.value, # Note: "TIME"
|
|
444
|
+
# name=Coordinates.BOTTOM.value,
|
|
445
|
+
# attrs=dict(
|
|
446
|
+
# units=Coordinates.BOTTOM_UNITS.value,
|
|
447
|
+
# long_name=Coordinates.BOTTOM_LONG_NAME.value,
|
|
448
|
+
# standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
|
|
449
|
+
# ),
|
|
450
|
+
# )
|
|
451
|
+
#
|
|
452
|
+
# ##### Speed #####
|
|
453
|
+
# speed_data = np.array(
|
|
454
|
+
# np.repeat(np.nan, width), dtype=np.dtype(Coordinates.SPEED_DTYPE.value)
|
|
455
|
+
# )
|
|
456
|
+
# speed_da = xr.DataArray(
|
|
457
|
+
# data=speed_data,
|
|
458
|
+
# coords=dict(
|
|
459
|
+
# time=time_da,
|
|
460
|
+
# ),
|
|
461
|
+
# dims=Coordinates.TIME.value, # Note: "TIME"
|
|
462
|
+
# name=Coordinates.SPEED.value,
|
|
463
|
+
# attrs=dict(
|
|
464
|
+
# units=Coordinates.SPEED_UNITS.value,
|
|
465
|
+
# long_name=Coordinates.SPEED_LONG_NAME.value,
|
|
466
|
+
# standard_name=Coordinates.SPEED_STANDARD_NAME.value,
|
|
467
|
+
# ),
|
|
468
|
+
# )
|
|
469
|
+
#
|
|
470
|
+
# ##### Distance #####
|
|
471
|
+
# distance_data = np.array(
|
|
472
|
+
# np.repeat(np.nan, width),
|
|
473
|
+
# dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
|
|
474
|
+
# )
|
|
475
|
+
# distance_da = xr.DataArray(
|
|
476
|
+
# data=distance_data,
|
|
477
|
+
# coords=dict(
|
|
478
|
+
# time=time_da,
|
|
479
|
+
# ),
|
|
480
|
+
# dims=Coordinates.TIME.value, # Note: "TIME"
|
|
481
|
+
# name=Coordinates.DISTANCE.value,
|
|
482
|
+
# attrs=dict(
|
|
483
|
+
# units=Coordinates.DISTANCE_UNITS.value,
|
|
484
|
+
# long_name=Coordinates.DISTANCE_LONG_NAME.value,
|
|
485
|
+
# standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
|
|
486
|
+
# ),
|
|
487
|
+
# )
|
|
488
|
+
#
|
|
489
|
+
# ##### Sv #####
|
|
490
|
+
# gc.collect()
|
|
491
|
+
# # sv_data = np.empty(
|
|
492
|
+
# # (len(depth_data), width, len(frequencies)),
|
|
493
|
+
# # # (2501, 4_100_782, 4), # large cruise used for testing
|
|
494
|
+
# # dtype=np.dtype(Coordinates.SV_DTYPE.value),
|
|
495
|
+
# # )
|
|
496
|
+
# sv_data = np.full(
|
|
497
|
+
# (len(depth_data), width, len(frequencies)),
|
|
498
|
+
# np.nan,
|
|
499
|
+
# dtype=np.dtype(Coordinates.SV_DTYPE.value),
|
|
500
|
+
# )
|
|
501
|
+
# print(f"one: {sys.getsizeof(sv_data)}")
|
|
502
|
+
# # sv_data[:] = np.nan # initialize all
|
|
503
|
+
#
|
|
504
|
+
# sv_da = xr.DataArray(
|
|
505
|
+
# data=sv_data,
|
|
506
|
+
# coords=dict(
|
|
507
|
+
# depth=depth_da,
|
|
508
|
+
# time=time_da,
|
|
509
|
+
# frequency=frequency_da,
|
|
510
|
+
# #
|
|
511
|
+
# latitude=latitude_da,
|
|
512
|
+
# longitude=longitude_da,
|
|
513
|
+
# bottom=bottom_da,
|
|
514
|
+
# speed=speed_da,
|
|
515
|
+
# distance=distance_da,
|
|
516
|
+
# ),
|
|
517
|
+
# dims=( # Depth * Time * Frequency
|
|
518
|
+
# Coordinates.DEPTH.value,
|
|
519
|
+
# Coordinates.TIME.value,
|
|
520
|
+
# Coordinates.FREQUENCY.value,
|
|
521
|
+
# ),
|
|
522
|
+
# name=Coordinates.SV.value,
|
|
523
|
+
# attrs=dict(
|
|
524
|
+
# units=Coordinates.SV_UNITS.value,
|
|
525
|
+
# long_name=Coordinates.SV_LONG_NAME.value,
|
|
526
|
+
# standard_name=Coordinates.SV_STANDARD_NAME.value,
|
|
527
|
+
# tiles_size=Constants.TILE_SIZE.value,
|
|
528
|
+
# _FillValue=np.nan,
|
|
529
|
+
# ),
|
|
530
|
+
# )
|
|
531
|
+
# print(f"two: {sys.getsizeof(sv_data)}") # getting to at least here
|
|
532
|
+
# del sv_data
|
|
533
|
+
# sv_da.encoding = {"compressors": [compressor], "chunks": sv_chunk_shape}
|
|
534
|
+
# # sv_da = sv_da.astype(np.float32) # was crashing here
|
|
535
|
+
# gc.collect()
|
|
536
|
+
# #####################################################################
|
|
537
|
+
# ### Now create the xarray.Dataset
|
|
538
|
+
# ds = xr.Dataset(
|
|
539
|
+
# data_vars=dict(
|
|
540
|
+
# Sv=sv_da,
|
|
541
|
+
# #
|
|
542
|
+
# bottom=bottom_da,
|
|
543
|
+
# speed=speed_da,
|
|
544
|
+
# distance=distance_da,
|
|
545
|
+
# ),
|
|
546
|
+
# coords=dict(
|
|
547
|
+
# depth=depth_da,
|
|
548
|
+
# time=time_da,
|
|
549
|
+
# frequency=frequency_da,
|
|
550
|
+
# #
|
|
551
|
+
# latitude=latitude_da,
|
|
552
|
+
# longitude=longitude_da,
|
|
553
|
+
# ),
|
|
554
|
+
# attrs=dict(
|
|
555
|
+
# # --- Metadata --- #
|
|
556
|
+
# ship_name=ship_name,
|
|
557
|
+
# cruise_name=cruise_name,
|
|
558
|
+
# sensor_name=sensor_name,
|
|
559
|
+
# processing_software_name=Coordinates.PROJECT_NAME.value,
|
|
560
|
+
# # NOTE: for the version to be parsable you need to build the python package
|
|
561
|
+
# # locally first.
|
|
562
|
+
# processing_software_version=importlib.metadata.version(
|
|
563
|
+
# "water-column-sonar-processing"
|
|
564
|
+
# ),
|
|
565
|
+
# processing_software_time=Timestamp.get_timestamp(),
|
|
566
|
+
# calibration_status=calibration_status,
|
|
567
|
+
# tile_size=Constants.TILE_SIZE.value,
|
|
568
|
+
# ),
|
|
569
|
+
# )
|
|
570
|
+
# del sv_da
|
|
571
|
+
# gc.collect()
|
|
572
|
+
# print(f"three: {sys.getsizeof(ds)}")
|
|
573
|
+
# #####################################################################
|
|
574
|
+
# encodings = dict(
|
|
575
|
+
# depth={
|
|
576
|
+
# "compressors": [compressor],
|
|
577
|
+
# "chunks": depth_chunk_shape,
|
|
578
|
+
# },
|
|
579
|
+
# time={
|
|
580
|
+
# "compressors": [compressor],
|
|
581
|
+
# "chunks": time_chunk_shape,
|
|
582
|
+
# "units": Coordinates.TIME_UNITS.value,
|
|
583
|
+
# },
|
|
584
|
+
# frequency={
|
|
585
|
+
# "compressors": [compressor],
|
|
586
|
+
# "chunks": frequency_chunk_shape,
|
|
587
|
+
# },
|
|
588
|
+
# latitude={
|
|
589
|
+
# "compressors": [compressor],
|
|
590
|
+
# "chunks": latitude_chunk_shape,
|
|
591
|
+
# },
|
|
592
|
+
# longitude={
|
|
593
|
+
# "compressors": [compressor],
|
|
594
|
+
# "chunks": longitude_chunk_shape,
|
|
595
|
+
# },
|
|
596
|
+
# bottom={
|
|
597
|
+
# "compressors": [compressor],
|
|
598
|
+
# "chunks": bottom_chunk_shape,
|
|
599
|
+
# },
|
|
600
|
+
# speed={
|
|
601
|
+
# "compressors": [compressor],
|
|
602
|
+
# "chunks": speed_chunk_shape,
|
|
603
|
+
# },
|
|
604
|
+
# distance={
|
|
605
|
+
# "compressors": [compressor],
|
|
606
|
+
# "chunks": distance_chunk_shape,
|
|
607
|
+
# },
|
|
608
|
+
# Sv={
|
|
609
|
+
# "compressors": [compressor],
|
|
610
|
+
# "chunks": sv_chunk_shape,
|
|
611
|
+
# },
|
|
612
|
+
# )
|
|
613
|
+
# gc.collect()
|
|
614
|
+
# ds.to_zarr(
|
|
615
|
+
# store=zarr_path,
|
|
616
|
+
# mode="w", # “w” means create (overwrite if exists)
|
|
617
|
+
# encoding=encodings,
|
|
618
|
+
# consolidated=False,
|
|
619
|
+
# safe_chunks=False,
|
|
620
|
+
# align_chunks=True,
|
|
621
|
+
# zarr_format=3,
|
|
622
|
+
# write_empty_chunks=False, # Might need to change this
|
|
623
|
+
# )
|
|
624
|
+
# #####################################################################
|
|
625
|
+
# return zarr_path
|
|
626
|
+
# except Exception as err:
|
|
627
|
+
# raise RuntimeError(f"Problem trying to create zarr store, {err}")
|
|
628
|
+
# # finally:
|
|
629
|
+
# # cleaner = Cleaner()
|
|
630
|
+
# # cleaner.delete_local_files()
|
|
631
|
+
# # TODO: should delete zarr store in temp directory too?
|
|
632
|
+
|
|
633
|
+
############################################################################
def open_s3_zarr_store_with_zarr(
    self,
    ship_name: str,
    cruise_name: str,
    sensor_name: str,
    output_bucket_name: str,
    endpoint_url: Optional[str] = None,
) -> Group:
    """
    Mount a level-2 cruise Zarr store from S3 using the zarr library.

    The store is opened in "r+" mode (read/write, must already exist) with
    the credentials read from the environment in ``__init__``, so callers
    can update the store in place.

    Raises
    ------
    RuntimeError
        If the store cannot be opened.
    """
    print("Opening L2 Zarr store with Zarr for writing.")
    try:
        level = str(Constants.LEVEL_2.value)
        store = f"s3://{output_bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
        print(f"endpoint url: {endpoint_url}")
        cruise_zarr = zarr.open(
            store=store,
            mode="r+",  # read/write; the store must already exist
            zarr_format=3,
            storage_options={
                "endpoint_url": endpoint_url,  # non-None only in moto tests
                "key": self.key,
                "secret": self.secret,
            },
        )
        print("Done opening store with Zarr.")
        return cruise_zarr
    except Exception as err:  # Failure
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError(f"Exception encountered opening store with Zarr, {err}") from err
|
|
663
|
+
|
|
664
|
+
###########################################################################
@staticmethod
def open_s3_zarr_store_with_xarray(
    ship_name: str,
    cruise_name: str,
    sensor_name: str,
    file_name_stem: str,
    bucket_name: str,
    # level: str, # TODO: add level
    endpoint_url: Optional[str] = None,  # needed for moto testing
) -> xr.Dataset:
    """
    Open a level-1 file-level Zarr store from S3 as an xarray Dataset.

    Access is anonymous (public read); the store is not consolidated.

    Raises
    ------
    RuntimeError
        If the store cannot be opened.
    """
    print("Opening L1 Zarr store in S3 with Xarray.")
    try:
        zarr_path = f"s3://{bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
        ds = xr.open_dataset(
            filename_or_obj=zarr_path,
            engine="zarr",
            backend_kwargs={
                "storage_options": {
                    "endpoint_url": endpoint_url,
                    "anon": True,  # public bucket; no credentials required
                },
            },
            consolidated=False,
        )
        return ds
    except Exception as err:
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}") from err
|
|
693
|
+
|
|
694
|
+
###########################################################################
# TODO: can this be consolidated with open_s3_zarr_store_with_xarray
@staticmethod
def open_l2_zarr_store_with_xarray(
    ship_name: str,
    cruise_name: str,
    sensor_name: str,
    bucket_name: str,
    endpoint_url: Optional[str] = None,  # needed for moto testing
) -> xr.Dataset:
    """
    Open a level-2 cruise-level Zarr store from S3 as an xarray Dataset.

    Access is anonymous (public read); the store is not consolidated.

    Raises
    ------
    RuntimeError
        If the store cannot be opened.
    """
    print("Opening L2 Zarr store in S3 with Xarray.")
    try:
        level = str(Constants.LEVEL_2.value)
        zarr_path = f"s3://{bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
        ds = xr.open_dataset(
            filename_or_obj=zarr_path,
            engine="zarr",
            backend_kwargs={
                "storage_options": {
                    "endpoint_url": endpoint_url,
                    "anon": True,  # public bucket; no credentials required
                }
            },
            consolidated=False,
        )
        return ds
    except Exception as err:
        # Chain the original exception so the root cause is preserved.
        raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}") from err
|
|
723
|
+
|
|
724
|
+
###########################################################################
|
|
725
|
+
|
|
726
|
+
###########################################################################
|
|
727
|
+
# def create_process_synchronizer(self):
|
|
728
|
+
# # TODO: explore aws redis options
|
|
729
|
+
# pass
|
|
730
|
+
|
|
731
|
+
###########################################################################
|
|
732
|
+
# def verify_cruise_store_data(self):
|
|
733
|
+
# # TODO: run a check on a finished model store to ensure that
|
|
734
|
+
# # none of the time, latitude, longitude, or depth values
|
|
735
|
+
# # are NaN.
|
|
736
|
+
# pass
|
|
737
|
+
|
|
738
|
+
###########################################################################
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
###########################################################
|