water-column-sonar-processing 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- {aws_manager → water_column_sonar_processing/aws}/dynamodb_manager.py +71 -50
- {aws_manager → water_column_sonar_processing/aws}/s3_manager.py +120 -130
- {aws_manager → water_column_sonar_processing/aws}/s3fs_manager.py +13 -19
- {aws_manager → water_column_sonar_processing/aws}/sns_manager.py +10 -21
- {aws_manager → water_column_sonar_processing/aws}/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- {cruise → water_column_sonar_processing/cruise}/create_empty_zarr_store.py +62 -44
- {cruise → water_column_sonar_processing/cruise}/resample_regrid.py +117 -66
- water_column_sonar_processing/geometry/__init__.py +5 -0
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_manager.py +80 -49
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_simplification.py +13 -12
- {geometry_manager → water_column_sonar_processing/geometry}/pmtile_generation.py +25 -24
- water_column_sonar_processing/index/__init__.py +3 -0
- {index_manager → water_column_sonar_processing/index}/index_manager.py +106 -82
- water_column_sonar_processing/model/__init__.py +3 -0
- {zarr_manager → water_column_sonar_processing/model}/zarr_manager.py +119 -83
- water_column_sonar_processing/process.py +147 -0
- water_column_sonar_processing/utility/__init__.py +6 -0
- {utility → water_column_sonar_processing/utility}/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +63 -0
- {utility → water_column_sonar_processing/utility}/pipeline_status.py +37 -10
- {utility → water_column_sonar_processing/utility}/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.6.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- aws_manager/__init__.py +0 -4
- cruise/__init__.py +0 -0
- geometry_manager/__init__.py +0 -0
- index_manager/__init__.py +0 -0
- model.py +0 -140
- utility/__init__.py +0 -0
- utility/constants.py +0 -56
- water_column_sonar_processing-0.0.4.dist-info/RECORD +0 -29
- water_column_sonar_processing-0.0.4.dist-info/top_level.txt +0 -8
- zarr_manager/__init__.py +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
`{zarr_manager → water_column_sonar_processing/model}/zarr_manager.py`:

```diff
@@ -1,27 +1,28 @@
 import os
-
+
 import numcodecs
 import numpy as np
 import xarray as xr
+import zarr
 from numcodecs import Blosc
 
-from
-from utility.
-from
+from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
+from water_column_sonar_processing.utility.constants import Constants, Coordinates
+from water_column_sonar_processing.utility.timestamp import Timestamp
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
 
-# TODO: when ready switch to version 3 of
+# TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 class ZarrManager:
     #######################################################
     def __init__(
-
+        self,
    ):
         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
         self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
```
```diff
@@ -33,18 +34,18 @@ class ZarrManager:
     #######################################################
     @staticmethod
     def get_depth_values(
-
-
+        min_echo_range: float = 1.0,  # minimum depth measured (zero non-inclusive) from whole cruise
+        max_echo_range: float = 100.0,  # maximum depth measured from whole cruise
     ):
         # Gets the set of depth values that will be used when resampling and
-        # regridding the data to a cruise level
+        # regridding the data to a cruise level model store.
         # Note: returned values do not start at zero.
-        print(
+        print("Getting depth values.")
         all_cruise_depth_values = np.linspace(
             start=min_echo_range,
             stop=max_echo_range,
             num=int(max_echo_range / min_echo_range) + 1,
-            endpoint=True
+            endpoint=True,
         )
 
         print("Done getting depth values.")
```
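The new `get_depth_values` builds the cruise-wide depth grid with `np.linspace`. A minimal sketch of what it returns, with illustrative inputs (in the pipeline, `MIN_ECHO_RANGE`/`MAX_ECHO_RANGE` are tracked per file in DynamoDB, as seen in `process.py` below); note the grid starts at the minimum echo range, not at zero:

```python
import numpy as np

# Assumed example inputs, not values from the package.
min_echo_range = 0.25  # finest vertical resolution observed, in meters
max_echo_range = 1.0   # deepest measurement observed, in meters

depth_values = np.linspace(
    start=min_echo_range,
    stop=max_echo_range,
    num=int(max_echo_range / min_echo_range) + 1,  # 5 samples here
    endpoint=True,
)
print(depth_values)  # [0.25 0.4375 0.625 0.8125 1.0]
```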
```diff
@@ -52,18 +53,20 @@ class ZarrManager:
 
     #######################################################
     def create_zarr_store(
-
-
-
-
-
-
-
-
-
-
+        self,
+        path: str,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        frequencies: list,  # units in Hz
+        width: int,  # TODO: needs better name... "ping_time"
+        min_echo_range: float,  # smallest resolution in meters
+        max_echo_range: float,
+        calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
-        print(
+        print(
+            f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+        )
 
         # There should be no repeated frequencies
         assert len(frequencies) == len(set(frequencies))
```
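A hypothetical call against the new signature; every argument value below is illustrative, not taken from the package:

```python
from water_column_sonar_processing.model.zarr_manager import ZarrManager

zarr_manager = ZarrManager()
zarr_manager.create_zarr_store(
    path="/tmp",                    # hypothetical output directory
    ship_name="David_Starr_Jordan",
    cruise_name="DS0604",
    sensor_name="EK60",
    frequencies=[38_000, 70_000, 120_000, 200_000],  # Hz, no repeats allowed
    width=4096,                     # total ping_time samples across the cruise
    min_echo_range=0.25,            # meters
    max_echo_range=499.77,          # meters
    calibration_status=False,
)
```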
```diff
@@ -80,28 +83,29 @@ class ZarrManager:
         # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
         root.create_dataset(
             name=Coordinates.TIME.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),
             shape=width,
-            chunks=(
+            chunks=(
+                Constants.TILE_SIZE.value,
+            ),  # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             # fill_value=0.,
             fill_value=np.nan,  # TODO: do i want nan's?
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
 
-        root.time.attrs[
+        root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.time.attrs[
-        root.time.attrs[
-        root.time.attrs[
-        root.time.attrs[
+        root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+        root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+        root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+        root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Depth --- #
         depth_values = self.get_depth_values(
-            min_echo_range=min_echo_range,
-            max_echo_range=max_echo_range
+            min_echo_range=min_echo_range, max_echo_range=max_echo_range
         )
 
         root.create_dataset(
```
```diff
@@ -110,54 +114,75 @@ class ZarrManager:
             data=depth_values,
             shape=len(depth_values),
             chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(
+            dtype=np.dtype(
+                Coordinates.DEPTH_DTYPE.value
+            ),  # float16 == 2 significant digits would be ideal
             compressor=self.__compressor,
             # fill_value=np.nan,
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
         # TODO: change to exception
         assert not np.any(np.isnan(depth_values))
 
-        root.depth.attrs[
+        root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs[
-        root.depth.attrs[
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.TILE_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
         )
 
-        root.latitude.attrs[
+        root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs[
-        root.latitude.attrs[
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
             shape=width,
             chunks=Constants.TILE_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
         )
 
-        root.longitude.attrs[
+        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs[
-        root.longitude.attrs[
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+
+        #####################################################################
+        # TODO: verify adding this variable for where the bottom was detected
+        # --- Coordinate: Bottom --- #
+        root.create_dataset(
+            name=Coordinates.BOTTOM.value,
+            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            shape=width,
+            chunks=Constants.TILE_SIZE.value,
+            dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+            compressor=self.__compressor,
+            fill_value=np.nan,
+            overwrite=self.__overwrite,
+        )
+
+        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
```
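Each coordinate above follows the same pattern: create the array with the Blosc compressor, then attach the `_ARRAY_DIMENSIONS` attribute so xarray can map Zarr arrays onto named dimensions. A self-contained sketch of that pattern (zarr v2 API assumed; names and sizes are illustrative):

```python
import numpy as np
import zarr
from numcodecs import Blosc

compressor = Blosc(cname="zstd", clevel=2)
root = zarr.open("example.zarr", mode="w")  # local store for illustration
width = 2048  # hypothetical ping_time count

root.create_dataset(
    name="time",
    data=np.repeat(0.0, width),
    shape=width,
    chunks=(512,),
    dtype=np.dtype("float64"),
    compressor=compressor,
    fill_value=np.nan,
)
# xarray reads this attribute to reconstruct dimensions when opening the store:
root.time.attrs["_ARRAY_DIMENSIONS"] = ["time"]
root.time.attrs["units"] = "seconds since 1970-01-01 00:00:00"
root.time.attrs["calendar"] = "proleptic_gregorian"
```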
```diff
@@ -168,16 +193,20 @@ class ZarrManager:
             chunks=1,
             dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
         )
 
         # TODO: best coordinate would be channel with str type
-        root.frequency.attrs[
+        root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+            Coordinates.FREQUENCY.value
+        ]  # TODO: is this correct
 
-        root.frequency.attrs[
-        root.frequency.attrs[
-
+        root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+        root.frequency.attrs["standard_name"] = (
+            Coordinates.FREQUENCY_STANDARD_NAME.value
+        )
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
```
```diff
@@ -185,21 +214,23 @@ class ZarrManager:
             name=Coordinates.SV.value,
             shape=(len(depth_values), width, len(frequencies)),
             chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1),
-            dtype=np.dtype(
+            dtype=np.dtype(
+                Coordinates.SV_DTYPE.value
+            ),  # TODO: try to experiment with 'float16'
             compressor=self.__compressor,
             fill_value=np.nan,
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
 
-        root.Sv.attrs[
+        root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
             Coordinates.DEPTH.value,
             Coordinates.TIME.value,
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs[
-        root.Sv.attrs[
-        root.Sv.attrs[
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+        root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
         # --- Metadata --- #
```
```diff
@@ -208,7 +239,9 @@
         root.attrs["sensor_name"] = sensor_name
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
-        root.attrs["processing_software_version"] =
+        root.attrs["processing_software_version"] = (
+            "0.0.6"  # TODO: get programmatically
+        )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
         root.attrs["calibration_status"] = calibration_status
```
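The `# TODO: get programmatically` above could be satisfied with `importlib.metadata`, reading the version of the installed distribution; a sketch:

```python
from importlib.metadata import version

# Distribution name as published (matches the wheel being diffed here).
processing_software_version = version("water-column-sonar-processing")  # e.g. "0.0.6"
```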
```diff
@@ -239,43 +272,45 @@ class ZarrManager:
 
     ############################################################################
     def open_s3_zarr_store_with_zarr(
-
-
-
-
-
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        # zarr_synchronizer: Union[str, None] = None,
     ):
         # Mounts a Zarr store using pythons Zarr implementation. The mounted store
         # will have read/write privileges so that store can be updated.
-        print(
+        print("Opening Zarr store with Zarr.")
         try:
             s3fs_manager = S3FSManager()
-            root = f
+            root = f"{self.output_bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
             store = s3fs_manager.s3_map(s3_zarr_store_path=root)
-            # synchronizer =
+            # synchronizer = model.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
             cruise_zarr = zarr.open(store=store, mode="r+")
         except Exception as err:  # Failure
-            print(f
+            print(f"Exception encountered opening Zarr store with Zarr.: {err}")
             raise
-        print(
+        print("Done opening Zarr store with Zarr.")
         return cruise_zarr
 
     ############################################################################
     def open_s3_zarr_store_with_xarray(
-
-
-
-
-
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        file_name_stem: str,
     ) -> xr.Dataset:
-        print(
+        print("Opening Zarr store in S3 as Xarray.")
         try:
             zarr_path = f"s3://{self.output_bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
             s3fs_manager = S3FSManager()
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
-            ds = xr.open_zarr(
+            ds = xr.open_zarr(
+                store=store_s3_map, consolidated=None
+            )  # synchronizer=SYNCHRONIZER
         except Exception as err:
-            print(
+            print("Problem opening Zarr store in S3 as Xarray.")
             raise err
         print("Done opening Zarr store in S3 as Xarray.")
         return ds
```
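Both methods delegate S3 mounting to `S3FSManager.s3_map`. A hedged sketch of what that presumably wraps, built directly on s3fs (bucket and key path are made up):

```python
import s3fs
import xarray as xr
import zarr

fs = s3fs.S3FileSystem(anon=False)
root = "example-output-bucket/level_2/Ship/CRUISE/EK60/CRUISE.zarr"  # hypothetical
store = s3fs.S3Map(root=root, s3=fs, check=False)

cruise_zarr = zarr.open(store=store, mode="r+")    # read/write handle, as above
ds = xr.open_zarr(store=store, consolidated=None)  # read-only xarray view
```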
```diff
@@ -284,16 +319,17 @@ class ZarrManager:
 
     #######################################################
     # def create_process_synchronizer(self):
-    #     # TODO: explore
+    #     # TODO: explore aws redis options
     #     pass
 
     #######################################################
     # def verify_cruise_store_data(self):
-    #     # TODO: run a check on a finished
+    #     # TODO: run a check on a finished model store to ensure that
     #     # none of the time, latitude, longitude, or depth values
     #     # are NaN.
     #     pass
 
     #######################################################
 
+
     ###########################################################
```
`water_column_sonar_processing/process.py` (new file):

```diff
@@ -0,0 +1,147 @@
+import json
+import os
+
+import numpy as np
+
+from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
+from water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
+from water_column_sonar_processing.aws.sns_manager import SNSManager
+
+
+###########################################################
+class Process:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.input_bucket_name = os.environ["INPUT_BUCKET_NAME"]
+        self.output_bucket_name = os.environ["OUTPUT_BUCKET_NAME"]
+        self.table_name = os.environ["TABLE_NAME"]
+        self.topic_arn = os.environ["TOPIC_ARN"]
+        # self.output_bucket_access_key = ?
+        # self.output_bucket_secret_access_key = ?
+
+    def execute(self):
+        input_s3_manager = (
+            S3Manager()
+        )  # TODO: Need to allow passing in of credentials when writing to protected bucket
+        s3fs_manager = S3FSManager()  # TODO: delete this
+        print(s3fs_manager)  # TODO: delete this
+        output_s3_manager = S3Manager()
+        # TODO: s3fs?
+        sns_manager = SNSManager()
+        ddb_manager = DynamoDBManager()
+
+        # [1 of 5] Update Pipeline Status in DynamoDB
+        # self.dynamodb.update_ status ()
+
+        # [2 of 5] Download Object From Input Bucket
+        # return_value = input_s3_manager.download_file(
+        #     bucket_name=self.input_bucket_name,
+        #     key="the_input_key",
+        #     file_name="the_input_key",
+        # )
+        # print(return_value)
+
+        # [3 of 5] Update Entry in DynamoDB
+        ship_name = "David_Starr_Jordan"  # TODO: get this from input sns message
+        cruise_name = "DS0604"
+        sensor_name = "EK60"
+        file_name = "DSJ0604-D20060406-T113407.raw"
+
+        test_channels = [
+            "GPT 38 kHz 009072055a7f 2 ES38B",
+            "GPT 70 kHz 00907203400a 3 ES70-7C",
+            "GPT 120 kHz 009072034d52 1 ES120-7",
+            "GPT 200 kHz 0090720564e4 4 ES200-7C",
+        ]
+        test_frequencies = [38_000, 70_000, 120_000, 200_000]
+        ddb_manager.update_item(
+            table_name=self.table_name,
+            key={
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+            },
+            expression_attribute_names={
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
+                "#ED": "ERROR_DETAIL",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
+                "#PS": "PIPELINE_STATUS",  # testing this updated
+                "#PT": "PIPELINE_TIME",  # testing this updated
+                "#SE": "SENSOR_NAME",
+                "#SH": "SHIP_NAME",
+                "#ST": "START_TIME",
+                "#ZB": "ZARR_BUCKET",
+                "#ZP": "ZARR_PATH",
+            },
+            expression_attribute_values={
+                ":ch": {"L": [{"S": i} for i in test_channels]},
+                ":et": {"S": "2006-04-06T13:35:28.688Z"},
+                ":ed": {"S": ""},
+                ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
+                ":ma": {"N": str(np.round(499.7653, 4))},
+                ":mi": {"N": str(np.round(0.25, 4))},
+                ":nd": {"N": str(2458)},
+                ":ps": {"S": "SUCCESS_AGGREGATOR"},
+                ":pt": {"S": "2023-10-02T08:54:43Z"},
+                ":se": {"S": sensor_name},
+                ":sh": {"S": ship_name},
+                ":st": {"S": "2006-04-06T11:34:07.288Z"},
+                ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
+                ":zp": {
+                    "S": "level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T113407.model"
+                },
+            },
+            update_expression=(
+                "SET "
+                "#CH = :ch, "
+                "#ET = :et, "
+                "#ED = :ed, "
+                "#FR = :fr, "
+                "#MA = :ma, "
+                "#MI = :mi, "
+                "#ND = :nd, "
+                "#PS = :ps, "
+                "#PT = :pt, "
+                "#SE = :se, "
+                "#SH = :sh, "
+                "#ST = :st, "
+                "#ZB = :zb, "
+                "#ZP = :zp"
+            ),
+        )
+
+        # [4 of 5] Write Object to Output Bucket
+        output_s3_manager.put(
+            bucket_name=self.output_bucket_name, key="123", body="456"
+        )
+
+        # [_ of _] Read file-level Zarr store from bucket, Create GeoJSON, Write to bucket
+        # [_ of _] Create empty cruise-level Zarr store
+        # [_ of _] Resample and write to cruise-level Zarr Store
+
+        # [5 of 5] Publish Done Message
+        success_message = {
+            "default": {
+                "shipName": ship_name,
+                "cruiseName": cruise_name,
+                "sensorName": sensor_name,
+                "fileName": file_name,
+            }
+        }
+        sns_manager.publish(
+            topic_arn=self.topic_arn,
+            message=json.dumps(success_message),
+        )
+        print("done...")
+
+    #######################################################
+
+
+###########################################################
+###########################################################
```
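A hypothetical invocation of the new `Process` class; the environment variable names come from `__init__` above, the values are placeholders:

```python
import os

os.environ["INPUT_BUCKET_NAME"] = "example-input-bucket"
os.environ["OUTPUT_BUCKET_NAME"] = "example-output-bucket"
os.environ["TABLE_NAME"] = "example-table"
os.environ["TOPIC_ARN"] = "arn:aws:sns:us-east-1:123456789012:example-topic"

from water_column_sonar_processing.process import Process

# Runs steps [1 of 5] through [5 of 5] against live AWS resources.
Process().execute()
```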
`{utility → water_column_sonar_processing/utility}/cleaner.py`:

```diff
@@ -1,21 +1,20 @@
-import os
 import glob
+import os
 import shutil
 
 
 ###########################################################
 class Cleaner:
     @staticmethod
-    def delete_local_files(
-
-    ):
-        print('Deleting all local raw and zarr_manager files')
+    def delete_local_files(file_types=["*.raw*", "*.model"]):  # '*.json'
+        print("Deleting all local raw and model files")
         for i in file_types:
             for j in glob.glob(i):
                 if os.path.isdir(j):
                     shutil.rmtree(j, ignore_errors=True)
                 elif os.path.isfile(j):
                     os.remove(j)
-        print(
+        print("done deleting")
 
-
+
+###########################################################
```
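Usage is unchanged apart from the new default patterns (`*.model` matches the `.model` store paths seen elsewhere in this release):

```python
from water_column_sonar_processing.utility.cleaner import Cleaner

# Deletes matches from the current working directory; directories matching a
# pattern are removed recursively, plain files are unlinked.
Cleaner.delete_local_files()                       # default: ["*.raw*", "*.model"]
Cleaner.delete_local_files(file_types=["*.zarr"])  # explicit patterns
```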
`water_column_sonar_processing/utility/constants.py` (new file):

```diff
@@ -0,0 +1,63 @@
+from enum import Enum, Flag, unique
+
+
+@unique
+class Constants(Flag):
+    TILE_SIZE = 512
+
+
+class Coordinates(Enum):
+    """
+    Should try to specify
+    dtype
+    units
+    long_name — most readable description of variable
+    standard_name — name in lowercase and snake_case
+    """
+
+    PROJECT_NAME = "echofish"
+
+    DEPTH = "depth"
+    DEPTH_DTYPE = "float32"
+    DEPTH_UNITS = "m"  # TODO: Pint? <https://pint.readthedocs.io/en/stable/>
+    DEPTH_LONG_NAME = "Depth below surface"
+    DEPTH_STANDARD_NAME = "depth"
+
+    TIME = "time"
+    TIME_DTYPE = "float64"
+    # Note: units and calendar are used downstream by Xarray
+    TIME_UNITS = "seconds since 1970-01-01 00:00:00"
+    TIME_LONG_NAME = "Timestamp of each ping"
+    TIME_STANDARD_NAME = "time"
+    TIME_CALENDAR = "proleptic_gregorian"
+    # TODO: create test for reading out timestamps in Xarray
+
+    FREQUENCY = "frequency"
+    FREQUENCY_DTYPE = "int"
+    FREQUENCY_UNITS = "Hz"
+    FREQUENCY_LONG_NAME = "Transducer frequency"
+    FREQUENCY_STANDARD_NAME = "sound_frequency"
+
+    LATITUDE = "latitude"
+    LATITUDE_DTYPE = "float32"
+    LATITUDE_UNITS = "degrees_north"
+    LATITUDE_LONG_NAME = "Latitude"
+    LATITUDE_STANDARD_NAME = "latitude"
+
+    LONGITUDE = "longitude"
+    LONGITUDE_DTYPE = "float32"
+    LONGITUDE_UNITS = "degrees_east"
+    LONGITUDE_LONG_NAME = "Longitude"
+    LONGITUDE_STANDARD_NAME = "longitude"
+
+    BOTTOM = "bottom"
+    BOTTOM_DTYPE = "float32"
+    BOTTOM_UNITS = "m"
+    BOTTOM_LONG_NAME = "Detected sea floor depth"
+    BOTTOM_STANDARD_NAME = "bottom"
+
+    SV = "Sv"
+    SV_DTYPE = "float32"  # TODO: experiment with dtype of int
+    SV_UNITS = "dB"
+    SV_LONG_NAME = "Volume backscattering strength (Sv re 1 m-1)"
+    SV_STANDARD_NAME = "volume_backscattering_strength"
```
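The managers above consume these enums through `.value`; a quick sketch:

```python
from water_column_sonar_processing.utility.constants import Constants, Coordinates

print(Constants.TILE_SIZE.value)     # 512 (chunk edge length used by ZarrManager)
print(Coordinates.SV.value)          # "Sv"
print(Coordinates.SV_UNITS.value)    # "dB"
print(Coordinates.TIME_UNITS.value)  # "seconds since 1970-01-01 00:00:00"
```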