water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -4
- water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
- water_column_sonar_processing/aws/s3_manager.py +112 -122
- water_column_sonar_processing/aws/s3fs_manager.py +13 -19
- water_column_sonar_processing/aws/sns_manager.py +10 -21
- water_column_sonar_processing/aws/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
- water_column_sonar_processing/cruise/resample_regrid.py +109 -58
- water_column_sonar_processing/geometry/__init__.py +5 -0
- water_column_sonar_processing/geometry/geometry_manager.py +79 -48
- water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
- water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +104 -80
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +113 -75
- water_column_sonar_processing/process.py +76 -69
- water_column_sonar_processing/utility/__init__.py +6 -0
- water_column_sonar_processing/utility/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +42 -35
- water_column_sonar_processing/utility/pipeline_status.py +37 -10
- water_column_sonar_processing/utility/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/model/zarr_manager.py (+113 -75):

```diff
@@ -1,12 +1,14 @@
 import os
-
+
 import numcodecs
 import numpy as np
 import xarray as xr
+import zarr
 from numcodecs import Blosc
+
+from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
 from water_column_sonar_processing.utility.constants import Constants, Coordinates
 from water_column_sonar_processing.utility.timestamp import Timestamp
-from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -15,11 +17,12 @@ numcodecs.blosc.set_nthreads(1)
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 
+
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 class ZarrManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
         self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
@@ -31,18 +34,18 @@ class ZarrManager:
     #######################################################
     @staticmethod
     def get_depth_values(
-
-
+        min_echo_range: float = 1.0,  # minimum depth measured (zero non-inclusive) from whole cruise
+        max_echo_range: float = 100.0,  # maximum depth measured from whole cruise
     ):
         # Gets the set of depth values that will be used when resampling and
         # regridding the data to a cruise level model store.
         # Note: returned values do not start at zero.
-        print(
+        print("Getting depth values.")
         all_cruise_depth_values = np.linspace(
             start=min_echo_range,
             stop=max_echo_range,
             num=int(max_echo_range / min_echo_range) + 1,
-            endpoint=True
+            endpoint=True,
         )
 
         print("Done getting depth values.")
```
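For reference, a minimal standalone sketch of what the reworked `get_depth_values` computes (only NumPy; the sample arguments are hypothetical):

```python
import numpy as np

def get_depth_values(min_echo_range: float = 1.0, max_echo_range: float = 100.0):
    # int(max/min) + 1 evenly spaced values spanning [min_echo_range, max_echo_range];
    # the grid starts at the shallowest measured depth, never at zero.
    return np.linspace(
        start=min_echo_range,
        stop=max_echo_range,
        num=int(max_echo_range / min_echo_range) + 1,
        endpoint=True,
    )

depths = get_depth_values(min_echo_range=0.25, max_echo_range=500.0)
print(depths.shape)  # (2001,)
```

Note the resulting spacing is (max_echo_range - min_echo_range) / (num - 1), which only approximates min_echo_range: 0.249875 m rather than 0.25 m in the call above.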
```diff
@@ -50,18 +53,20 @@ class ZarrManager:
 
     #######################################################
     def create_zarr_store(
-
-
-
-
-
-
-
-
-
-
+        self,
+        path: str,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        frequencies: list,  # units in Hz
+        width: int,  # TODO: needs better name... "ping_time"
+        min_echo_range: float,  # smallest resolution in meters
+        max_echo_range: float,
+        calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
-        print(
+        print(
+            f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+        )
 
         # There should be no repeated frequencies
         assert len(frequencies) == len(set(frequencies))
@@ -78,28 +83,29 @@ class ZarrManager:
         # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
         root.create_dataset(
             name=Coordinates.TIME.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),
             shape=width,
-            chunks=(
+            chunks=(
+                Constants.TILE_SIZE.value,
+            ),  # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             # fill_value=0.,
             fill_value=np.nan,  # TODO: do i want nan's?
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
 
-        root.time.attrs[
+        root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.time.attrs[
-        root.time.attrs[
-        root.time.attrs[
-        root.time.attrs[
+        root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+        root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+        root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+        root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Depth --- #
         depth_values = self.get_depth_values(
-            min_echo_range=min_echo_range,
-            max_echo_range=max_echo_range
+            min_echo_range=min_echo_range, max_echo_range=max_echo_range
         )
 
         root.create_dataset(
@@ -108,54 +114,75 @@ class ZarrManager:
             data=depth_values,
             shape=len(depth_values),
             chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(
+            dtype=np.dtype(
+                Coordinates.DEPTH_DTYPE.value
+            ),  # float16 == 2 significant digits would be ideal
             compressor=self.__compressor,
             # fill_value=np.nan,
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
         # TODO: change to exception
         assert not np.any(np.isnan(depth_values))
 
-        root.depth.attrs[
+        root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs[
-        root.depth.attrs[
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.TILE_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
         )
 
-        root.latitude.attrs[
+        root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs[
-        root.latitude.attrs[
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
-            data=np.repeat(0
+            data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
             shape=width,
             chunks=Constants.TILE_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
        )
 
-        root.longitude.attrs[
+        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs[
-        root.longitude.attrs[
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+
+        #####################################################################
+        # TODO: verify adding this variable for where the bottom was detected
+        # --- Coordinate: Bottom --- #
+        root.create_dataset(
+            name=Coordinates.BOTTOM.value,
+            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            shape=width,
+            chunks=Constants.TILE_SIZE.value,
+            dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+            compressor=self.__compressor,
+            fill_value=np.nan,
+            overwrite=self.__overwrite,
+        )
+
+        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
```
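The `_ARRAY_DIMENSIONS` attributes written above follow xarray's convention for labeling plain Zarr v2 arrays with dimension names so the finished store can be opened as an `xr.Dataset`. A minimal sketch of the pattern, assuming the Zarr v2 `create_dataset` API this module uses (names and units are illustrative):

```python
import numpy as np
import xarray as xr
import zarr

root = zarr.open_group("example.zarr", mode="w")
time = root.create_dataset(
    "time", shape=100, chunks=(512,), dtype="float64", fill_value=np.nan
)
# xarray maps each zarr array onto named dimensions via this attribute;
# without it, xr.open_zarr() rejects the store.
time.attrs["_ARRAY_DIMENSIONS"] = ["time"]
time.attrs["units"] = "seconds since 1970-01-01"  # illustrative CF-style metadata

print(xr.open_zarr("example.zarr", consolidated=False))  # Dataset with a "time" dim
```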
```diff
@@ -166,16 +193,20 @@ class ZarrManager:
             chunks=1,
             dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
             compressor=self.__compressor,
-            fill_value=0
-            overwrite=self.__overwrite
+            fill_value=0.0,
+            overwrite=self.__overwrite,
         )
 
         # TODO: best coordinate would be channel with str type
-        root.frequency.attrs[
+        root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+            Coordinates.FREQUENCY.value
+        ]  # TODO: is this correct
 
-        root.frequency.attrs[
-        root.frequency.attrs[
-
+        root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+        root.frequency.attrs["standard_name"] = (
+            Coordinates.FREQUENCY_STANDARD_NAME.value
+        )
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
@@ -183,21 +214,23 @@ class ZarrManager:
             name=Coordinates.SV.value,
             shape=(len(depth_values), width, len(frequencies)),
             chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1),
-            dtype=np.dtype(
+            dtype=np.dtype(
+                Coordinates.SV_DTYPE.value
+            ),  # TODO: try to experiment with 'float16'
             compressor=self.__compressor,
             fill_value=np.nan,
-            overwrite=self.__overwrite
+            overwrite=self.__overwrite,
         )
 
-        root.Sv.attrs[
+        root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
             Coordinates.DEPTH.value,
             Coordinates.TIME.value,
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs[
-        root.Sv.attrs[
-        root.Sv.attrs[
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+        root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
         # --- Metadata --- #
```
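For scale, the Sv array above is the cruise-level cube of volume backscattering strength, laid out depth × time × frequency and chunked `(TILE_SIZE, TILE_SIZE, 1)` so each frequency channel is tiled independently. A rough standalone equivalent (sizes and tile length are hypothetical; Zarr v2 API assumed):

```python
import numpy as np
import zarr

TILE_SIZE = 512  # hypothetical stand-in for Constants.TILE_SIZE.value
n_depth, n_time, n_frequency = 2001, 100_000, 4

root = zarr.open_group("cruise.zarr", mode="w")
sv = root.create_dataset(
    "Sv",
    shape=(n_depth, n_time, n_frequency),
    chunks=(TILE_SIZE, TILE_SIZE, 1),  # one 512x512 tile per frequency slice
    dtype="float32",
    fill_value=np.nan,  # unwritten regions read back as NaN
)
sv.attrs["_ARRAY_DIMENSIONS"] = ["depth", "time", "frequency"]
```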
```diff
@@ -206,7 +239,9 @@ class ZarrManager:
         root.attrs["sensor_name"] = sensor_name
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
-        root.attrs["processing_software_version"] =
+        root.attrs["processing_software_version"] = (
+            "0.0.6"  # TODO: get programmatically
+        )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
         root.attrs["calibration_status"] = calibration_status
@@ -237,43 +272,45 @@ class ZarrManager:
 
     ############################################################################
     def open_s3_zarr_store_with_zarr(
-
-
-
-
-
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        # zarr_synchronizer: Union[str, None] = None,
     ):
         # Mounts a Zarr store using pythons Zarr implementation. The mounted store
         # will have read/write privileges so that store can be updated.
-        print(
+        print("Opening Zarr store with Zarr.")
         try:
             s3fs_manager = S3FSManager()
-            root = f
+            root = f"{self.output_bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
             store = s3fs_manager.s3_map(s3_zarr_store_path=root)
             # synchronizer = model.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
             cruise_zarr = zarr.open(store=store, mode="r+")
         except Exception as err:  # Failure
-            print(f
+            print(f"Exception encountered opening Zarr store with Zarr.: {err}")
             raise
-        print(
+        print("Done opening Zarr store with Zarr.")
         return cruise_zarr
 
     ############################################################################
     def open_s3_zarr_store_with_xarray(
-
-
-
-
-
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        file_name_stem: str,
     ) -> xr.Dataset:
-        print(
+        print("Opening Zarr store in S3 as Xarray.")
         try:
             zarr_path = f"s3://{self.output_bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
             s3fs_manager = S3FSManager()
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
-            ds = xr.open_zarr(
+            ds = xr.open_zarr(
+                store=store_s3_map, consolidated=None
+            )  # synchronizer=SYNCHRONIZER
         except Exception as err:
-            print(
+            print("Problem opening Zarr store in S3 as Xarray.")
             raise err
         print("Done opening Zarr store in S3 as Xarray.")
         return ds
```
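Both helpers resolve an S3 prefix to a key-value mapping (via `S3FSManager.s3_map`) and hand it either to `zarr` for read-write access or to `xarray` for a lazy read. A sketch of the underlying pattern using `s3fs` directly (bucket and path are hypothetical):

```python
import s3fs
import xarray as xr
import zarr

fs = s3fs.S3FileSystem()  # credentials resolved from the environment
root = "example-output-bucket/level_2/Ship/CRUISE/EK60/CRUISE.zarr"  # hypothetical
store = s3fs.S3Map(root=root, s3=fs)

# read/write handle, as in open_s3_zarr_store_with_zarr
cruise_zarr = zarr.open(store=store, mode="r+")
# lazy, read-only view, as in open_s3_zarr_store_with_xarray
ds = xr.open_zarr(store=store, consolidated=None)
```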
```diff
@@ -294,4 +331,5 @@ class ZarrManager:
 
     #######################################################
 
+
 ###########################################################
```
water_column_sonar_processing/process.py (+76 -69):

```diff
@@ -1,118 +1,124 @@
-import os
 import json
+import os
+
 import numpy as np
+
+from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
 from water_column_sonar_processing.aws.s3_manager import S3Manager
 from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
 from water_column_sonar_processing.aws.sns_manager import SNSManager
-
+
 
 ###########################################################
 class Process:
     #######################################################
     def __init__(
-
+        self,
     ):
-        self.input_bucket_name = os.environ[
-        self.output_bucket_name = os.environ[
-        self.table_name = os.environ[
-        self.topic_arn = os.environ[
+        self.input_bucket_name = os.environ["INPUT_BUCKET_NAME"]
+        self.output_bucket_name = os.environ["OUTPUT_BUCKET_NAME"]
+        self.table_name = os.environ["TABLE_NAME"]
+        self.topic_arn = os.environ["TOPIC_ARN"]
         # self.output_bucket_access_key = ?
         # self.output_bucket_secret_access_key = ?
 
     def execute(self):
-        input_s3_manager =
-
-
+        input_s3_manager = (
+            S3Manager()
+        )  # TODO: Need to allow passing in of credentials when writing to protected bucket
+        s3fs_manager = S3FSManager()  # TODO: delete this
+        print(s3fs_manager)  # TODO: delete this
         output_s3_manager = S3Manager()
         # TODO: s3fs?
         sns_manager = SNSManager()
         ddb_manager = DynamoDBManager()
 
         # [1 of 5] Update Pipeline Status in DynamoDB
-        #self.dynamodb.update_ status ()
+        # self.dynamodb.update_ status ()
 
         # [2 of 5] Download Object From Input Bucket
-        return_value = input_s3_manager.
-
-
-
-
+        # return_value = input_s3_manager.download_file(
+        #     bucket_name=self.input_bucket_name,
+        #     key="the_input_key",
+        #     file_name="the_input_key",
+        # )
+        # print(return_value)
 
         # [3 of 5] Update Entry in DynamoDB
-        ship_name =
-        cruise_name =
-        sensor_name =
+        ship_name = "David_Starr_Jordan"  # TODO: get this from input sns message
+        cruise_name = "DS0604"
+        sensor_name = "EK60"
         file_name = "DSJ0604-D20060406-T113407.raw"
 
         test_channels = [
             "GPT 38 kHz 009072055a7f 2 ES38B",
             "GPT 70 kHz 00907203400a 3 ES70-7C",
             "GPT 120 kHz 009072034d52 1 ES120-7",
-            "GPT 200 kHz 0090720564e4 4 ES200-7C"
+            "GPT 200 kHz 0090720564e4 4 ES200-7C",
         ]
         test_frequencies = [38_000, 70_000, 120_000, 200_000]
         ddb_manager.update_item(
             table_name=self.table_name,
             key={
-
-
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
             },
             expression_attribute_names={
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
+                "#ED": "ERROR_DETAIL",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
+                "#PS": "PIPELINE_STATUS",  # testing this updated
+                "#PT": "PIPELINE_TIME",  # testing this updated
+                "#SE": "SENSOR_NAME",
+                "#SH": "SHIP_NAME",
+                "#ST": "START_TIME",
+                "#ZB": "ZARR_BUCKET",
+                "#ZP": "ZARR_PATH",
             },
             expression_attribute_values={
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                ":ch": {"L": [{"S": i} for i in test_channels]},
+                ":et": {"S": "2006-04-06T13:35:28.688Z"},
+                ":ed": {"S": ""},
+                ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
+                ":ma": {"N": str(np.round(499.7653, 4))},
+                ":mi": {"N": str(np.round(0.25, 4))},
+                ":nd": {"N": str(2458)},
+                ":ps": {"S": "SUCCESS_AGGREGATOR"},
+                ":pt": {"S": "2023-10-02T08:54:43Z"},
+                ":se": {"S": sensor_name},
+                ":sh": {"S": ship_name},
+                ":st": {"S": "2006-04-06T11:34:07.288Z"},
+                ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
+                ":zp": {
+                    "S": "level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T113407.model"
+                },
             },
             update_expression=(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            )
+                "SET "
+                "#CH = :ch, "
+                "#ET = :et, "
+                "#ED = :ed, "
+                "#FR = :fr, "
+                "#MA = :ma, "
+                "#MI = :mi, "
+                "#ND = :nd, "
+                "#PS = :ps, "
+                "#PT = :pt, "
+                "#SE = :se, "
+                "#SH = :sh, "
+                "#ST = :st, "
+                "#ZB = :zb, "
+                "#ZP = :zp"
+            ),
         )
 
         # [4 of 5] Write Object to Output Bucket
         output_s3_manager.put(
-            bucket_name=self.output_bucket_name,
-            key='123',
-            body='456'
+            bucket_name=self.output_bucket_name, key="123", body="456"
         )
 
         # [_ of _] Read file-level Zarr store from bucket, Create GeoJSON, Write to bucket
```
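The `update_item` call maps one-to-one onto DynamoDB's low-level API: `#XX` entries alias attribute names, `:xx` entries carry typed values, and a single `SET` expression ties them together. Reduced to one attribute with boto3 directly (table name is hypothetical):

```python
import boto3

client = boto3.client("dynamodb")
client.update_item(
    TableName="echofish-file-info",  # hypothetical
    Key={
        "FILE_NAME": {"S": "DSJ0604-D20060406-T113407.raw"},  # partition key
        "CRUISE_NAME": {"S": "DS0604"},  # sort key
    },
    # "#PS" aliases the attribute name, ":ps" carries the typed value
    ExpressionAttributeNames={"#PS": "PIPELINE_STATUS"},
    ExpressionAttributeValues={":ps": {"S": "SUCCESS_AGGREGATOR"}},
    UpdateExpression="SET #PS = :ps",
)
```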
```diff
@@ -136,5 +142,6 @@ class Process:
 
     #######################################################
 
+
 ###########################################################
 ###########################################################
```
water_column_sonar_processing/utility/cleaner.py (+6 -7):

```diff
@@ -1,21 +1,20 @@
-import os
 import glob
+import os
 import shutil
 
 
 ###########################################################
 class Cleaner:
     @staticmethod
-    def delete_local_files(
-
-    ):
-        print('Deleting all local raw and model files')
+    def delete_local_files(file_types=["*.raw*", "*.model"]):  # '*.json'
+        print("Deleting all local raw and model files")
         for i in file_types:
             for j in glob.glob(i):
                 if os.path.isdir(j):
                     shutil.rmtree(j, ignore_errors=True)
                 elif os.path.isfile(j):
                     os.remove(j)
-        print(
+        print("done deleting")
 
-
+
+###########################################################
```
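Usage sketch for the reworked cleaner: each pattern is globbed in the current working directory and matches are removed whether they are files or directory trees (the `*.zarr` pattern is hypothetical):

```python
from water_column_sonar_processing.utility.cleaner import Cleaner

Cleaner.delete_local_files()  # defaults: "*.raw*" files and "*.model" stores
Cleaner.delete_local_files(file_types=["*.zarr"])  # hypothetical custom pattern
```

The mutable list default is harmless here because the function only iterates it, never mutates it.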