water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/raw_to_zarr.py

```diff
@@ -1,32 +1,45 @@
 import gc
 import os
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
 import echopype as ep
-import numcodecs
 import numpy as np
-from
-from datetime import datetime
-from pathlib import Path  # , PurePath
+from zarr.codecs import Blosc
 
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
-from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Constants
+
+# from numcodecs import Blosc
+level_1 = str(Constants.LEVEL_1.value)
 
-
+
+def get_water_level(ds):
+    """
+    needs to be mocked up so that's why this is broken out
+    """
+    if "water_level" in ds.keys():
+        return ds.water_level.values
+    else:
+        return 0.0
 
 
 # This code is getting copied from echofish-aws-raw-to-zarr-lambda
 class RawToZarr:
     #######################################################
     def __init__(
-
-
-
-
+        self,
+        # output_bucket_access_key,
+        # output_bucket_secret_access_key,
+        # # overwrite_existing_zarr_store,
     ):
         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
-        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+        # self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+        self.__compressor = Blosc(cname="zstd", clevel=9)
         self.__overwrite = True
-        self.__num_threads = numcodecs.blosc.get_nthreads()
+        # self.__num_threads = numcodecs.blosc.get_nthreads()
         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         # self.__table_name = table_name
```
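The new module-level `get_water_level` helper is broken out of the class precisely so tests can stub the water-level lookup without running echopype, as its docstring notes. A minimal sketch of that test-side usage, with hypothetical fixture values:

```python
# Exercising get_water_level against bare xarray Datasets, no echopype needed.
# The fixture values below are hypothetical, not taken from the package.
import numpy as np
import xarray as xr

from water_column_sonar_processing.processing.raw_to_zarr import get_water_level

# Dataset that carries a water_level variable: values pass through unchanged.
with_level = xr.Dataset({"water_level": ("ping_time", np.array([1.5, 1.5]))})
assert np.allclose(get_water_level(with_level), [1.5, 1.5])

# Dataset without one: the helper falls back to a scalar 0.0.
assert get_water_level(xr.Dataset()) == 0.0
```

The scalar fallback keeps the later `np.round(water_level, 2)` call in `__zarr_info_to_table` valid whether or not the calibrated dataset carries a `water_level` variable.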
```diff
@@ -34,50 +47,43 @@ class RawToZarr:
 
     ############################################################################
     ############################################################################
+    @staticmethod
     def __zarr_info_to_table(
-
-
-
-
-
-
-
-
-
-
-
-
-
-        frequencies,
-        channels
+        table_name,
+        ship_name,
+        cruise_name,
+        sensor_name,  # : Constants, TODO: convert to enum
+        file_name,
+        min_echo_range,
+        max_echo_range,
+        num_ping_time_dropna,
+        start_time,
+        end_time,
+        frequencies,
+        channels,
+        water_level,
     ):
-        print(
+        print("Writing Zarr information to DynamoDB table.")
         dynamodb_manager = DynamoDBManager()
-
-        # The problem is that these values were never populated
-        # and so when the query looks for values that aren't there
-        # they fail
         dynamodb_manager.update_item(
             table_name=table_name,
             key={
-
-
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
             },
             expression_attribute_names={
-
-
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
                 # "#ED": "ERROR_DETAIL",
-
-
-
-
-                "#PS": "PIPELINE_STATUS",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
                 "#PT": "PIPELINE_TIME",
                 "#SE": "SENSOR_NAME",
                 "#SH": "SHIP_NAME",
-
-
-                '#ZP': 'ZARR_PATH',
+                "#ST": "START_TIME",
+                "#WL": "WATER_LEVEL",
             },
             expression_attribute_values={
                 ":ch": {"L": [{"S": i} for i in channels]},
```
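The rewritten `__zarr_info_to_table` keys each record by `FILE_NAME` (partition key) and `CRUISE_NAME` (sort key) and aliases every attribute name; the matching expression values continue in the next hunk. For orientation, a hedged sketch of the raw boto3 call such a wrapper presumably delegates to, with placeholder table and item values:

```python
# Equivalent low-level DynamoDB update, reduced to a single attribute.
# Table name, key values, and the water level are illustrative placeholders.
import boto3

client = boto3.client("dynamodb")
client.update_item(
    TableName="echofish-file-info",  # placeholder
    Key={
        "FILE_NAME": {"S": "D20070724-T042400.raw"},  # partition key
        "CRUISE_NAME": {"S": "HB0706"},  # sort key
    },
    # "#WL"-style aliases sidestep DynamoDB's reserved-word list and keep
    # the SET expression compact.
    ExpressionAttributeNames={"#WL": "WATER_LEVEL"},
    ExpressionAttributeValues={":wl": {"N": "1.5"}},  # numbers travel as strings
    UpdateExpression="SET #WL = :wl",
)
```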
```diff
@@ -87,47 +93,49 @@ class RawToZarr:
                 ":ma": {"N": str(np.round(max_echo_range, 4))},
                 ":mi": {"N": str(np.round(min_echo_range, 4))},
                 ":nd": {"N": str(num_ping_time_dropna)},
-                ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
                 ":se": {"S": sensor_name},
                 ":sh": {"S": ship_name},
                 ":st": {"S": start_time},
-                ":
-                ":zp": { "S": zarr_path },
+                ":wl": {"N": str(np.round(water_level, 2))},
             },
             update_expression=(
                 "SET "
                 "#CH = :ch, "
                 "#ET = :et, "
-                # "#ED = :ed, "
                 "#FR = :fr, "
                 "#MA = :ma, "
                 "#MI = :mi, "
                 "#ND = :nd, "
-                "#PS = :ps, "
                 "#PT = :pt, "
                 "#SE = :se, "
                 "#SH = :sh, "
                 "#ST = :st, "
-                "#
-                "#ZP = :zp"
+                "#WL = :wl"
             ),
         )
+        print("Done writing Zarr information to DynamoDB table.")
 
     ############################################################################
     ############################################################################
     ############################################################################
+    @staticmethod
     def __upload_files_to_output_bucket(
-
-
-
-
+        output_bucket_name: str,
+        local_directory: str,
+        # e.g. 'D20070724-T042400.zarr' # TODO: problem: if this is not in the current directory
+        object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
+        endpoint_url,
     ):
         # Note: this will be passed credentials if using NODD
-
-
+        # TODO: this will not work if the local_directory is anywhere other than the current folder
+        # see test_s3_manager test_upload...pool_executor for solution
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
         all_files = []
-        for subdir, dirs, files in os.walk(
+        for subdir, dirs, files in os.walk(
+            local_directory
+        ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
             for file in files:
                 local_path = os.path.join(subdir, file)
                 s3_key = os.path.join(object_prefix, local_path)
```
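The TODO inside `__upload_files_to_output_bucket` flags a real key-mapping hazard: `os.walk` yields paths that embed `local_directory` itself, so walking anything but a store in the current working directory leaks that prefix into the S3 key, and an absolute path makes `os.path.join` discard `object_prefix` entirely. An illustrative sketch of the failure and one possible remedy (not the package's actual fix; the diff points at the thread-pool test in `test_s3_manager` for that):

```python
import os

# Hypothetical inputs: a store that is NOT in the current working directory.
local_directory = "/tmp/work/D20070724-T042400.zarr"
object_prefix = "level_1/Henry_B._Bigelow/HB0706/EK60/"

for subdir, dirs, files in os.walk(local_directory):
    for file in files:
        local_path = os.path.join(subdir, file)
        # As written in the diff: local_path is absolute here, so os.path.join
        # throws object_prefix away and "/tmp/work/..." becomes the key.
        naive_key = os.path.join(object_prefix, local_path)
        # One possible remedy: key by the path relative to the store's parent,
        # preserving the "D20070724-T042400.zarr/..." suffix under the prefix.
        relative = os.path.relpath(local_path, os.path.dirname(local_directory))
        fixed_key = os.path.join(object_prefix, relative)
```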
```diff
@@ -139,211 +147,195 @@ class RawToZarr:
         )
         return all_uploads
 
+    ############################################################################
+
     ############################################################################
     def raw_to_zarr(
-
-
-
-
-
-
-
+        self,
+        table_name,
+        input_bucket_name,
+        output_bucket_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        raw_file_name,
+        endpoint_url: Optional[str] = None,
+        include_bot=True,
     ):
-
-
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
+        print(f"Opening raw: {raw_file_name} and creating zarr store.")
+        # geometry_manager = GeometryManager()
         cleaner = Cleaner()
-        cleaner.delete_local_files(
+        cleaner.delete_local_files(
+            file_types=["*.zarr", "*.json"]
+        )  # TODO: include bot and raw?
+
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        s3_file_path = (
+            f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+        )
+        bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+        s3_bottom_file_path = (
+            f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+        )
+        s3_manager.download_file(
+            bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+        )
+        # TODO: add the bottom file
+        if include_bot:
+            s3_manager.download_file(
+                bucket_name=input_bucket_name,
+                key=s3_bottom_file_path,
+                file_name=bottom_file_name,
+            )
+
         try:
             gc.collect()
-            print(
-            # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
-            # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+            print("Opening raw file with echopype.")
            echodata = ep.open_raw(
                 raw_file=raw_file_name,
                 sonar_model=sensor_name,
-                include_bot=
-                use_swap=True,
-                # max_chunk_size=100,
-                # storage_options={'anon': True } # 'endpoint_url': self.endpoint_url} # this was creating problems
+                include_bot=include_bot,
             )
-            print(
+            print("Compute volume backscattering strength (Sv) from raw dataset.")
             ds_sv = ep.calibrate.compute_Sv(echodata)
-
+            ds_sv = ep.consolidate.add_depth(ds_sv, echodata)
+            water_level = get_water_level(ds_sv)
+
+            gc.collect()
+            print("Done computing volume backscatter strength (Sv) from raw dataset.")
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv --> add to ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = (
+                    echodata.vendor.detected_seafloor_depth
+                )
+            #
             frequencies = echodata.environment.frequency_nominal.values
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception("Problem number of frequencies does not match channels")
             #################################################################
+            # add gps data
+            ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+
             # Get GPS coordinates
-            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
-
-
-
-
-
-
-
-
+            # gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+            #     echodata=echodata,
+            #     output_bucket_name=output_bucket_name,
+            #     ship_name=ship_name,
+            #     cruise_name=cruise_name,
+            #     sensor_name=sensor_name,
+            #     file_name=raw_file_name,
+            #     endpoint_url=endpoint_url,
+            #     write_geojson=True,
+            # )
+
+            # ds_sv.latitude.values = (  # their lat values are better than mine
+            #     lat  # overwriting echopype gps values to include missing values
+            # )
+            # ds_sv.longitude.values = lon
             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
             #################################################################
             # Technically the min_echo_range would be 0 m.
             # TODO: this var name is supposed to represent minimum resolution of depth measurements
             # TODO revert this so that smaller diffs can be used
             # The most minimum the resolution can be is as small as 0.25 meters
-            min_echo_range = np.
-
-
-
+            min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+            # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
+            # inline with the
+            # min_echo_range = np.max(  # TODO: I think this is creating problems with the water-level
+            #     [0.20, min_echo_range]
+            # )
+
             max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
+            # This is the number of missing values found throughout the lat/lon
+            # num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
+            num_ping_time_drop_na = ds_sv.latitude.shape[
+                0
+            ]  # TODO: just settting to size
             #
-
-
-
-            end_time =
+            start_time = (
+                np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+            )
+            end_time = (
+                np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+            )
             channels = list(ds_sv.channel.values)
             #
             #################################################################
             # Create the zarr store
             store_name = f"{Path(raw_file_name).stem}.zarr"
-            ds_sv.
+            # Sv = ds_sv.Sv
+            # ds_sv['Sv'] = Sv.astype('int32', copy=False)
+            ds_sv.to_zarr(
+                store=store_name,
+                zarr_format=3,
+                consolidated=False,
+                write_empty_chunks=False,
+            )  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+            gc.collect()
             #################################################################
-
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file(  # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
+            output_zarr_prefix = f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/"
             #################################################################
-
-
+            # If zarr store already exists then delete
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print(
+                    "Zarr store dataset already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
                 output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix,
+                endpoint_url=endpoint_url,
+            )
+            #################################################################
+            self.__zarr_info_to_table(
                 table_name=table_name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name=raw_file_name,
-                zarr_path=os.path.join(output_zarr_prefix, store_name),
                 min_echo_range=min_echo_range,
                 max_echo_range=max_echo_range,
-                num_ping_time_dropna=
+                num_ping_time_dropna=num_ping_time_drop_na,
                 start_time=start_time,
                 end_time=end_time,
                 frequencies=frequencies,
-                channels=channels
+                channels=channels,
+                water_level=water_level,
             )
-            ###################################################################
             #######################################################################
-
-                output_bucket_name=output_bucket_name,
-                local_directory=store_name,
-                object_prefix=output_zarr_prefix
-            )
-            #######################################################################
-            # # TODO: verify count of objects matches
-            # s3_objects = self.__s3.list_objects(
-            #     bucket_name=self.__output_bucket,
-            #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-            #     access_key_id=self.__output_bucket_access_key,
-            #     secret_access_key=self.__output_bucket_secret_access_key
-            # )
-            #######################################################################
-            # self.__update_processing_status(
-            #     file_name=input_file_name,
-            #     cruise_name=cruise_name,
-            #     pipeline_status='SUCCESS_RAW_TO_ZARR'
-            # )
+            # TODO: verify count of objects matches, publish message, update status
             #######################################################################
-            # self.__publish_done_message(input_message)
-            print('here')
         except Exception as err:
-            print(
+            print(
+                f"Exception encountered creating local Zarr store with echopype: {err}"
+            )
             raise RuntimeError(f"Problem creating local Zarr store, {err}")
         finally:
-
-
+            gc.collect()
+            cleaner.delete_local_files(
+                file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+            )
+            print("Finished raw-to-zarr conversion.")
 
     ############################################################################
-    # TODO: does this get called?
-    # def execute(self, input_message):
-    #     ship_name = input_message['shipName']
-    #     cruise_name = input_message['cruiseName']
-    #     sensor_name = input_message['sensorName']
-    #     input_file_name = input_message['fileName']
-    #     #
-    #     try:
-    #         self.__update_processing_status(
-    #             file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             pipeline_status="PROCESSING_RAW_TO_ZARR"
-    #         )
-    #         #######################################################################
-    #         store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
-    #         output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
-    #         bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
-    #         zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
-    #         #
-    #         os.chdir(TEMPDIR)  # Lambdas require use of temp directory
-    #         #######################################################################
-    #         #######################################################################
-    #         # Check if zarr store already exists
-    #         s3_objects = self.__s3.list_objects(
-    #             bucket_name=self.__output_bucket,
-    #             prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-    #             access_key_id=self.__output_bucket_access_key,
-    #             secret_access_key=self.__output_bucket_secret_access_key
-    #         )
-    #         if len(s3_objects) > 0:
-    #             print('Zarr store data already exists in s3, deleting existing and continuing.')
-    #             self.__s3.delete_objects(
-    #                 bucket_name=self.__output_bucket,
-    #                 objects=s3_objects,
-    #                 access_key_id=self.__output_bucket_access_key,
-    #                 secret_access_key=self.__output_bucket_secret_access_key
-    #             )
-    #         #######################################################################
-    #         # self.__delete_all_local_raw_and_zarr_files()
-    #         Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
-    #         self.__s3.download_file(
-    #             bucket_name=self.__input_bucket,
-    #             key=bucket_key,
-    #             file_name=input_file_name
-    #         )
-    #         self.__create_local_zarr_store(
-    #             raw_file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             sensor_name=sensor_name,
-    #             output_zarr_prefix=output_zarr_prefix,
-    #             store_name=store_name
-    #         )
-    #         #######################################################################
-    #         self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
-    #         #######################################################################
-    #         # # TODO: verify count of objects matches
-    #         # s3_objects = self.__s3.list_objects(
-    #         #     bucket_name=self.__output_bucket,
-    #         #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-    #         #     access_key_id=self.__output_bucket_access_key,
-    #         #     secret_access_key=self.__output_bucket_secret_access_key
-    #         # )
-    #         #######################################################################
-    #         self.__update_processing_status(
-    #             file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             pipeline_status='SUCCESS_RAW_TO_ZARR'
-    #         )
-    #         #######################################################################
-    #         self.__publish_done_message(input_message)
-    #         #######################################################################
-    #         # except Exception as err:
-    #         #     print(f'Exception encountered: {err}')
-    #         #     self.__update_processing_status(
-    #         #         file_name=input_file_name,
-    #         #         cruise_name=cruise_name,
-    #         #         pipeline_status='FAILURE_RAW_TO_ZARR',
-    #         #         error_message=str(err),
-    #         #     )
-    #     finally:
-    #         self.__delete_all_local_raw_and_zarr_files()
-    #######################################################################
-
     ############################################################################
 
+
 ################################################################################
 ############################################################################
```
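Stripped of the S3 and DynamoDB plumbing, the rewritten `raw_to_zarr` reduces to a straight echopype pipeline. A condensed happy-path sketch using the same calls the diff introduces (the file name and sonar model are placeholders, and the zarr store lands in the current directory):

```python
import echopype as ep

# Parse the vendor .raw file into an EchoData object.
echodata = ep.open_raw(raw_file="D20070724-T042400.raw", sonar_model="EK60")

ds_sv = ep.calibrate.compute_Sv(echodata)             # raw power -> volume backscatter (Sv)
ds_sv = ep.consolidate.add_depth(ds_sv, echodata)     # adds depth; water_level when available
ds_sv = ep.consolidate.add_location(ds_sv, echodata)  # interpolates GPS onto ping_time

# Write the calibrated dataset out as a local zarr store.
ds_sv.to_zarr(store="D20070724-T042400.zarr", consolidated=False)
```

Everything else in the method (the `.bot` download guard, the duplicate-frequency check, the delete-then-upload dance against the output bucket, and the DynamoDB bookkeeping) wraps this core.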
water_column_sonar_processing/utility/__init__.py

```diff
@@ -1,6 +1,13 @@
 from .cleaner import Cleaner
-from .constants import Constants, Coordinates
+from .constants import Constants, Coordinates, Instruments
 from .pipeline_status import PipelineStatus
 from .timestamp import Timestamp
 
-__all__ = [
+__all__ = [
+    "Cleaner",
+    "Instruments",
+    "Constants",
+    "Coordinates",
+    "PipelineStatus",
+    "Timestamp",
+]
```
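With the expanded `__all__`, the utility package's public surface is now explicit rather than implied; importing the full set looks like:

```python
# Every name exported by water_column_sonar_processing.utility after this change.
from water_column_sonar_processing.utility import (
    Cleaner,
    Constants,
    Coordinates,
    Instruments,
    PipelineStatus,
    Timestamp,
)
```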