water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +420 -0
- water_column_sonar_processing/aws/s3fs_manager.py +72 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +339 -0
- water_column_sonar_processing/geometry/__init__.py +11 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +243 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +384 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +722 -0
- water_column_sonar_processing/process.py +149 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
- water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
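The headline change is the move from the flat `model` package to the `water_column_sonar_processing` namespace, along with a jump from SemVer 0.0.1 to a CalVer-style 25.11.1. A sketch of the renamed import surface, not documented API; these three imports appear verbatim at the top of raw_to_zarr.py in the first diff below:

```python
# Sketch of the post-rename import surface, inferred from raw_to_zarr.py below.
from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
from water_column_sonar_processing.geometry import GeometryManager
from water_column_sonar_processing.utility import Cleaner
```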
--- /dev/null
+++ b/water_column_sonar_processing/processing/raw_to_zarr.py
@@ -0,0 +1,425 @@
+import gc
+import os
+from datetime import datetime
+from pathlib import Path
+
+import echopype as ep
+import numpy as np
+from zarr.codecs import Blosc
+
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.utility import Cleaner
+
+
+# from numcodecs import Blosc
+
+
+def get_water_level(ds):
+    """
+    needs to be mocked up so thats why this is broken out
+    """
+    if "water_level" in ds.keys():
+        return ds.water_level.values
+    else:
+        return 0.0
+
+
+# This code is getting copied from echofish-aws-raw-to-zarr-lambda
+class RawToZarr:
+    #######################################################
+    def __init__(
+        self,
+        # output_bucket_access_key,
+        # output_bucket_secret_access_key,
+        # # overwrite_existing_zarr_store,
+    ):
+        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+        # self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+        self.__compressor = Blosc(cname="zstd", clevel=9)
+        self.__overwrite = True
+        # self.__num_threads = numcodecs.blosc.get_nthreads()
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.__table_name = table_name
+        # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+    ############################################################################
+    ############################################################################
+    def __zarr_info_to_table(
+        self,
+        table_name,
+        ship_name,
+        cruise_name,
+        sensor_name,  # : Constants, TODO: convert to enum
+        file_name,
+        min_echo_range,
+        max_echo_range,
+        num_ping_time_dropna,
+        start_time,
+        end_time,
+        frequencies,
+        channels,
+        water_level,
+    ):
+        print("Writing Zarr information to DynamoDB table.")
+        dynamodb_manager = DynamoDBManager()
+        dynamodb_manager.update_item(
+            table_name=table_name,
+            key={
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+            },
+            expression_attribute_names={
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
+                # "#ED": "ERROR_DETAIL",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
+                "#PT": "PIPELINE_TIME",
+                "#SE": "SENSOR_NAME",
+                "#SH": "SHIP_NAME",
+                "#ST": "START_TIME",
+                "#WL": "WATER_LEVEL",
+            },
+            expression_attribute_values={
+                ":ch": {"L": [{"S": i} for i in channels]},
+                ":et": {"S": end_time},
+                # ":ed": {"S": ""},
+                ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                ":ma": {"N": str(np.round(max_echo_range, 4))},
+                ":mi": {"N": str(np.round(min_echo_range, 4))},
+                ":nd": {"N": str(num_ping_time_dropna)},
+                ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                ":se": {"S": sensor_name},
+                ":sh": {"S": ship_name},
+                ":st": {"S": start_time},
+                ":wl": {"N": str(np.round(water_level, 2))},
+            },
+            update_expression=(
+                "SET "
+                "#CH = :ch, "
+                "#ET = :et, "
+                "#FR = :fr, "
+                "#MA = :ma, "
+                "#MI = :mi, "
+                "#ND = :nd, "
+                "#PT = :pt, "
+                "#SE = :se, "
+                "#SH = :sh, "
+                "#ST = :st, "
+                "#WL = :wl"
+            ),
+        )
+        print("Done writing Zarr information to DynamoDB table.")
+
+    ############################################################################
+    ############################################################################
+    ############################################################################
+    def __upload_files_to_output_bucket(
+        self,
+        output_bucket_name: str,
+        local_directory: str,  # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
+        object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
+        endpoint_url,
+    ):
+        # Note: this will be passed credentials if using NODD
+        # TODO: this will not work if the local_directory is anywhere other than the current folder
+        # see test_s3_manager test_upload...pool_executor for solution
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
+        all_files = []
+        for subdir, dirs, files in os.walk(
+            local_directory
+        ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
+            for file in files:
+                local_path = os.path.join(subdir, file)
+                s3_key = os.path.join(object_prefix, local_path)
+                all_files.append([local_path, s3_key])
+        # all_files
+        all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
+            all_files=all_files,
+        )
+        return all_uploads
+
+    ############################################################################
+
+    ############################################################################
+    def raw_to_zarr(
+        self,
+        table_name,
+        input_bucket_name,
+        output_bucket_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        raw_file_name,
+        endpoint_url=None,
+        include_bot=True,
+    ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
+        print(f"Opening raw: {raw_file_name} and creating zarr store.")
+        geometry_manager = GeometryManager()
+        cleaner = Cleaner()
+        cleaner.delete_local_files(
+            file_types=["*.zarr", "*.json"]
+        )  # TODO: include bot and raw?
+
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        s3_file_path = (
+            f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+        )
+        bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+        s3_bottom_file_path = (
+            f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+        )
+        s3_manager.download_file(
+            bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+        )
+        # TODO: add the bottom file
+        if include_bot:
+            s3_manager.download_file(
+                bucket_name=input_bucket_name,
+                key=s3_bottom_file_path,
+                file_name=bottom_file_name,
+            )
+
+        try:
+            gc.collect()
+            print("Opening raw file with echopype.")
+            # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+            # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+            echodata = ep.open_raw(
+                raw_file=raw_file_name,
+                sonar_model=sensor_name,
+                include_bot=include_bot,
+                # include_idx=?
+                # use_swap=True,
+                # max_chunk_size=300,
+                # storage_options={'anon': True }  # 'endpoint_url': self.endpoint_url}  # this was creating problems
+            )
+            print("Compute volume backscattering strength (Sv) from raw dataset.")
+            ds_sv = ep.calibrate.compute_Sv(echodata)
+            ds_sv = ep.consolidate.add_depth(
+                ds_sv, echodata
+            )  # TODO: consolidate with other depth values
+
+            water_level = get_water_level(ds_sv)
+
+            gc.collect()
+            print("Done computing volume backscatter strength (Sv) from raw dataset.")
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = (
+                    echodata.vendor.detected_seafloor_depth
+                )
+            #
+            frequencies = echodata.environment.frequency_nominal.values
+            #################################################################
+            # Get GPS coordinates
+            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                echodata=echodata,
+                output_bucket_name=output_bucket_name,
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                file_name=raw_file_name,
+                endpoint_url=endpoint_url,
+                write_geojson=True,
+            )
+            ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+            ds_sv.latitude.values = (
+                lat  # overwriting echopype gps values to include missing values
+            )
+            ds_sv.longitude.values = lon
+            # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+            #################################################################
+            # Technically the min_echo_range would be 0 m.
+            # TODO: this var name is supposed to represent minimum resolution of depth measurements
+            # TODO revert this so that smaller diffs can be used
+            # The most minimum the resolution can be is as small as 0.25 meters
+            min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+            # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
+            # inline with the
+            min_echo_range = np.max(
+                [0.20, min_echo_range]
+            )  # TODO: experiment with 0.25 and 0.50
+
+            max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
+            # This is the number of missing values found throughout the lat/lon
+            num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
+            #
+            start_time = (
+                np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+            )
+            end_time = (
+                np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+            )
+            channels = list(ds_sv.channel.values)
+            #
+            #################################################################
+            # Create the zarr store
+            store_name = f"{Path(raw_file_name).stem}.zarr"
+            # Sv = ds_sv.Sv
+            # ds_sv['Sv'] = Sv.astype('int32', copy=False)
+            ds_sv.to_zarr(
+                store=store_name,
+                zarr_format=3,
+                consolidated=False,
+                write_empty_chunks=False,
+            )  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+            gc.collect()
+            #################################################################
+            output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print(
+                    "Zarr store dataset already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix,
+                endpoint_url=endpoint_url,
+            )
+            #################################################################
+            self.__zarr_info_to_table(
+                table_name=table_name,
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                file_name=raw_file_name,
+                min_echo_range=min_echo_range,
+                max_echo_range=max_echo_range,
+                num_ping_time_dropna=num_ping_time_dropna,
+                start_time=start_time,
+                end_time=end_time,
+                frequencies=frequencies,
+                channels=channels,
+                water_level=water_level,
+            )
+            #######################################################################
+            # TODO: verify count of objects matches, publish message, update status
+            #######################################################################
+            print("Finished raw-to-zarr conversion.")
+        except Exception as err:
+            print(
+                f"Exception encountered creating local Zarr store with echopype: {err}"
+            )
+            raise RuntimeError(f"Problem creating local Zarr store, {err}")
+        finally:
+            gc.collect()
+            print("Finally.")
+            cleaner.delete_local_files(
+                file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+            )
+            print("Done creating local zarr store.")
+
+    ############################################################################
+    # TODO: does this get called?
+    # def execute(self, input_message):
+    #     ship_name = input_message['shipName']
+    #     cruise_name = input_message['cruiseName']
+    #     sensor_name = input_message['sensorName']
+    #     input_file_name = input_message['fileName']
+    #     #
+    #     try:
+    #         self.__update_processing_status(
+    #             file_name=input_file_name,
+    #             cruise_name=cruise_name,
+    #             pipeline_status="PROCESSING_RAW_TO_ZARR"
+    #         )
+    #         #######################################################################
+    #         store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
+    #         output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
+    #         bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
+    #         zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
+    #         #
+    #         os.chdir(TEMPDIR)  # Lambdas require use of temp directory
+    #         #######################################################################
+    #         #######################################################################
+    #         # Check if zarr store already exists
+    #         s3_objects = self.__s3.list_objects(
+    #             bucket_name=self.__output_bucket,
+    #             prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
+    #             access_key_id=self.__output_bucket_access_key,
+    #             secret_access_key=self.__output_bucket_secret_access_key
+    #         )
+    #         if len(s3_objects) > 0:
+    #             print('Zarr store dataset already exists in s3, deleting existing and continuing.')
+    #             self.__s3.delete_objects(
+    #                 bucket_name=self.__output_bucket,
+    #                 objects=s3_objects,
+    #                 access_key_id=self.__output_bucket_access_key,
+    #                 secret_access_key=self.__output_bucket_secret_access_key
+    #             )
+    #         #######################################################################
+    #         # self.__delete_all_local_raw_and_zarr_files()
+    #         Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
+    #         self.__s3.download_file(
+    #             bucket_name=self.__input_bucket,
+    #             key=bucket_key,
+    #             file_name=input_file_name
+    #         )
+    #         self.__create_local_zarr_store(
+    #             raw_file_name=input_file_name,
+    #             cruise_name=cruise_name,
+    #             sensor_name=sensor_name,
+    #             output_zarr_prefix=output_zarr_prefix,
+    #             store_name=store_name
+    #         )
+    #         #######################################################################
+    #         self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
+    #         #######################################################################
+    #         # # TODO: verify count of objects matches
+    #         # s3_objects = self.__s3.list_objects(
+    #         #     bucket_name=self.__output_bucket,
+    #         #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
+    #         #     access_key_id=self.__output_bucket_access_key,
+    #         #     secret_access_key=self.__output_bucket_secret_access_key
+    #         # )
+    #         #######################################################################
+    #         self.__update_processing_status(
+    #             file_name=input_file_name,
+    #             cruise_name=cruise_name,
+    #             pipeline_status='SUCCESS_RAW_TO_ZARR'
+    #         )
+    #         #######################################################################
+    #         self.__publish_done_message(input_message)
+    #         #######################################################################
+    #         # except Exception as err:
+    #         #     print(f'Exception encountered: {err}')
+    #         #     self.__update_processing_status(
+    #         #         file_name=input_file_name,
+    #         #         cruise_name=cruise_name,
+    #         #         pipeline_status='FAILURE_RAW_TO_ZARR',
+    #         #         error_message=str(err),
+    #         #     )
+    #     finally:
+    #         self.__delete_all_local_raw_and_zarr_files()
+    #######################################################################
+
+    ############################################################################
+
+
+################################################################################
+############################################################################
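As reconstructed above, `RawToZarr.raw_to_zarr` is a single-file pipeline: download the .raw (and optional .bot) file, calibrate Sv with echopype, attach depth/location/seafloor variables, write a local Zarr v3 store, replace any existing copy under the `level_1/` prefix in S3, and record summary metadata in DynamoDB. A minimal driver might look like the sketch below; the table and output bucket names are placeholders, while the ship/cruise/sensor values are the examples that appear in the code's own comments:

```python
from water_column_sonar_processing.processing.raw_to_zarr import RawToZarr

raw_to_zarr = RawToZarr()
raw_to_zarr.raw_to_zarr(
    table_name="echofish-pipeline",            # hypothetical DynamoDB table
    input_bucket_name="noaa-wcsd-pds",         # NODD raw bucket referenced in the code comments
    output_bucket_name="example-zarr-bucket",  # hypothetical output bucket
    ship_name="Henry_B._Bigelow",              # example path segments from the code comments
    cruise_name="HB0706",
    sensor_name="EK60",
    raw_file_name="D20070724-T042400.raw",
    include_bot=True,                          # also fetch the .bot bottom file
)
```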
--- /dev/null
+++ b/water_column_sonar_processing/utility/__init__.py
@@ -0,0 +1,13 @@
+from .cleaner import Cleaner
+from .constants import Constants, Coordinates, Instruments
+from .pipeline_status import PipelineStatus
+from .timestamp import Timestamp
+
+__all__ = [
+    "Cleaner",
+    "Instruments",
+    "Constants",
+    "Coordinates",
+    "PipelineStatus",
+    "Timestamp",
+]
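With these re-exports, downstream code can import every utility class from the subpackage root, as raw_to_zarr.py above already does for `Cleaner`:

```python
# All six names are re-exported by the __init__.py shown above.
from water_column_sonar_processing.utility import (
    Cleaner,
    Constants,
    Coordinates,
    Instruments,
    PipelineStatus,
    Timestamp,
)
```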
--- a/model/utility/cleaner.py
+++ b/water_column_sonar_processing/utility/cleaner.py
@@ -1,21 +1,20 @@
-import os
 import glob
+import os
 import shutil
 
 
 ###########################################################
 class Cleaner:
-
-
-
-    ):
-        print('Deleting all local raw and zarr files')
+    def delete_local_files(self, file_types=["*.raw*", "*.model"]):  # '*.json'
+        # TODO: add .zarr to this
+        print("Deleting all local raw and model files")
         for i in file_types:
             for j in glob.glob(i):
                 if os.path.isdir(j):
                     shutil.rmtree(j, ignore_errors=True)
                 elif os.path.isfile(j):
                     os.remove(j)
-        print(
+        print("done deleting")
 
-
+
+###########################################################
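Note that `delete_local_files` matches its glob patterns against the current working directory, which is why the commented-out `execute` path in raw_to_zarr.py above does `os.chdir(TEMPDIR)` first. A hedged usage sketch that works inside a scratch directory:

```python
import os
import tempfile

from water_column_sonar_processing.utility import Cleaner

# Sketch only: the glob patterns are cwd-relative, so do the work in a
# throwaway directory and restore the original cwd before cleanup.
original_cwd = os.getcwd()
with tempfile.TemporaryDirectory() as scratch:
    os.chdir(scratch)
    try:
        # ... download .raw/.bot files and write the local .zarr store here ...
        Cleaner().delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
    finally:
        os.chdir(original_cwd)  # leave the scratch dir before it is removed
```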
--- /dev/null
+++ b/water_column_sonar_processing/utility/constants.py
@@ -0,0 +1,118 @@
+from enum import Enum, unique
+
+
+@unique
+class Instruments(Enum):
+    # Values are determined using scan of the fist byte of data
+    EK60 = "EK60"
+    EK80 = "EK80"
+
+
+# @unique
+class Constants(Enum):
+    """
+    See here for data type support: https://github.com/zarr-developers/zarr-extensions/tree/main/data-types
+    """
+
+    TILE_SIZE = 512
+
+    # Average https://noaa-wcsd-zarr-pds.s3.us-east-1.amazonaws.com/level_2/Henry_B._Bigelow/HB0902/EK60/HB0902.zarr/time/927
+    # chunk size is ~1.3 kB, HB0902 cruise takes ~30 seconds to load all time/lat/lon dataset
+    # NOTE: larger value here will speed up the TurfJS download of dataset in the UI
+    # Problem interpolating the dataset: cannot reshape array of size 65536 into shape...
+    # TODO: needs to be enum
+    SPATIOTEMPORAL_CHUNK_SIZE = int(2**16) - 1024
+    # int(2**16) - 1024,
+    # int(2**16) - 1024,
+    # e.g. int(2**14)
+    # TODO: create test for SPATIOTEMPORAL_CHUNK_SIZE with requirement!
+
+    LEVEL_0 = "raw"
+    LEVEL_1 = "level_1"  # from bucket path
+    LEVEL_2 = "level_2"
+    LEVEL_3 = "level_3"
+
+    EK60 = "EK60"  # TODO: use for "instrument"
+    EK80 = "EK80"
+    # INSTRUMENT = EK60 | EK80
+
+
+class Coordinates(Enum):
+    """
+    Should try to specify
+        dtype
+        units
+        long_name — most readable description of variable
+        standard_name — name in lowercase and snake_case
+    """
+
+    PROJECT_NAME = "echofish"
+
+    DEPTH = "depth"
+    DEPTH_DTYPE = "float32"
+    DEPTH_UNITS = "m"  # TODO: Pint? <https://pint.readthedocs.io/en/stable/>
+    DEPTH_LONG_NAME = "Depth below surface"
+    DEPTH_STANDARD_NAME = "depth"
+
+    TIME = "time"
+    TIME_DTYPE = "float64"
+    # Note: units and calendar are used downstream by Xarray
+    TIME_UNITS = "seconds since 1970-01-01 00:00:00"
+    TIME_LONG_NAME = "Timestamp of each ping"
+    TIME_STANDARD_NAME = "time"
+    TIME_CALENDAR = "proleptic_gregorian"
+    # TODO: create test for reading out timestamps in Xarray
+
+    FREQUENCY = "frequency"
+    FREQUENCY_DTYPE = "uint64"
+    FREQUENCY_UNITS = "Hz"
+    FREQUENCY_LONG_NAME = "Transducer frequency"
+    FREQUENCY_STANDARD_NAME = "sound_frequency"
+
+    LATITUDE = "latitude"
+    LATITUDE_DTYPE = "float32"
+    LATITUDE_UNITS = "degrees_north"
+    LATITUDE_LONG_NAME = "Latitude"
+    LATITUDE_STANDARD_NAME = "latitude"
+
+    LONGITUDE = "longitude"
+    LONGITUDE_DTYPE = "float32"
+    LONGITUDE_UNITS = "degrees_east"
+    LONGITUDE_LONG_NAME = "Longitude"
+    LONGITUDE_STANDARD_NAME = "longitude"
+
+    BOTTOM = "bottom"
+    BOTTOM_DTYPE = "float32"
+    BOTTOM_UNITS = "m"
+    BOTTOM_LONG_NAME = "Detected sea floor depth"
+    BOTTOM_STANDARD_NAME = "bottom"
+
+    SPEED = "speed"
+    SPEED_DTYPE = "float32"
+    SPEED_UNITS = "Knots"
+    SPEED_LONG_NAME = "Nautical miles per hour"
+    SPEED_STANDARD_NAME = "speed"
+
+    # This is the width of each slice of the water columns
+    DISTANCE = "distance"
+    DISTANCE_DTYPE = "float32"
+    DISTANCE_UNITS = "m"
+    DISTANCE_LONG_NAME = "GPS distance"
+    DISTANCE_STANDARD_NAME = "distance"
+
+    SV = "Sv"
+    SV_DTYPE = "float32"  # int64
+    SV_UNITS = "dB"
+    SV_LONG_NAME = "Volume backscattering strength (Sv re 1 m-1)"
+    SV_STANDARD_NAME = "volume_backscattering_strength"
+
+
+class BatchShape(Enum):
+    """
+    The tensor shape of a machine learning sample.
+    """
+
+    DEPTH = 2
+    TIME = 3
+    FREQUENCY = 4
+    BATCH_SIZE = 5