water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff shows the content differences between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/__init__.py
@@ -1,4 +1,4 @@
-from .
-from .raw_to_zarr import RawToZarr
+from .raw_to_netcdf import RawToNetCDF
+from .raw_to_zarr import RawToZarr, get_water_level
 
-__all__ = ["
+__all__ = ["RawToZarr", "get_water_level", "RawToNetCDF"]
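For reference, the import surface implied by the updated `__all__` above — an illustrative one-liner only, using just the names exported by this hunk:

    from water_column_sonar_processing.processing import RawToNetCDF, RawToZarr, get_water_level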
water_column_sonar_processing/processing/raw_to_netcdf.py (new file)
@@ -0,0 +1,320 @@
+import gc
+import os
+from datetime import datetime
+from pathlib import Path  # , PurePath
+
+import echopype as ep
+import numpy as np
+from zarr.codecs import Blosc
+
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.utility import Cleaner
+
+
+# This code is getting copied from echofish-aws-raw-to-zarr-lambda
+class RawToNetCDF:
+    #######################################################
+    def __init__(
+        self,
+        # output_bucket_access_key,
+        # output_bucket_secret_access_key,
+        # # overwrite_existing_zarr_store,
+    ):
+        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+        self.__compressor = Blosc(cname="zstd", clevel=9)  # shuffle=Blosc.NOSHUFFLE
+        self.__overwrite = True
+        # self.__num_threads = numcodecs.blosc.get_nthreads()
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.__table_name = table_name
+        # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+    ############################################################################
+    ############################################################################
+    def __netcdf_info_to_table(
+        self,
+        # output_bucket_name,
+        table_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name,
+        # zarr_path,
+        min_echo_range,
+        max_echo_range,
+        num_ping_time_dropna,
+        start_time,
+        end_time,
+        frequencies,
+        channels,
+        water_level,
+    ):
+        print("Writing Zarr information to DynamoDB table.")
+        dynamodb_manager = DynamoDBManager()
+        dynamodb_manager.update_item(
+            table_name=table_name,
+            key={
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+            },
+            expression_attribute_names={
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
+                # "#ED": "ERROR_DETAIL",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
+                # "#PS": "PIPELINE_STATUS",
+                "#PT": "PIPELINE_TIME",
+                "#SE": "SENSOR_NAME",
+                "#SH": "SHIP_NAME",
+                "#ST": "START_TIME",
+                # "#ZB": "ZARR_BUCKET",
+                # "#ZP": "ZARR_PATH",
+                "#WL": "WATER_LEVEL",
+            },
+            expression_attribute_values={
+                ":ch": {"L": [{"S": i} for i in channels]},
+                ":et": {"S": end_time},
+                # ":ed": {"S": ""},
+                ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                ":ma": {"N": str(np.round(max_echo_range, 4))},
+                ":mi": {"N": str(np.round(min_echo_range, 4))},
+                ":nd": {"N": str(num_ping_time_dropna)},
+                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+                ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                ":se": {"S": sensor_name},
+                ":sh": {"S": ship_name},
+                ":st": {"S": start_time},
+                ":wl": {"N": str(np.round(water_level, 2))},
+                # ":zb": {"S": output_bucket_name},
+                # ":zp": {"S": zarr_path},
+            },
+            update_expression=(
+                "SET "
+                "#CH = :ch, "
+                "#ET = :et, "
+                # "#ED = :ed, "
+                "#FR = :fr, "
+                "#MA = :ma, "
+                "#MI = :mi, "
+                "#ND = :nd, "
+                # "#PS = :ps, "
+                "#PT = :pt, "
+                "#SE = :se, "
+                "#SH = :sh, "
+                "#ST = :st, "
+                "#WL = :wl"
+                # "#ZB = :zb, "
+                # "#ZP = :zp"
+            ),
+        )
+        print("Done writing Zarr information to DynamoDB table.")
+
+    ############################################################################
+    ############################################################################
+    ############################################################################
+    def __upload_files_to_output_bucket(
+        self,
+        output_bucket_name,
+        local_directory,
+        object_prefix,
+        endpoint_url,
+    ):
+        # Note: this will be passed credentials if using NODD
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
+        all_files = []
+        for subdir, dirs, files in os.walk(local_directory):
+            for file in files:
+                local_path = os.path.join(subdir, file)
+                s3_key = os.path.join(object_prefix, local_path)
+                all_files.append([local_path, s3_key])
+        # all_files
+        all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
+            all_files=all_files,
+        )
+        return all_uploads
+
+    def __upload_file_to_output_bucket(
+        self,
+        output_bucket_name,
+        local_directory,
+        object_prefix,
+        endpoint_url,
+    ):
+        # Note: this will be passed credentials if using NODD
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
+        all_files = [local_directory]
+        all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
+            all_files=all_files,
+        )
+        return all_uploads
+
+    ############################################################################
+    def raw_to_netcdf(
+        self,
+        table_name,
+        input_bucket_name,
+        output_bucket_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        raw_file_name,
+        endpoint_url=None,
+        include_bot=True,
+    ):
+        """
+        Downloads the raw files, processes them with echopype, and uploads files
+        to the nodd bucket.
+
+        Needs to create two files, one echopype opened file, one is Sv calibrated file
+        """
+        print(f"Opening raw: {raw_file_name} and creating netcdf.")
+        try:
+            geometry_manager = GeometryManager()
+            cleaner = Cleaner()
+            cleaner.delete_local_files(
+                file_types=["*.nc", "*.json"]
+            )  # TODO: include bot and raw?
+
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            s3_file_path = (
+                f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+            )
+            bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+            s3_bottom_file_path = (
+                f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+            )
+            s3_manager.download_file(
+                bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+            )
+            # TODO: add the bottom file
+            if include_bot:
+                s3_manager.download_file(
+                    bucket_name=input_bucket_name,
+                    key=s3_bottom_file_path,
+                    file_name=bottom_file_name,
+                )
+
+            gc.collect()
+            print("Opening raw file with echopype.")
+            # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+            # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+            echodata = ep.open_raw(
+                raw_file=raw_file_name,
+                sonar_model=sensor_name,
+                include_bot=include_bot,
+            )
+
+            netcdf_name = f"{Path(raw_file_name).stem}.nc"
+            # Xarray Dataset to netcdf
+            echodata.to_netcdf(
+                save_path=netcdf_name,
+                compress=True,
+                overwrite=True,
+            )
+
+            print("Compute volume backscattering strength (Sv) from raw dataset.")
+            ds_sv = ep.calibrate.compute_Sv(echodata)
+            ds_sv = ep.consolidate.add_depth(
+                ds_sv, echodata
+            )  # TODO: consolidate with other depth values
+            # water_level = ds_sv["water_level"].values
+            gc.collect()
+            print("Done computing volume backscatter strength (Sv) from raw dataset.")
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = (
+                    echodata.vendor.detected_seafloor_depth
+                )
+            #
+            # frequencies = echodata.environment.frequency_nominal.values
+            #################################################################
+            # Get GPS coordinates, just overwrite the lat lon values
+            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                echodata=echodata,
+                output_bucket_name=output_bucket_name,
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                file_name=raw_file_name,
+                endpoint_url=endpoint_url,
+                write_geojson=False,
+            )
+            ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+            ds_sv.latitude.values = (
+                lat  # overwriting echopype gps values to include missing values
+            )
+            ds_sv.longitude.values = lon
+            # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+            # Create the netcdf
+            netcdf_name_computed_Sv = f"{Path(raw_file_name).stem}_computed_Sv.nc"
+
+            # Xarray Dataset to netcdf
+            ds_sv.to_netcdf(
+                path=netcdf_name_computed_Sv,
+                mode="w",
+            )
+            gc.collect()
+            #################################################################
+            # output_netcdf_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If netcdf already exists then delete
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+            )
+            if len(child_objects) > 0:
+                print(
+                    "NetCDF dataset already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            child_objects_computed_Sv = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+            )
+            if len(child_objects_computed_Sv) > 0:
+                print("data already exists in s3, deleting existing and continuing.")
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects_computed_Sv,
+                )
+            #################################################################
+            s3_manager.upload_file(
+                filename=netcdf_name,
+                bucket_name=output_bucket_name,
+                key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+            )
+            s3_manager.upload_file(
+                filename=netcdf_name_computed_Sv,
+                bucket_name=output_bucket_name,
+                key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+            )
+        except Exception as err:
+            print(f"Exception encountered creating local netcdf with echopype: {err}")
+            raise RuntimeError(f"Problem creating local netcdf, {err}")
+        finally:
+            gc.collect()
+            cleaner.delete_local_files(
+                file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"]
+            )
+            print("Done creating local zarr store.")
+
+    ############################################################################
+
+
+################################################################################
+############################################################################
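Taken together, raw_to_netcdf downloads the .raw (and optionally the .bot) file from the input bucket, opens it with echopype, computes calibrated Sv with depth and GPS locations attached, writes two NetCDF files, and uploads both under the output bucket's level_1/ prefix. A minimal invocation sketch follows; the table, bucket, ship, cruise, sensor, and file names are placeholders for illustration, not values taken from the package.

    from water_column_sonar_processing.processing import RawToNetCDF

    # All identifiers below are hypothetical placeholders.
    converter = RawToNetCDF()
    converter.raw_to_netcdf(
        table_name="example-file-info-table",         # DynamoDB table name (placeholder)
        input_bucket_name="example-raw-bucket",       # holds data/raw/{ship}/{cruise}/{sensor}/ objects
        output_bucket_name="example-level-1-bucket",  # receives level_1/{ship}/{cruise}/{sensor}/ objects
        ship_name="Example_Ship",
        cruise_name="EX2300",
        sensor_name="EK60",                           # also passed to echopype as the sonar_model
        raw_file_name="D20230101-T000000.raw",
        # endpoint_url=None (default), include_bot=True (default)
    )
    # Per the upload keys in the code above, two objects are written:
    #   level_1/Example_Ship/EX2300/EK60/D20230101-T000000.nc
    #   level_1/Example_Ship/EX2300/EK60/D20230101-T000000_computed_Sv.nc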