water_column_sonar_processing-0.0.9-py3-none-any.whl → water_column_sonar_processing-26.1.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/__init__.py
@@ -1,4 +1,4 @@
- from .cruise_sampler import CruiseSampler
- from .raw_to_zarr import RawToZarr
+ from .raw_to_netcdf import RawToNetCDF
+ from .raw_to_zarr import RawToZarr, get_water_level
 
- __all__ = ["CruiseSampler", "RawToZarr"]
+ __all__ = ["RawToZarr", "get_water_level", "RawToNetCDF"]
water_column_sonar_processing/processing/raw_to_netcdf.py
@@ -0,0 +1,320 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path  # , PurePath
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.geometry import GeometryManager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # This code is getting copied from echofish-aws-raw-to-zarr-lambda
+ class RawToNetCDF:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         self.__compressor = Blosc(cname="zstd", clevel=9)  # shuffle=Blosc.NOSHUFFLE
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     def __netcdf_info_to_table(
+         self,
+         # output_bucket_name,
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         file_name,
+         # zarr_path,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing Zarr information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 # "#PS": "PIPELINE_STATUS",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 # "#ZB": "ZARR_BUCKET",
+                 # "#ZP": "ZARR_PATH",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                 # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+                 # ":zb": {"S": output_bucket_name},
+                 # ":zp": {"S": zarr_path},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 # "#ED = :ed, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 # "#PS = :ps, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+                 # "#ZB = :zb, "
+                 # "#ZP = :zp"
+             ),
+         )
+         print("Done writing Zarr information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     def __upload_files_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(local_directory):
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     def __upload_file_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = [local_directory]
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+     def raw_to_netcdf(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url=None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, and uploads files
+         to the nodd bucket.
+
+         Needs to create two files, one echopype opened file, one is Sv calibrated file
+         """
+         print(f"Opening raw: {raw_file_name} and creating netcdf.")
+         try:
+             geometry_manager = GeometryManager()
+             cleaner = Cleaner()
+             cleaner.delete_local_files(
+                 file_types=["*.nc", "*.json"]
+             )  # TODO: include bot and raw?
+
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             s3_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+             )
+             bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+             s3_bottom_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+             )
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+             )
+             # TODO: add the bottom file
+             if include_bot:
+                 s3_manager.download_file(
+                     bucket_name=input_bucket_name,
+                     key=s3_bottom_file_path,
+                     file_name=bottom_file_name,
+                 )
+
+             gc.collect()
+             print("Opening raw file with echopype.")
+             # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+             # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+             )
+
+             netcdf_name = f"{Path(raw_file_name).stem}.nc"
+             # Xarray Dataset to netcdf
+             echodata.to_netcdf(
+                 save_path=netcdf_name,
+                 compress=True,
+                 overwrite=True,
+             )
+
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(
+                 ds_sv, echodata
+             )  # TODO: consolidate with other depth values
+             # water_level = ds_sv["water_level"].values
+             gc.collect()
+             print("Done computing volume backscatter strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             # frequencies = echodata.environment.frequency_nominal.values
+             #################################################################
+             # Get GPS coordinates, just overwrite the lat lon values
+             gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                 echodata=echodata,
+                 output_bucket_name=output_bucket_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 endpoint_url=endpoint_url,
+                 write_geojson=False,
+             )
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+             ds_sv.latitude.values = (
+                 lat  # overwriting echopype gps values to include missing values
+             )
+             ds_sv.longitude.values = lon
+             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+             # Create the netcdf
+             netcdf_name_computed_Sv = f"{Path(raw_file_name).stem}_computed_Sv.nc"
+
+             # Xarray Dataset to netcdf
+             ds_sv.to_netcdf(
+                 path=netcdf_name_computed_Sv,
+                 mode="w",
+             )
+             gc.collect()
+             #################################################################
+             # output_netcdf_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If netcdf already exists then delete
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "NetCDF dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             child_objects_computed_Sv = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+             if len(child_objects_computed_Sv) > 0:
+                 print("data already exists in s3, deleting existing and continuing.")
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects_computed_Sv,
+                 )
+             #################################################################
+             s3_manager.upload_file(
+                 filename=netcdf_name,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             s3_manager.upload_file(
+                 filename=netcdf_name_computed_Sv,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+         except Exception as err:
+             print(f"Exception encountered creating local netcdf with echopype: {err}")
+             raise RuntimeError(f"Problem creating local netcdf, {err}")
+         finally:
+             gc.collect()
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"]
+             )
+             print("Done creating local zarr store.")
+
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
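End to end, raw_to_netcdf downloads the .raw file (and, when include_bot=True, the matching .bot bottom file) from data/raw/{ship}/{cruise}/{sensor}/ in the input bucket, opens it with echopype, writes a converted .nc file, computes calibrated Sv with GPS coordinates overwritten from the geometry manager, and uploads both NetCDF files under level_1/{ship}/{cruise}/{sensor}/ in the output bucket. A hypothetical invocation is sketched below; the table, bucket, ship, cruise, and file names are illustrative placeholders, not values shipped with the package:

    from water_column_sonar_processing.processing import RawToNetCDF

    # All identifiers below are made up for illustration; substitute your own
    # DynamoDB table, buckets, and cruise metadata.
    converter = RawToNetCDF()
    converter.raw_to_netcdf(
        table_name="example-cruise-table",
        input_bucket_name="example-input-bucket",
        output_bucket_name="example-output-bucket",
        ship_name="Example_Ship",
        cruise_name="EX0001",
        sensor_name="EK60",  # forwarded to echopype's open_raw as sonar_model
        raw_file_name="D20240101-T000000.raw",
        include_bot=False,  # skip the .bot download when no bottom file exists
    )

With these placeholder values, both NetCDF files would land under level_1/Example_Ship/EX0001/EK60/ in the output bucket, with any existing objects at those keys deleted first.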