water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +418 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +64 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +323 -0
  12. water_column_sonar_processing/geometry/__init__.py +13 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +241 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
  17. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  18. water_column_sonar_processing/index/__init__.py +3 -0
  19. water_column_sonar_processing/index/index_manager.py +381 -0
  20. water_column_sonar_processing/model/__init__.py +3 -0
  21. water_column_sonar_processing/model/zarr_manager.py +741 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  31. water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
water_column_sonar_processing/processing/__init__.py
@@ -0,0 +1,4 @@
+ from .raw_to_netcdf import RawToNetCDF
+ from .raw_to_zarr import RawToZarr, get_water_level
+
+ __all__ = ["RawToZarr", "get_water_level", "RawToNetCDF"]
water_column_sonar_processing/processing/raw_to_netcdf.py
@@ -0,0 +1,320 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path  # , PurePath
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.geometry import GeometryManager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # This code is getting copied from echofish-aws-raw-to-zarr-lambda
+ class RawToNetCDF:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         self.__compressor = Blosc(cname="zstd", clevel=9)  # shuffle=Blosc.NOSHUFFLE
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     def __netcdf_info_to_table(
+         self,
+         # output_bucket_name,
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         file_name,
+         # zarr_path,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing netCDF information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 # "#PS": "PIPELINE_STATUS",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 # "#ZB": "ZARR_BUCKET",
+                 # "#ZP": "ZARR_PATH",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                 # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+                 # ":zb": {"S": output_bucket_name},
+                 # ":zp": {"S": zarr_path},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 # "#ED = :ed, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 # "#PS = :ps, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+                 # "#ZB = :zb, "
+                 # "#ZP = :zp"
+             ),
+         )
+         print("Done writing netCDF information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     def __upload_files_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(local_directory):
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     def __upload_file_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = [local_directory]
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+     def raw_to_netcdf(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url=None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, and uploads files
+         to the NODD bucket.
+
+         Creates two files: the echopype-opened file and the Sv-calibrated file.
+         """
+         print(f"Opening raw: {raw_file_name} and creating netcdf.")
+         try:
+             geometry_manager = GeometryManager()
+             cleaner = Cleaner()
+             cleaner.delete_local_files(
+                 file_types=["*.nc", "*.json"]
+             )  # TODO: include bot and raw?
+
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             s3_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+             )
+             bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+             s3_bottom_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+             )
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+             )
+             # TODO: add the bottom file
+             if include_bot:
+                 s3_manager.download_file(
+                     bucket_name=input_bucket_name,
+                     key=s3_bottom_file_path,
+                     file_name=bottom_file_name,
+                 )
+
+             gc.collect()
+             print("Opening raw file with echopype.")
+             # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+             # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+             )
+
+             netcdf_name = f"{Path(raw_file_name).stem}.nc"
+             # Xarray Dataset to netcdf
+             echodata.to_netcdf(
+                 save_path=netcdf_name,
+                 compress=True,
+                 overwrite=True,
+             )
+
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(
+                 ds_sv, echodata
+             )  # TODO: consolidate with other depth values
+             # water_level = ds_sv["water_level"].values
+             gc.collect()
+             print("Done computing volume backscattering strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             # frequencies = echodata.environment.frequency_nominal.values
+             #################################################################
+             # Get GPS coordinates, just overwrite the lat lon values
+             gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                 echodata=echodata,
+                 output_bucket_name=output_bucket_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 endpoint_url=endpoint_url,
+                 write_geojson=False,
+             )
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+             ds_sv.latitude.values = (
+                 lat  # overwriting echopype gps values to include missing values
+             )
+             ds_sv.longitude.values = lon
+             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+             # Create the netcdf
+             netcdf_name_computed_Sv = f"{Path(raw_file_name).stem}_computed_Sv.nc"
+
+             # Xarray Dataset to netcdf
+             ds_sv.to_netcdf(
+                 path=netcdf_name_computed_Sv,
+                 mode="w",
+             )
+             gc.collect()
+             #################################################################
+             # output_netcdf_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If netcdf already exists then delete
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "NetCDF dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             child_objects_computed_Sv = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+             if len(child_objects_computed_Sv) > 0:
+                 print("data already exists in s3, deleting existing and continuing.")
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects_computed_Sv,
+                 )
+             #################################################################
+             s3_manager.upload_file(
+                 filename=netcdf_name,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             s3_manager.upload_file(
+                 filename=netcdf_name_computed_Sv,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+         except Exception as err:
+             print(f"Exception encountered creating local netcdf with echopype: {err}")
+             raise RuntimeError(f"Problem creating local netcdf, {err}")
+         finally:
+             gc.collect()
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"]
+             )
+             print("Done creating local netCDF files.")
+
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
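
The raw_to_netcdf entry point above expects the .raw file (and optional .bot file) under data/raw/{ship}/{cruise}/{sensor}/ in the input bucket and writes two netCDF files under a level_1/ prefix in the output bucket. A minimal invocation sketch follows; the ship/cruise/sensor values are reused from examples in the code comments, while the table and output bucket names are placeholders, not values shipped with the package:

    from water_column_sonar_processing.processing import RawToNetCDF

    # Placeholder table/bucket names; endpoint_url=None targets real AWS,
    # or point it at a moto/localstack endpoint when testing locally.
    RawToNetCDF().raw_to_netcdf(
        table_name="example-cruise-table",           # hypothetical DynamoDB table
        input_bucket_name="noaa-wcsd-pds",           # public NOAA raw-data bucket
        output_bucket_name="example-output-bucket",  # hypothetical NODD bucket
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0706",
        sensor_name="EK60",
        raw_file_name="D20070724-T042400.raw",
        endpoint_url=None,
        include_bot=True,
    )

On success this uploads both {stem}.nc and {stem}_computed_Sv.nc and records the file's cruise metadata in the DynamoDB table.
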
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -0,0 +1,331 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.utility import Cleaner
+ from water_column_sonar_processing.utility import Constants
+
+ # from numcodecs import Blosc
+ level_1 = str(Constants.LEVEL_1.value)
+
+
+ def get_water_level(ds):
+     """
+     Broken out as its own function so it can be mocked in tests.
+     """
+     if "water_level" in ds.keys():
+         return ds.water_level.values
+     else:
+         return 0.0
+
+
+ # This code is getting copied from echofish-aws-raw-to-zarr-lambda
+ class RawToZarr:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         # self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+         self.__compressor = Blosc(cname="zstd", clevel=9)
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     @staticmethod
+     def __zarr_info_to_table(
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,  # : Constants, TODO: convert to enum
+         file_name,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing Zarr information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+             ),
+         )
+         print("Done writing Zarr information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     @staticmethod
+     def __upload_files_to_output_bucket(
+         output_bucket_name: str,
+         local_directory: str,
+         # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
+         object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         # TODO: this will not work if the local_directory is anywhere other than the current folder
+         # see test_s3_manager test_upload...pool_executor for solution
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(
+             local_directory
+         ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+
+     ############################################################################
+     def raw_to_zarr(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url: Optional[str] = None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+         to the NODD bucket.
+         """
+         print(f"Opening raw: {raw_file_name} and creating zarr store.")
+         # geometry_manager = GeometryManager()
+         cleaner = Cleaner()
+         cleaner.delete_local_files(
+             file_types=["*.zarr", "*.json"]
+         )  # TODO: include bot and raw?
+
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         s3_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+         )
+         bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+         s3_bottom_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+         )
+         s3_manager.download_file(
+             bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+         )
+         # TODO: add the bottom file
+         if include_bot:
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name,
+                 key=s3_bottom_file_path,
+                 file_name=bottom_file_name,
+             )
+
+         try:
+             gc.collect()
+             print("Opening raw file with echopype.")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+             )
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(ds_sv, echodata)
+             water_level = get_water_level(ds_sv)
+
+             gc.collect()
+             print("Done computing volume backscattering strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv --> add to ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             frequencies = echodata.environment.frequency_nominal.values
+             if len(frequencies) != len(set(frequencies)):
+                 raise Exception("Problem: duplicate frequency_nominal values across channels")
+             #################################################################
+             # add gps data
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+
+             if np.any(ds_sv.latitude.values > 90.0) or np.any(
+                 ds_sv.latitude.values < -90.0
+             ):
+                 ds_sv.latitude.values[np.where(ds_sv.latitude.values > 90.0)] = np.nan
+                 ds_sv.latitude.values[np.where(ds_sv.latitude.values < -90.0)] = np.nan
+
+             if np.any(ds_sv.longitude.values > 180.0) or np.any(
+                 ds_sv.longitude.values < -180.0
+             ):
+                 ds_sv.longitude.values[np.where(ds_sv.longitude.values > 180.0)] = (
+                     np.nan
+                 )
+                 ds_sv.longitude.values[np.where(ds_sv.longitude.values < -180.0)] = (
+                     np.nan
+                 )
+
+             #################################################################
+             min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+             max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
+             # This is the number of missing values found throughout the lat/lon
+             lat = ds_sv.latitude.values
+             lon = ds_sv.longitude.values
+             num_ping_time_drop_na = np.min(
+                 [  # Isn't always symmetric
+                     lat[~np.isnan(lat)].shape[0],
+                     lon[~np.isnan(lon)].shape[0],
+                 ]
+             )
+             start_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+             )
+             end_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+             )
+             channels = list(ds_sv.channel.values)
+             #
+             #################################################################
+             # Create the zarr store
+             store_name = f"{Path(raw_file_name).stem}.zarr"
+             ds_sv.to_zarr(
+                 store=store_name,
+                 zarr_format=3,
+                 consolidated=False,
+                 write_empty_chunks=False,
+             )
+             gc.collect()
+             #################################################################
+             output_zarr_prefix = f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If zarr store already exists then delete
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "Zarr store dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             #################################################################
+             self.__upload_files_to_output_bucket(
+                 output_bucket_name=output_bucket_name,
+                 local_directory=store_name,
+                 object_prefix=output_zarr_prefix,
+                 endpoint_url=endpoint_url,
+             )
+             #################################################################
+             self.__zarr_info_to_table(
+                 table_name=table_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 min_echo_range=min_echo_range,
+                 max_echo_range=max_echo_range,
+                 num_ping_time_dropna=num_ping_time_drop_na,
+                 start_time=start_time,
+                 end_time=end_time,
+                 frequencies=frequencies,
+                 channels=channels,
+                 water_level=water_level,
+             )
+             #######################################################################
+             # TODO: verify count of objects matches, publish message, update status
+             #######################################################################
+         except Exception as err:
+             print(
+                 f"Exception encountered creating local Zarr store with echopype: {err}"
+             )
+             raise RuntimeError(f"Problem creating local Zarr store, {err}")
+         finally:
+             gc.collect()
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+             )
+             print("Finished raw-to-zarr conversion.")
+
+     ############################################################################
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
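
raw_to_zarr follows the same download/calibrate/upload flow but writes a Zarr v3 store, and get_water_level is exported at package level specifically so tests can patch it (its docstring notes it exists to be mocked). A sketch under the same placeholder names as the netCDF example above; the mock target reflects the module path shown in this diff:

    from unittest import mock

    from water_column_sonar_processing.processing import RawToZarr

    rtz = RawToZarr()

    # Normal run: writes {stem}.zarr under the level_1/ prefix and records
    # cruise metadata in the DynamoDB table. Placeholder names as before.
    rtz.raw_to_zarr(
        table_name="example-cruise-table",
        input_bucket_name="noaa-wcsd-pds",
        output_bucket_name="example-output-bucket",
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0706",
        sensor_name="EK60",
        raw_file_name="D20070724-T042400.raw",
    )

    # In tests, pin the water level for datasets that lack the variable:
    with mock.patch(
        "water_column_sonar_processing.processing.raw_to_zarr.get_water_level",
        return_value=0.0,
    ):
        ...  # re-run raw_to_zarr with the same arguments as above
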
water_column_sonar_processing/utility/__init__.py
@@ -0,0 +1,13 @@
+ from .cleaner import Cleaner
+ from .constants import Constants, Coordinates, Instruments
+ from .pipeline_status import PipelineStatus
+ from .timestamp import Timestamp
+
+ __all__ = [
+     "Cleaner",
+     "Instruments",
+     "Constants",
+     "Coordinates",
+     "PipelineStatus",
+     "Timestamp",
+ ]
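
These utility exports are the pieces used throughout the hunks above. A quick orientation, with usage as it appears in this diff; the exact value of Constants.LEVEL_1 is an assumption inferred from the hardcoded "level_1" prefix in raw_to_netcdf:

    from water_column_sonar_processing.utility import Cleaner, Constants

    # Remove pipeline scratch files from the working directory,
    # as raw_to_zarr does before and after processing.
    Cleaner().delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])

    # Output-prefix constant used to build level-1 S3 keys (presumed "level_1").
    level_1 = str(Constants.LEVEL_1.value)
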