water-column-sonar-processing 0.0.6__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. water_column_sonar_processing/__init__.py +2 -5
  2. water_column_sonar_processing/aws/__init__.py +2 -2
  3. water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
  4. water_column_sonar_processing/aws/s3_manager.py +184 -112
  5. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  6. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  7. water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  9. water_column_sonar_processing/cruise/resample_regrid.py +144 -129
  10. water_column_sonar_processing/geometry/__init__.py +10 -2
  11. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +60 -44
  13. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  14. water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
  15. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  16. water_column_sonar_processing/index/index_manager.py +157 -27
  17. water_column_sonar_processing/model/zarr_manager.py +663 -258
  18. water_column_sonar_processing/processing/__init__.py +4 -0
  19. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  20. water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
  21. water_column_sonar_processing/utility/__init__.py +9 -2
  22. water_column_sonar_processing/utility/cleaner.py +1 -0
  23. water_column_sonar_processing/utility/constants.py +69 -14
  24. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  25. water_column_sonar_processing/utility/timestamp.py +3 -4
  26. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  27. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  28. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  29. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  30. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  31. water_column_sonar_processing/process.py +0 -147
  32. water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
  33. water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
  34. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/__init__.py +4 -0
@@ -0,0 +1,4 @@
+ from .raw_to_netcdf import RawToNetCDF
+ from .raw_to_zarr import RawToZarr, get_water_level
+
+ __all__ = ["RawToZarr", "get_water_level", "RawToNetCDF"]
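The new processing subpackage exposes both converters at the package level, so downstream code can import them directly. A minimal sketch using only the names from the __all__ list above:

    from water_column_sonar_processing.processing import (
        RawToNetCDF,
        RawToZarr,
        get_water_level,
    )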
water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
@@ -0,0 +1,320 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path  # , PurePath
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.geometry import GeometryManager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # This code is getting copied from echofish-aws-raw-to-zarr-lambda
+ class RawToNetCDF:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         self.__compressor = Blosc(cname="zstd", clevel=9)  # shuffle=Blosc.NOSHUFFLE
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     def __netcdf_info_to_table(
+         self,
+         # output_bucket_name,
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         file_name,
+         # zarr_path,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing Zarr information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 # "#PS": "PIPELINE_STATUS",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 # "#ZB": "ZARR_BUCKET",
+                 # "#ZP": "ZARR_PATH",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                 # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+                 # ":zb": {"S": output_bucket_name},
+                 # ":zp": {"S": zarr_path},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 # "#ED = :ed, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 # "#PS = :ps, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+                 # "#ZB = :zb, "
+                 # "#ZP = :zp"
+             ),
+         )
+         print("Done writing Zarr information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     def __upload_files_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(local_directory):
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     def __upload_file_to_output_bucket(
+         self,
+         output_bucket_name,
+         local_directory,
+         object_prefix,
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = [local_directory]
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+     def raw_to_netcdf(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url=None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, and uploads the
+         results to the NODD bucket.
+
+         Creates two files: one from the echopype-opened raw data and one with the calibrated Sv data.
+         """
+         print(f"Opening raw: {raw_file_name} and creating netcdf.")
+         try:
+             geometry_manager = GeometryManager()
+             cleaner = Cleaner()
+             cleaner.delete_local_files(
+                 file_types=["*.nc", "*.json"]
+             )  # TODO: include bot and raw?
+
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             s3_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+             )
+             bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+             s3_bottom_file_path = (
+                 f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+             )
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+             )
+             # TODO: add the bottom file
+             if include_bot:
+                 s3_manager.download_file(
+                     bucket_name=input_bucket_name,
+                     key=s3_bottom_file_path,
+                     file_name=bottom_file_name,
+                 )
+
+             gc.collect()
+             print("Opening raw file with echopype.")
+             # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+             # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+             )
+
+             netcdf_name = f"{Path(raw_file_name).stem}.nc"
+             # Xarray Dataset to netcdf
+             echodata.to_netcdf(
+                 save_path=netcdf_name,
+                 compress=True,
+                 overwrite=True,
+             )
+
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(
+                 ds_sv, echodata
+             )  # TODO: consolidate with other depth values
+             # water_level = ds_sv["water_level"].values
+             gc.collect()
+             print("Done computing volume backscatter strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             # frequencies = echodata.environment.frequency_nominal.values
+             #################################################################
+             # Get GPS coordinates, just overwrite the lat lon values
+             gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                 echodata=echodata,
+                 output_bucket_name=output_bucket_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 endpoint_url=endpoint_url,
+                 write_geojson=False,
+             )
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+             ds_sv.latitude.values = (
+                 lat  # overwriting echopype gps values to include missing values
+             )
+             ds_sv.longitude.values = lon
+             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+             # Create the netcdf
+             netcdf_name_computed_Sv = f"{Path(raw_file_name).stem}_computed_Sv.nc"
+
+             # Xarray Dataset to netcdf
+             ds_sv.to_netcdf(
+                 path=netcdf_name_computed_Sv,
+                 mode="w",
+             )
+             gc.collect()
+             #################################################################
+             # output_netcdf_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If netcdf already exists then delete
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "NetCDF dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             child_objects_computed_Sv = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+             if len(child_objects_computed_Sv) > 0:
+                 print("data already exists in s3, deleting existing and continuing.")
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects_computed_Sv,
+                 )
+             #################################################################
+             s3_manager.upload_file(
+                 filename=netcdf_name,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+             )
+             s3_manager.upload_file(
+                 filename=netcdf_name_computed_Sv,
+                 bucket_name=output_bucket_name,
+                 key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+             )
+         except Exception as err:
+             print(f"Exception encountered creating local netcdf with echopype: {err}")
+             raise RuntimeError(f"Problem creating local netcdf, {err}")
+         finally:
+             gc.collect()
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"]
+             )
+             print("Done creating local netCDF files.")
+
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
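A minimal usage sketch for the new RawToNetCDF entry point, based on the raw_to_netcdf signature above. The table and output bucket names are hypothetical placeholders; noaa-wcsd-pds and the ship/cruise/sensor/file values come from examples in the module's own comments:

    from water_column_sonar_processing.processing import RawToNetCDF

    converter = RawToNetCDF()
    converter.raw_to_netcdf(
        table_name="example-pipeline-table",         # hypothetical DynamoDB table
        input_bucket_name="noaa-wcsd-pds",           # public raw-data bucket referenced in the comments
        output_bucket_name="example-output-bucket",  # hypothetical NODD output bucket
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0706",
        sensor_name="EK60",
        raw_file_name="D20070724-T042400.raw",
        endpoint_url=None,    # default AWS endpoint
        include_bot=True,     # also download the matching .bot bottom file
    )

On success this writes <stem>.nc and <stem>_computed_Sv.nc locally and uploads both under the level_1/ prefix of the output bucket.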
water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
@@ -0,0 +1,341 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Optional
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.utility import Cleaner
+ from water_column_sonar_processing.utility import Constants
+
+ # from numcodecs import Blosc
+ level_1 = str(Constants.LEVEL_1.value)
+
+
+ def get_water_level(ds):
+     """
+     Broken out as a separate function so it can be mocked in tests.
+     """
+     if "water_level" in ds.keys():
+         return ds.water_level.values
+     else:
+         return 0.0
+
+
+ # This code is getting copied from echofish-aws-raw-to-zarr-lambda
+ class RawToZarr:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         # self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+         self.__compressor = Blosc(cname="zstd", clevel=9)
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     @staticmethod
+     def __zarr_info_to_table(
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,  # : Constants, TODO: convert to enum
+         file_name,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing Zarr information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+             ),
+         )
+         print("Done writing Zarr information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     @staticmethod
+     def __upload_files_to_output_bucket(
+         output_bucket_name: str,
+         local_directory: str,
+         # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
+         object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         # TODO: this will not work if the local_directory is anywhere other than the current folder
+         # see test_s3_manager test_upload...pool_executor for solution
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(
+             local_directory
+         ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+
+     ############################################################################
+     def raw_to_zarr(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url: Optional[str] = None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+         to the nodd bucket.
+         """
+         print(f"Opening raw: {raw_file_name} and creating zarr store.")
+         # geometry_manager = GeometryManager()
+         cleaner = Cleaner()
+         cleaner.delete_local_files(
+             file_types=["*.zarr", "*.json"]
+         )  # TODO: include bot and raw?
+
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         s3_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+         )
+         bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+         s3_bottom_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+         )
+         s3_manager.download_file(
+             bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+         )
+         # TODO: add the bottom file
+         if include_bot:
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name,
+                 key=s3_bottom_file_path,
+                 file_name=bottom_file_name,
+             )
+
+         try:
+             gc.collect()
+             print("Opening raw file with echopype.")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+             )
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(ds_sv, echodata)
+             water_level = get_water_level(ds_sv)
+
+             gc.collect()
+             print("Done computing volume backscatter strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv --> add to ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             frequencies = echodata.environment.frequency_nominal.values
+             if len(frequencies) != len(set(frequencies)):
+                 raise Exception("Problem: number of unique frequencies does not match number of channels")
+             #################################################################
+             # add gps data
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+
+             # Get GPS coordinates
+             # gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+             #     echodata=echodata,
+             #     output_bucket_name=output_bucket_name,
+             #     ship_name=ship_name,
+             #     cruise_name=cruise_name,
+             #     sensor_name=sensor_name,
+             #     file_name=raw_file_name,
+             #     endpoint_url=endpoint_url,
+             #     write_geojson=True,
+             # )
+
+             # ds_sv.latitude.values = (  # their lat values are better than mine
+             #     lat  # overwriting echopype gps values to include missing values
+             # )
+             # ds_sv.longitude.values = lon
+             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+             #################################################################
+             # Technically the min_echo_range would be 0 m.
+             # TODO: this var name is supposed to represent minimum resolution of depth measurements
+             # TODO revert this so that smaller diffs can be used
+             # The finest the resolution can get is 0.25 meters
+             min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+             # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
+             # inline with the
+             # min_echo_range = np.max(  # TODO: I think this is creating problems with the water-level
+             #     [0.20, min_echo_range]
+             # )
+
+             max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
+             # This is the number of missing values found throughout the lat/lon
+             # num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
+             num_ping_time_drop_na = ds_sv.latitude.shape[
+                 0
+             ]  # TODO: just setting to size
+             #
+             start_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+             )
+             end_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+             )
+             channels = list(ds_sv.channel.values)
+             #
+             #################################################################
+             # Create the zarr store
+             store_name = f"{Path(raw_file_name).stem}.zarr"
+             # Sv = ds_sv.Sv
+             # ds_sv['Sv'] = Sv.astype('int32', copy=False)
+             ds_sv.to_zarr(
+                 store=store_name,
+                 zarr_format=3,
+                 consolidated=False,
+                 write_empty_chunks=False,
+             )  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+             gc.collect()
+             #################################################################
+             output_zarr_prefix = f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If zarr store already exists then delete
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "Zarr store dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             #################################################################
+             self.__upload_files_to_output_bucket(
+                 output_bucket_name=output_bucket_name,
+                 local_directory=store_name,
+                 object_prefix=output_zarr_prefix,
+                 endpoint_url=endpoint_url,
+             )
+             #################################################################
+             self.__zarr_info_to_table(
+                 table_name=table_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 min_echo_range=min_echo_range,
+                 max_echo_range=max_echo_range,
+                 num_ping_time_dropna=num_ping_time_drop_na,
+                 start_time=start_time,
+                 end_time=end_time,
+                 frequencies=frequencies,
+                 channels=channels,
+                 water_level=water_level,
+             )
+             #######################################################################
+             # TODO: verify count of objects matches, publish message, update status
+             #######################################################################
+         except Exception as err:
+             print(
+                 f"Exception encountered creating local Zarr store with echopype: {err}"
+             )
+             raise RuntimeError(f"Problem creating local Zarr store, {err}")
+         finally:
+             gc.collect()
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+             )
+             print("Finished raw-to-zarr conversion.")
+
+     ############################################################################
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
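The Zarr path follows the same shape, again with hypothetical table and bucket names. Because get_water_level is a module-level function, tests can patch it instead of constructing a dataset with a water_level variable:

    from water_column_sonar_processing.processing import RawToZarr

    converter = RawToZarr()
    converter.raw_to_zarr(
        table_name="example-pipeline-table",         # hypothetical DynamoDB table
        input_bucket_name="noaa-wcsd-pds",
        output_bucket_name="example-output-bucket",  # hypothetical NODD output bucket
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0706",
        sensor_name="EK60",
        raw_file_name="D20070724-T042400.raw",
    )

On success the store lands at level_1/Henry_B._Bigelow/HB0706/EK60/D20070724-T042400.zarr and the file's metadata (frequencies, channels, echo ranges, water level, start/end times) is upserted into the DynamoDB table.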
water_column_sonar_processing/utility/__init__.py +9 -2
@@ -1,6 +1,13 @@
  from .cleaner import Cleaner
- from .constants import Constants
+ from .constants import Constants, Coordinates, Instruments
  from .pipeline_status import PipelineStatus
  from .timestamp import Timestamp

- __all__ = ["Cleaner", "Constants", "PipelineStatus", "Timestamp"]
+ __all__ = [
+     "Cleaner",
+     "Instruments",
+     "Constants",
+     "Coordinates",
+     "PipelineStatus",
+     "Timestamp",
+ ]
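With the widened export list, callers can pull the new enums straight from the utility package, as raw_to_zarr.py above already does for its output prefix (a sketch; only LEVEL_1 is confirmed by the diff, the other members are assumptions from the import list):

    from water_column_sonar_processing.utility import Constants, Coordinates, Instruments

    level_1 = str(Constants.LEVEL_1.value)  # output key prefix used by the zarr pipeline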
water_column_sonar_processing/utility/cleaner.py +1 -0
@@ -7,6 +7,7 @@ import shutil
  class Cleaner:
      @staticmethod
      def delete_local_files(file_types=["*.raw*", "*.model"]):  # '*.json'
+         # TODO: add .zarr to this
          print("Deleting all local raw and model files")
          for i in file_types:
              for j in glob.glob(i):
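Both new pipelines lean on Cleaner to scrub the working directory before and after a run; a short sketch reusing the glob patterns seen in the hunks above:

    from water_column_sonar_processing.utility import Cleaner

    cleaner = Cleaner()
    # Patterns are expanded with glob.glob relative to the current working directory.
    cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"])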