water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
@@ -1,32 +1,45 @@
1
1
  import gc
2
2
  import os
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
3
7
  import echopype as ep
4
- import numcodecs
5
8
  import numpy as np
6
- from numcodecs import Blosc
7
- from datetime import datetime
8
- from pathlib import Path # , PurePath
9
+ from zarr.codecs import Blosc
9
10
 
10
11
  from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
11
- from water_column_sonar_processing.geometry import GeometryManager
12
12
  from water_column_sonar_processing.utility import Cleaner
13
+ from water_column_sonar_processing.utility import Constants
14
+
15
+ # from numcodecs import Blosc
16
+ level_1 = str(Constants.LEVEL_1.value)
13
17
 
14
- TEMPDIR = "/tmp"
18
+
19
+ def get_water_level(ds):
20
+ """
21
+ needs to be mocked up so that's why this is broken out
22
+ """
23
+ if "water_level" in ds.keys():
24
+ return ds.water_level.values
25
+ else:
26
+ return 0.0
15
27
 
16
28
 
17
29
  # This code is getting copied from echofish-aws-raw-to-zarr-lambda
18
30
  class RawToZarr:
19
31
  #######################################################
20
32
  def __init__(
21
- self,
22
- # output_bucket_access_key,
23
- # output_bucket_secret_access_key,
24
- # # overwrite_existing_zarr_store,
33
+ self,
34
+ # output_bucket_access_key,
35
+ # output_bucket_secret_access_key,
36
+ # # overwrite_existing_zarr_store,
25
37
  ):
26
38
  # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
27
- self.__compressor = Blosc(cname="zstd", clevel=2) # shuffle=Blosc.NOSHUFFLE
39
+ # self.__compressor = Blosc(cname="zstd", clevel=2) # shuffle=Blosc.NOSHUFFLE
40
+ self.__compressor = Blosc(cname="zstd", clevel=9)
28
41
  self.__overwrite = True
29
- self.__num_threads = numcodecs.blosc.get_nthreads()
42
+ # self.__num_threads = numcodecs.blosc.get_nthreads()
30
43
  # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
31
44
  # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
32
45
  # self.__table_name = table_name
@@ -34,50 +47,43 @@ class RawToZarr:
34
47
 
35
48
  ############################################################################
36
49
  ############################################################################
50
+ @staticmethod
37
51
  def __zarr_info_to_table(
38
- self,
39
- output_bucket_name,
40
- table_name,
41
- ship_name,
42
- cruise_name,
43
- sensor_name,
44
- file_name,
45
- zarr_path,
46
- min_echo_range,
47
- max_echo_range,
48
- num_ping_time_dropna,
49
- start_time,
50
- end_time,
51
- frequencies,
52
- channels
52
+ table_name,
53
+ ship_name,
54
+ cruise_name,
55
+ sensor_name, # : Constants, TODO: convert to enum
56
+ file_name,
57
+ min_echo_range,
58
+ max_echo_range,
59
+ num_ping_time_dropna,
60
+ start_time,
61
+ end_time,
62
+ frequencies,
63
+ channels,
64
+ water_level,
53
65
  ):
54
- print('Writing Zarr information to DynamoDB table.')
66
+ print("Writing Zarr information to DynamoDB table.")
55
67
  dynamodb_manager = DynamoDBManager()
56
-
57
- # The problem is that these values were never populated
58
- # and so when the query looks for values that aren't there
59
- # they fail
60
68
  dynamodb_manager.update_item(
61
69
  table_name=table_name,
62
70
  key={
63
- 'FILE_NAME': {'S': file_name}, # Partition Key
64
- 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
71
+ "FILE_NAME": {"S": file_name}, # Partition Key
72
+ "CRUISE_NAME": {"S": cruise_name}, # Sort Key
65
73
  },
66
74
  expression_attribute_names={
67
- '#CH': 'CHANNELS',
68
- '#ET': 'END_TIME',
75
+ "#CH": "CHANNELS",
76
+ "#ET": "END_TIME",
69
77
  # "#ED": "ERROR_DETAIL",
70
- '#FR': 'FREQUENCIES',
71
- '#MA': 'MAX_ECHO_RANGE',
72
- '#MI': 'MIN_ECHO_RANGE',
73
- '#ND': 'NUM_PING_TIME_DROPNA',
74
- "#PS": "PIPELINE_STATUS",
78
+ "#FR": "FREQUENCIES",
79
+ "#MA": "MAX_ECHO_RANGE",
80
+ "#MI": "MIN_ECHO_RANGE",
81
+ "#ND": "NUM_PING_TIME_DROPNA",
75
82
  "#PT": "PIPELINE_TIME",
76
83
  "#SE": "SENSOR_NAME",
77
84
  "#SH": "SHIP_NAME",
78
- '#ST': 'START_TIME',
79
- '#ZB': 'ZARR_BUCKET',
80
- '#ZP': 'ZARR_PATH',
85
+ "#ST": "START_TIME",
86
+ "#WL": "WATER_LEVEL",
81
87
  },
82
88
  expression_attribute_values={
83
89
  ":ch": {"L": [{"S": i} for i in channels]},
@@ -87,47 +93,49 @@ class RawToZarr:
87
93
  ":ma": {"N": str(np.round(max_echo_range, 4))},
88
94
  ":mi": {"N": str(np.round(min_echo_range, 4))},
89
95
  ":nd": {"N": str(num_ping_time_dropna)},
90
- ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
91
96
  ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
92
97
  ":se": {"S": sensor_name},
93
98
  ":sh": {"S": ship_name},
94
99
  ":st": {"S": start_time},
95
- ":zb": {"S": output_bucket_name},
96
- ":zp": { "S": zarr_path },
100
+ ":wl": {"N": str(np.round(water_level, 2))},
97
101
  },
98
102
  update_expression=(
99
103
  "SET "
100
104
  "#CH = :ch, "
101
105
  "#ET = :et, "
102
- # "#ED = :ed, "
103
106
  "#FR = :fr, "
104
107
  "#MA = :ma, "
105
108
  "#MI = :mi, "
106
109
  "#ND = :nd, "
107
- "#PS = :ps, "
108
110
  "#PT = :pt, "
109
111
  "#SE = :se, "
110
112
  "#SH = :sh, "
111
113
  "#ST = :st, "
112
- "#ZB = :zb, "
113
- "#ZP = :zp"
114
+ "#WL = :wl"
114
115
  ),
115
116
  )
117
+ print("Done writing Zarr information to DynamoDB table.")
116
118
 
117
119
  ############################################################################
118
120
  ############################################################################
119
121
  ############################################################################
122
+ @staticmethod
120
123
  def __upload_files_to_output_bucket(
121
- self,
122
- output_bucket_name,
123
- local_directory,
124
- object_prefix,
124
+ output_bucket_name: str,
125
+ local_directory: str,
126
+ # e.g. 'D20070724-T042400.zarr' # TODO: problem: if this is not in the current directory
127
+ object_prefix: str, # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
128
+ endpoint_url,
125
129
  ):
126
130
  # Note: this will be passed credentials if using NODD
127
- s3_manager = S3Manager()
128
- print('Uploading files using thread pool executor.')
131
+ # TODO: this will not work if the local_directory is anywhere other than the current folder
132
+ # see test_s3_manager test_upload...pool_executor for solution
133
+ s3_manager = S3Manager(endpoint_url=endpoint_url)
134
+ print("Uploading files using thread pool executor.")
129
135
  all_files = []
130
- for subdir, dirs, files in os.walk(local_directory):
136
+ for subdir, dirs, files in os.walk(
137
+ local_directory
138
+ ): # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
131
139
  for file in files:
132
140
  local_path = os.path.join(subdir, file)
133
141
  s3_key = os.path.join(object_prefix, local_path)
@@ -139,211 +147,195 @@ class RawToZarr:
139
147
  )
140
148
  return all_uploads
141
149
 
150
+ ############################################################################
151
+
142
152
  ############################################################################
143
153
  def raw_to_zarr(
144
- self,
145
- table_name,
146
- output_bucket_name,
147
- ship_name,
148
- cruise_name,
149
- sensor_name,
150
- raw_file_name,
154
+ self,
155
+ table_name,
156
+ input_bucket_name,
157
+ output_bucket_name,
158
+ ship_name,
159
+ cruise_name,
160
+ sensor_name,
161
+ raw_file_name,
162
+ endpoint_url: Optional[str] = None,
163
+ include_bot=True,
151
164
  ):
152
- print(f'Opening raw: {raw_file_name} and creating zarr store.')
153
- geometry_manager = GeometryManager()
165
+ """
166
+ Downloads the raw files, processes them with echopype, writes geojson, and uploads files
167
+ to the nodd bucket.
168
+ """
169
+ print(f"Opening raw: {raw_file_name} and creating zarr store.")
170
+ # geometry_manager = GeometryManager()
154
171
  cleaner = Cleaner()
155
- cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw?ß
172
+ cleaner.delete_local_files(
173
+ file_types=["*.zarr", "*.json"]
174
+ ) # TODO: include bot and raw?
175
+
176
+ s3_manager = S3Manager(endpoint_url=endpoint_url)
177
+ s3_file_path = (
178
+ f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
179
+ )
180
+ bottom_file_name = f"{Path(raw_file_name).stem}.bot"
181
+ s3_bottom_file_path = (
182
+ f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
183
+ )
184
+ s3_manager.download_file(
185
+ bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
186
+ )
187
+ # TODO: add the bottom file
188
+ if include_bot:
189
+ s3_manager.download_file(
190
+ bucket_name=input_bucket_name,
191
+ key=s3_bottom_file_path,
192
+ file_name=bottom_file_name,
193
+ )
194
+
156
195
  try:
157
196
  gc.collect()
158
- print('Opening raw file with echopype.')
159
- # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
160
- # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
197
+ print("Opening raw file with echopype.")
161
198
  echodata = ep.open_raw(
162
199
  raw_file=raw_file_name,
163
200
  sonar_model=sensor_name,
164
- include_bot=True,
165
- use_swap=True,
166
- # max_chunk_size=100,
167
- # storage_options={'anon': True } # 'endpoint_url': self.endpoint_url} # this was creating problems
201
+ include_bot=include_bot,
168
202
  )
169
- print('Compute volume backscattering strength (Sv) from raw data.')
203
+ print("Compute volume backscattering strength (Sv) from raw dataset.")
170
204
  ds_sv = ep.calibrate.compute_Sv(echodata)
171
- print('Done computing volume backscattering strength (Sv) from raw data.')
205
+ ds_sv = ep.consolidate.add_depth(ds_sv, echodata)
206
+ water_level = get_water_level(ds_sv)
207
+
208
+ gc.collect()
209
+ print("Done computing volume backscatter strength (Sv) from raw dataset.")
210
+ # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
211
+ # but is not written out with ds_sv --> add to ds_sv
212
+ if "detected_seafloor_depth" in list(echodata.vendor.variables):
213
+ ds_sv["detected_seafloor_depth"] = (
214
+ echodata.vendor.detected_seafloor_depth
215
+ )
216
+ #
172
217
  frequencies = echodata.environment.frequency_nominal.values
218
+ if len(frequencies) != len(set(frequencies)):
219
+ raise Exception("Problem number of frequencies does not match channels")
173
220
  #################################################################
221
+ # add gps data
222
+ ds_sv = ep.consolidate.add_location(ds_sv, echodata)
223
+
174
224
  # Get GPS coordinates
175
- gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
176
- echodata=echodata,
177
- output_bucket_name=output_bucket_name,
178
- ship_name=ship_name,
179
- cruise_name=cruise_name,
180
- sensor_name=sensor_name,
181
- file_name=raw_file_name,
182
- write_geojson=True
183
- )
225
+ # gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
226
+ # echodata=echodata,
227
+ # output_bucket_name=output_bucket_name,
228
+ # ship_name=ship_name,
229
+ # cruise_name=cruise_name,
230
+ # sensor_name=sensor_name,
231
+ # file_name=raw_file_name,
232
+ # endpoint_url=endpoint_url,
233
+ # write_geojson=True,
234
+ # )
235
+
236
+ # ds_sv.latitude.values = ( # their lat values are better than mine
237
+ # lat # overwriting echopype gps values to include missing values
238
+ # )
239
+ # ds_sv.longitude.values = lon
184
240
  # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
185
241
  #################################################################
186
242
  # Technically the min_echo_range would be 0 m.
187
243
  # TODO: this var name is supposed to represent minimum resolution of depth measurements
188
244
  # TODO revert this so that smaller diffs can be used
189
245
  # The most minimum the resolution can be is as small as 0.25 meters
190
- min_echo_range = np.maximum(
191
- 0.25,
192
- np.nanmin(np.diff(ds_sv.echo_range.values))
193
- )
246
+ min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
247
+ # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
248
+ # inline with the
249
+ # min_echo_range = np.max( # TODO: I think this is creating problems with the water-level
250
+ # [0.20, min_echo_range]
251
+ # )
252
+
194
253
  max_echo_range = float(np.nanmax(ds_sv.echo_range))
254
+
255
+ # This is the number of missing values found throughout the lat/lon
256
+ # num_ping_time_dropna = lat[~np.isnan(lat)].shape[0] # symmetric to lon
257
+ num_ping_time_drop_na = ds_sv.latitude.shape[
258
+ 0
259
+ ] # TODO: just setting to size
195
260
  #
196
- num_ping_time_dropna = lat[~np.isnan(lat)].shape[0] # symmetric to lon
197
- #
198
- start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
199
- end_time = np.datetime_as_string(ds_sv.ping_time.values[-1], unit='ms') + "Z"
261
+ start_time = (
262
+ np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
263
+ )
264
+ end_time = (
265
+ np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
266
+ )
200
267
  channels = list(ds_sv.channel.values)
201
268
  #
202
269
  #################################################################
203
270
  # Create the zarr store
204
271
  store_name = f"{Path(raw_file_name).stem}.zarr"
205
- ds_sv.to_zarr(store=store_name)
272
+ # Sv = ds_sv.Sv
273
+ # ds_sv['Sv'] = Sv.astype('int32', copy=False)
274
+ ds_sv.to_zarr(
275
+ store=store_name,
276
+ zarr_format=3,
277
+ consolidated=False,
278
+ write_empty_chunks=False,
279
+ ) # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
280
+ gc.collect()
206
281
  #################################################################
207
- # TODO: do i still need this?
208
- # print('Note: Adding GeoJSON inside Zarr store')
209
- # self.__write_geojson_to_file( # Was trying to write geojson to the L1 zarr store
210
- # store_name=store_name,
211
- # data=gps_data
212
- # )
282
+ output_zarr_prefix = f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/"
213
283
  #################################################################
214
- output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
215
- self.__zarr_info_to_table(
284
+ # If zarr store already exists then delete
285
+ s3_manager = S3Manager(endpoint_url=endpoint_url)
286
+ child_objects = s3_manager.get_child_objects(
287
+ bucket_name=output_bucket_name,
288
+ sub_prefix=f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
289
+ )
290
+ if len(child_objects) > 0:
291
+ print(
292
+ "Zarr store dataset already exists in s3, deleting existing and continuing."
293
+ )
294
+ s3_manager.delete_nodd_objects(
295
+ bucket_name=output_bucket_name,
296
+ objects=child_objects,
297
+ )
298
+ #################################################################
299
+ self.__upload_files_to_output_bucket(
216
300
  output_bucket_name=output_bucket_name,
301
+ local_directory=store_name,
302
+ object_prefix=output_zarr_prefix,
303
+ endpoint_url=endpoint_url,
304
+ )
305
+ #################################################################
306
+ self.__zarr_info_to_table(
217
307
  table_name=table_name,
218
308
  ship_name=ship_name,
219
309
  cruise_name=cruise_name,
220
310
  sensor_name=sensor_name,
221
311
  file_name=raw_file_name,
222
- zarr_path=os.path.join(output_zarr_prefix, store_name),
223
312
  min_echo_range=min_echo_range,
224
313
  max_echo_range=max_echo_range,
225
- num_ping_time_dropna=num_ping_time_dropna,
314
+ num_ping_time_dropna=num_ping_time_drop_na,
226
315
  start_time=start_time,
227
316
  end_time=end_time,
228
317
  frequencies=frequencies,
229
- channels=channels
318
+ channels=channels,
319
+ water_level=water_level,
230
320
  )
231
- ###################################################################
232
321
  #######################################################################
233
- self.__upload_files_to_output_bucket(
234
- output_bucket_name=output_bucket_name,
235
- local_directory=store_name,
236
- object_prefix=output_zarr_prefix
237
- )
238
- #######################################################################
239
- # # TODO: verify count of objects matches
240
- # s3_objects = self.__s3.list_objects(
241
- # bucket_name=self.__output_bucket,
242
- # prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
243
- # access_key_id=self.__output_bucket_access_key,
244
- # secret_access_key=self.__output_bucket_secret_access_key
245
- # )
246
- #######################################################################
247
- # self.__update_processing_status(
248
- # file_name=input_file_name,
249
- # cruise_name=cruise_name,
250
- # pipeline_status='SUCCESS_RAW_TO_ZARR'
251
- # )
322
+ # TODO: verify count of objects matches, publish message, update status
252
323
  #######################################################################
253
- # self.__publish_done_message(input_message)
254
- print('here')
255
324
  except Exception as err:
256
- print(f'Exception encountered creating local Zarr store with echopype: {err}')
325
+ print(
326
+ f"Exception encountered creating local Zarr store with echopype: {err}"
327
+ )
257
328
  raise RuntimeError(f"Problem creating local Zarr store, {err}")
258
329
  finally:
259
- cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
260
- print('Done creating local zarr store.')
330
+ gc.collect()
331
+ cleaner.delete_local_files(
332
+ file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
333
+ )
334
+ print("Finished raw-to-zarr conversion.")
261
335
 
262
336
  ############################################################################
263
- # TODO: does this get called?
264
- # def execute(self, input_message):
265
- # ship_name = input_message['shipName']
266
- # cruise_name = input_message['cruiseName']
267
- # sensor_name = input_message['sensorName']
268
- # input_file_name = input_message['fileName']
269
- # #
270
- # try:
271
- # self.__update_processing_status(
272
- # file_name=input_file_name,
273
- # cruise_name=cruise_name,
274
- # pipeline_status="PROCESSING_RAW_TO_ZARR"
275
- # )
276
- # #######################################################################
277
- # store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
278
- # output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
279
- # bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
280
- # zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
281
- # #
282
- # os.chdir(TEMPDIR) # Lambdas require use of temp directory
283
- # #######################################################################
284
- # #######################################################################
285
- # # Check if zarr store already exists
286
- # s3_objects = self.__s3.list_objects(
287
- # bucket_name=self.__output_bucket,
288
- # prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
289
- # access_key_id=self.__output_bucket_access_key,
290
- # secret_access_key=self.__output_bucket_secret_access_key
291
- # )
292
- # if len(s3_objects) > 0:
293
- # print('Zarr store data already exists in s3, deleting existing and continuing.')
294
- # self.__s3.delete_objects(
295
- # bucket_name=self.__output_bucket,
296
- # objects=s3_objects,
297
- # access_key_id=self.__output_bucket_access_key,
298
- # secret_access_key=self.__output_bucket_secret_access_key
299
- # )
300
- # #######################################################################
301
- # # self.__delete_all_local_raw_and_zarr_files()
302
- # Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
303
- # self.__s3.download_file(
304
- # bucket_name=self.__input_bucket,
305
- # key=bucket_key,
306
- # file_name=input_file_name
307
- # )
308
- # self.__create_local_zarr_store(
309
- # raw_file_name=input_file_name,
310
- # cruise_name=cruise_name,
311
- # sensor_name=sensor_name,
312
- # output_zarr_prefix=output_zarr_prefix,
313
- # store_name=store_name
314
- # )
315
- # #######################################################################
316
- # self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
317
- # #######################################################################
318
- # # # TODO: verify count of objects matches
319
- # # s3_objects = self.__s3.list_objects(
320
- # # bucket_name=self.__output_bucket,
321
- # # prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
322
- # # access_key_id=self.__output_bucket_access_key,
323
- # # secret_access_key=self.__output_bucket_secret_access_key
324
- # # )
325
- # #######################################################################
326
- # self.__update_processing_status(
327
- # file_name=input_file_name,
328
- # cruise_name=cruise_name,
329
- # pipeline_status='SUCCESS_RAW_TO_ZARR'
330
- # )
331
- # #######################################################################
332
- # self.__publish_done_message(input_message)
333
- # #######################################################################
334
- # # except Exception as err:
335
- # # print(f'Exception encountered: {err}')
336
- # # self.__update_processing_status(
337
- # # file_name=input_file_name,
338
- # # cruise_name=cruise_name,
339
- # # pipeline_status='FAILURE_RAW_TO_ZARR',
340
- # # error_message=str(err),
341
- # # )
342
- # finally:
343
- # self.__delete_all_local_raw_and_zarr_files()
344
- #######################################################################
345
-
346
337
  ############################################################################
347
338
 
339
+
348
340
  ################################################################################
349
341
  ############################################################################
@@ -1,6 +1,13 @@
1
1
  from .cleaner import Cleaner
2
- from .constants import Constants, Coordinates
2
+ from .constants import Constants, Coordinates, Instruments
3
3
  from .pipeline_status import PipelineStatus
4
4
  from .timestamp import Timestamp
5
5
 
6
- __all__ = ["Cleaner", "Constants", "Coordinates", "PipelineStatus", "Timestamp"]
6
+ __all__ = [
7
+ "Cleaner",
8
+ "Instruments",
9
+ "Constants",
10
+ "Coordinates",
11
+ "PipelineStatus",
12
+ "Timestamp",
13
+ ]