water-column-sonar-processing 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

@@ -111,17 +111,21 @@ class DynamoDBManager:
         expression_attribute_names,
         expression_attribute_values,
         update_expression,
-    ):
-        response = self.__dynamodb_client.update_item(
-            TableName=table_name,
-            Key=key,
-            ExpressionAttributeNames=expression_attribute_names,
-            ExpressionAttributeValues=expression_attribute_values,
-            UpdateExpression=update_expression,
-        )
-        status_code = response["ResponseMetadata"]["HTTPStatusCode"]
-        assert response['ConsumedCapacity']['TableName'] == table_name
-        assert status_code == 200, "Problem, unable to update dynamodb table."
+    ): # TODO: convert to boolean
+        try:
+            response = self.__dynamodb_client.update_item(
+                TableName=table_name,
+                Key=key,
+                ExpressionAttributeNames=expression_attribute_names,
+                ExpressionAttributeValues=expression_attribute_values,
+                UpdateExpression=update_expression,
+            )
+            status_code = response["ResponseMetadata"]["HTTPStatusCode"]
+            print(f"HTTPStatusCode: {status_code}")
+            # assert status_code == 200, "Problem, unable to update dynamodb table."
+            # assert response['ConsumedCapacity']['TableName'] == table_name
+        except Exception as err:
+            print(f"Problem was encountered while updating item: {err}")
 
     #####################################################################
     # TODO: change to "get_cruise_as_df"
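The replacement swallows errors and only prints the status code; the inline TODO suggests returning a boolean instead. A minimal sketch of that variant, assuming a boto3 DynamoDB client (`update_item_ok` is a hypothetical helper, not part of the package):

```python
import boto3
from botocore.exceptions import ClientError

def update_item_ok(table_name, key, names, values, update_expression) -> bool:
    # Returns True only when DynamoDB reports HTTP 200; never raises on client errors.
    client = boto3.client("dynamodb")
    try:
        response = client.update_item(
            TableName=table_name,
            Key=key,
            ExpressionAttributeNames=names,
            ExpressionAttributeValues=values,
            UpdateExpression=update_expression,
        )
        return response["ResponseMetadata"]["HTTPStatusCode"] == 200
    except ClientError as err:
        print(f"Problem was encountered while updating item: {err}")
        return False
```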
@@ -3,6 +3,8 @@ import os
 import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -14,7 +16,10 @@ GB = 1024**3
 
 
 #########################################################################
-def chunked(ll: list, n: int) -> Generator:
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
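For reference, the generator yields successive n-sized slices, with a short final chunk when the list length is not a multiple of n:

```python
# Example: a 7-element list chunked into threes.
assert list(chunked(ll=list(range(7)), n=3)) == [[0, 1, 2], [3, 4, 5], [6]]
```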
@@ -24,16 +29,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-        # input_endpoint_url: str,
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
        self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
        self.s3_transfer_config = TransferConfig(
@@ -51,14 +49,12 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,7 +64,6 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
             service_name="s3",
@@ -78,12 +73,12 @@ class S3Manager:
         self.paginator = self.s3_client.get_paginator('list_objects_v2')
         self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self): # TODO: do i need this?
-        return self.s3_session.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
+    # def get_client(self): # TODO: do i need this?
+    #     return self.s3_session.client(
+    #         service_name="s3",
+    #         config=self.s3_client_config,
+    #         region_name=self.s3_region,
+    #     )
 
     #####################################################################
     def create_bucket(
@@ -146,18 +141,6 @@ class S3Manager:
         return all_uploads
 
     #####################################################################
-    # def upload_nodd_file2(
-    #     self,
-    #     body: str,
-    #     bucket: str,
-    #     key: str,
-    # ):
-    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
-    #         Body=body,
-    #         Bucket=bucket,
-    #         Key=key,
-    #     )
-
     # TODO: this uses resource, try to use client
     def upload_file(
         self,
@@ -190,11 +173,36 @@ class S3Manager:
             all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
+            output_bucket_name=self.output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading files to output bucket.")
         return all_uploads
 
+    #####################################################################
+    def check_if_object_exists(
+        self,
+        bucket_name,
+        key_name
+    ) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == "404":
+                # The object does not exist.
+                return False
+            elif e.response['Error']['Code'] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+        return True
+
     #####################################################################
     # used: raw-to-zarr
     def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
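head_object is the standard existence probe, but note that boto3 reports error codes as strings, so the `== 403` comparison above can never be true (and the `s3_manager2.list_objects(...)` call looks like leftover debugging whose result is discarded). A tightened sketch, assuming a configured client:

```python
import botocore.exceptions

def object_exists(s3_client, bucket_name: str, key_name: str) -> bool:
    # HEAD the object: cheap, no body transferred.
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
        return True
    except botocore.exceptions.ClientError as e:
        # Codes arrive as strings: "404" = missing, "403" = unauthorized or no such bucket.
        if e.response["Error"]["Code"] in ("404", "403"):
            return False
        raise
```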
@@ -202,6 +210,7 @@ class S3Manager:
         bucket_name,
         prefix
     ):
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         # s3_client = self.s3_client
@@ -227,7 +236,11 @@ class S3Manager:
 
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    def folder_exists_and_not_empty(
+        self,
+        bucket_name: str,
+        path: str
+    ) -> bool:
         if not path.endswith("/"):
             path = path + "/"
         s3_client = self.s3_client
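A prefix check like this is typically implemented with a single-key listing; a sketch of the common pattern, assuming a boto3 client (the helper name is hypothetical):

```python
def prefix_exists_and_not_empty(s3_client, bucket_name: str, path: str) -> bool:
    # KeyCount > 0 for a MaxKeys=1 listing means the "directory" has at least one object.
    if not path.endswith("/"):
        path = path + "/"
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=path, MaxKeys=1)
    return response["KeyCount"] > 0
```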
@@ -319,23 +332,15 @@ class S3Manager:
         print("downloaded file")
 
     #####################################################################
-    # not used
-    # def delete_nodd_object( # noaa-wcsd-model-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ): # -> dict:
-    #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
+    # TODO: need to test this!!!
     def delete_nodd_objects( # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
             print(
-                f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
+                f"Deleting {len(objects)} objects in {bucket_name} in batches."
             )
             objects_to_delete = []
             for obj in objects:
@@ -343,12 +348,28 @@ class S3Manager:
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=self.output_bucket_name, Delete={"Objects": batch}
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print(f"Deleted files.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")
 
+    #####################################################################
+    # TODO: need to test this!!!
+    def delete_nodd_object(
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(
+                f"Deleting {key_name} objects in {bucket_name}."
+            )
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
+            print(f"Deleted file.")
+        except Exception as err:
+            print(f"Problem was encountered while deleting objects: {err}")
+
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
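The 1000-key cap in delete_nodd_objects comes from the S3 DeleteObjects API itself; a self-contained sketch of the batching (`delete_keys` is a hypothetical helper):

```python
def delete_keys(s3_client, bucket_name: str, keys: list) -> None:
    # DeleteObjects accepts at most 1000 keys per request, so slice the list.
    objects = [{"Key": key} for key in keys]
    for i in range(0, len(objects), 1000):
        s3_client.delete_objects(
            Bucket=bucket_name,
            Delete={"Objects": objects[i : i + 1000]},
        )
```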
@@ -47,7 +47,7 @@ class CreateEmptyZarrStore:
     ):
         for file in files:
             local_path = os.path.join(subdir, file)
-            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs'
+            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/..zattrs'
             s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
             all_files.append([local_path, s3_key])
             #
@@ -138,15 +138,13 @@ class GeometryManager:
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-        s3_objects = s3_manager.list_objects(
+        geojson_object_exists = s3_manager.check_if_object_exists(
             bucket_name=output_bucket_name,
-            prefix=f"{geo_json_prefix}/{geo_json_name}"
+            key_name=f"{geo_json_prefix}/{geo_json_name}"
         )
-        if len(s3_objects) > 0:
-            print(
-                "GeoJSON already exists in s3, deleting existing and continuing."
-            )
-            s3_manager.delete_nodd_objects(objects=s3_objects)
+        if geojson_object_exists:
+            print("GeoJSON already exists in s3, deleting existing and continuing.")
+            s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
 
         print("Upload GeoJSON to s3.")
         s3_manager.upload_nodd_file(
@@ -86,8 +86,6 @@ class ZarrManager:
             data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            # Constants.TILE_SIZE.value,
-            # ), # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             fill_value=np.nan, # TODO: do i want nan's?
@@ -125,14 +123,16 @@ class ZarrManager:
 
         root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
         root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),
+            # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
@@ -144,14 +144,16 @@ class ZarrManager:
         # Note: LATITUDE is indexed by TIME
         root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
         root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
             # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
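Seeding latitude/longitude with NaN instead of 0.0 means an unwritten ping can no longer masquerade as a fix at (0, 0). A minimal zarr v2 sketch of the same idea (names and sizes are illustrative, not the package's constants):

```python
import numpy as np
import zarr

root = zarr.open_group("example.zarr", mode="w")
width = 1024  # illustrative number of ping_time samples
root.create_dataset(
    name="latitude",
    data=np.repeat(np.nan, width),  # NaN = "no GPS fix written yet"
    shape=width,
    chunks=512,
    dtype="float32",
    fill_value=np.nan,
)
root.latitude.attrs["_ARRAY_DIMENSIONS"] = ["time"]
```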
@@ -163,8 +165,9 @@ class ZarrManager:
         # Note: LONGITUDE is indexed by TIME
         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
 
         #####################################################################
         # TODO: verify adding this variable for where the bottom was detected
@@ -183,8 +186,9 @@ class ZarrManager:
         # BOTTOM is indexed by TIME
         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
@@ -204,11 +208,11 @@ class ZarrManager:
             Coordinates.FREQUENCY.value
         ] # TODO: is this correct
 
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
         root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
         root.frequency.attrs["standard_name"] = (
             Coordinates.FREQUENCY_STANDARD_NAME.value
         )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
@@ -230,8 +234,8 @@ class ZarrManager:
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
@@ -242,7 +246,7 @@ class ZarrManager:
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
         root.attrs["processing_software_version"] = (
-            "0.0.6" # TODO: get programmatically
+            "0.0.9" # TODO: get programmatically, echopype>utils>prov.py
         )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
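The hard-coded version string was bumped but keeps its TODO; for an installed distribution, the stdlib can supply it, e.g.:

```python
from importlib.metadata import version

# Reads the version of the installed wheel, so the attribute
# cannot drift from the package that actually wrote the store.
processing_software_version = version("water_column_sonar_processing")
```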
@@ -9,7 +9,7 @@ from pathlib import Path # , PurePath
 
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.geometry import GeometryManager
-from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Cleaner, PipelineStatus
 
 TEMPDIR = "/tmp"
 
@@ -53,10 +53,6 @@ class RawToZarr:
     ):
         print('Writing Zarr information to DynamoDB table.')
         dynamodb_manager = DynamoDBManager()
-
-        # The problem is that these values were never populated
-        # and so when the query looks for values that aren't there
-        # they fail
         dynamodb_manager.update_item(
             table_name=table_name,
             key={
@@ -87,7 +83,8 @@ class RawToZarr:
                 ":ma": {"N": str(np.round(max_echo_range, 4))},
                 ":mi": {"N": str(np.round(min_echo_range, 4))},
                 ":nd": {"N": str(num_ping_time_dropna)},
-                ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
                 ":se": {"S": sensor_name},
                 ":sh": {"S": ship_name},
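Using PipelineStatus members instead of free-form strings keeps the DynamoDB rows consistent with one enum; `.name` serializes the member. A toy sketch (only LEVEL_1_PROCESSING is confirmed by this diff; the other member is illustrative):

```python
from enum import Enum

class PipelineStatus(Enum):
    LEVEL_0_PROCESSING = 0   # illustrative member
    LEVEL_1_PROCESSING = 1   # referenced by the diff above

assert PipelineStatus.LEVEL_1_PROCESSING.name == "LEVEL_1_PROCESSING"
```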
@@ -113,6 +110,7 @@ class RawToZarr:
                 "#ZP = :zp"
             ),
         )
+        print('Done writing Zarr information to DynamoDB table.')
 
     ############################################################################
     ############################################################################
@@ -143,16 +141,29 @@ class RawToZarr:
     def raw_to_zarr(
         self,
         table_name,
+        input_bucket_name,
         output_bucket_name,
         ship_name,
         cruise_name,
         sensor_name,
         raw_file_name,
     ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
         print(f'Opening raw: {raw_file_name} and creating zarr store.')
         geometry_manager = GeometryManager()
         cleaner = Cleaner()
-        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw
+        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw?
+
+        s3_manager = S3Manager()
+        s3_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+        bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+        s3_bottom_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+        s3_manager.download_file(bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name)
+        s3_manager.download_file(bucket_name=input_bucket_name, key=s3_bottom_file_path, file_name=bottom_file_name)
+
         try:
             gc.collect()
             print('Opening raw file with echopype.')
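raw_to_zarr now fetches both the .raw file and its .bot bottom-detection sidecar, deriving the second key by swapping the extension via Path(...).stem:

```python
from pathlib import Path

raw_file_name = "D20080722-T152416.raw"  # illustrative file name
bottom_file_name = f"{Path(raw_file_name).stem}.bot"
assert bottom_file_name == "D20080722-T152416.bot"
```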
@@ -168,7 +179,12 @@ class RawToZarr:
             )
             print('Compute volume backscattering strength (Sv) from raw data.')
             ds_sv = ep.calibrate.compute_Sv(echodata)
-            print('Done computing volume backscattering strength (Sv) from raw data.')
+            print('Done computing volume backscatter strength (Sv) from raw data.')
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = echodata.vendor.detected_seafloor_depth
+            #
             frequencies = echodata.environment.frequency_nominal.values
             #################################################################
             # Get GPS coordinates
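Copying the vendor-group variable keeps the echosounder's own bottom detection next to Sv in the level-1 store; the guard matters because not every sensor records it. The same membership test on a generic xarray Dataset:

```python
import numpy as np
import xarray as xr

# Stand-in for echodata.vendor; real data comes from the instrument's vendor group.
vendor = xr.Dataset({"detected_seafloor_depth": ("ping_time", np.array([105.2, 106.0]))})
ds_sv = xr.Dataset()

if "detected_seafloor_depth" in vendor.variables:
    ds_sv["detected_seafloor_depth"] = vendor["detected_seafloor_depth"]
```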
@@ -187,12 +203,9 @@ class RawToZarr:
             # TODO: this var name is supposed to represent minimum resolution of depth measurements
             # TODO revert this so that smaller diffs can be used
             # The most minimum the resolution can be is as small as 0.25 meters
-            min_echo_range = np.maximum(
-                0.25,
-                np.nanmin(np.diff(ds_sv.echo_range.values))
-            )
+            min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
             max_echo_range = float(np.nanmax(ds_sv.echo_range))
-            #
+            # This is the number of missing values found throughout the lat/lon
             num_ping_time_dropna = lat[~np.isnan(lat)].shape[0] # symmetric to lon
             #
             start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
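min_echo_range is now the smallest vertical bin spacing rounded to centimeters, rather than being clamped at 0.25 m; numerically:

```python
import numpy as np

echo_range = np.array([0.0, 0.19, 0.38, 0.57])  # illustrative bin depths in meters
min_echo_range = np.round(np.nanmin(np.diff(echo_range)), 2)
assert min_echo_range == 0.19  # the old code would have floored this to 0.25
```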
@@ -204,14 +217,27 @@ class RawToZarr:
             store_name = f"{Path(raw_file_name).stem}.zarr"
             ds_sv.to_zarr(store=store_name)
             #################################################################
-            # TODO: do i still need this?
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file( # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
-            #################################################################
             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager()
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print('Zarr store data already exists in s3, deleting existing and continuing.')
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix
+            )
+            #################################################################
             self.__zarr_info_to_table(
                 output_bucket_name=output_bucket_name,
                 table_name=table_name,
228
254
  frequencies=frequencies,
229
255
  channels=channels
230
256
  )
231
- ###################################################################
232
- #######################################################################
233
- self.__upload_files_to_output_bucket(
234
- output_bucket_name=output_bucket_name,
235
- local_directory=store_name,
236
- object_prefix=output_zarr_prefix
237
- )
238
- #######################################################################
239
- # # TODO: verify count of objects matches
240
- # s3_objects = self.__s3.list_objects(
241
- # bucket_name=self.__output_bucket,
242
- # prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
243
- # access_key_id=self.__output_bucket_access_key,
244
- # secret_access_key=self.__output_bucket_secret_access_key
245
- # )
246
257
  #######################################################################
247
- # self.__update_processing_status(
248
- # file_name=input_file_name,
249
- # cruise_name=cruise_name,
250
- # pipeline_status='SUCCESS_RAW_TO_ZARR'
251
- # )
258
+ # TODO: verify count of objects matches, publish message, update status
252
259
  #######################################################################
253
- # self.__publish_done_message(input_message)
254
- print('here')
260
+ print('Finished raw-to-zarr conversion.')
255
261
  except Exception as err:
256
262
  print(f'Exception encountered creating local Zarr store with echopype: {err}')
257
263
  raise RuntimeError(f"Problem creating local Zarr store, {err}")
258
264
  finally:
265
+ print("Finally.")
259
266
  cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
260
267
  print('Done creating local zarr store.')
261
268
 
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.9
+Version: 0.0.11
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -24,7 +24,7 @@ Requires-Dist: numcodecs==0.13.1
 Requires-Dist: numpy==1.26.4
 Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: python-dotenv==1.0.0
+Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
@@ -114,6 +114,7 @@ python -m twine upload --repository pypi dist/*
 ```
 
 # Pre Commit Hook
+see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
 ```
 pre-commit install --allow-missing-config
@@ -132,3 +133,8 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 20241125
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
+
+
+# TODO:
+add https://pypi.org/project/setuptools-scm/
+for extracting the version
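The closing TODO points at setuptools-scm; in a git checkout it derives the version from the most recent tag, e.g.:

```python
# Requires `pip install setuptools-scm` and a tagged git checkout.
from setuptools_scm import get_version

print(get_version())  # e.g. "0.0.11" when HEAD sits on the v0.0.11 tag
```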
@@ -1,32 +1,32 @@
 water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
 water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
 water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
-water_column_sonar_processing/aws/dynamodb_manager.py,sha256=sZHn-hgCt3K3w0x5BcXfF5jLMt_F11dAtQHJToij9nU,10008
-water_column_sonar_processing/aws/s3_manager.py,sha256=ctNWMkgqMlwbwmXHgwKEV8otLwIjr-dHX6bQ2rOw1ug,14718
+water_column_sonar_processing/aws/dynamodb_manager.py,sha256=gMDAXLE_p_nKmNZYICKA9T56PYDqtXBySlysSOVnWrI,10250
+water_column_sonar_processing/aws/s3_manager.py,sha256=kS48Vu_jE_fOKbwKOhCLWKDSqHzOGVEdZ_Lc4MaMCfA,15291
 water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uLwHZazndSy4puXrS-2PrGhicV-umsCCiXoqt2MMpkM,7317
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uQiZoKm16jD0SUuXmhuPryxdE-6bUc6BlCi2UtmzUpw,7318
 water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=7WZ1UerY_h3uOKc3mcaOpvhgZ1yV3gD-CUnhZJl1BOQ,10550
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=0Q9IRiBr6XvxUg5M2vCPtUhbnYnwa5pJI1ayfWXMgMs,10587
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
 water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
 water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=fpRkk6Qg1_LVdLg1M_X0J9Lchp2OJygTxfekEk1Mi88,13641
+water_column_sonar_processing/model/zarr_manager.py,sha256=TbcVux-GWfX4XJ7UT20E7dI_h_islrKsGtjx_VwSsLg,14003
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
 water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=QBz58P-hYTZwg6hhf4u_kNg710lrfxPwnvIr7UmK30I,16125
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=OPu4CoIlHQFW38iY4DLe5A5Ttrdz4NXtjYThrB-FuPs,16874
 water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.9.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
-water_column_sonar_processing-0.0.9.dist-info/METADATA,sha256=5mtfW5UU7dpl2oKGUi7GKTbjsrcMo13ackWFpNVkjr4,4432
-water_column_sonar_processing-0.0.9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-water_column_sonar_processing-0.0.9.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
-water_column_sonar_processing-0.0.9.dist-info/RECORD,,
+water_column_sonar_processing-0.0.11.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.11.dist-info/METADATA,sha256=KFkI1367kV7L7pl8SIK4UFwUVJvUCHkRTPwBCqpnxWA,4566
+water_column_sonar_processing-0.0.11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.11.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.11.dist-info/RECORD,,