water_column_sonar_processing-0.0.8-py3-none-any.whl → water_column_sonar_processing-0.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of water-column-sonar-processing might be problematic.

@@ -1,8 +1,6 @@
 from __future__ import absolute_import
 
 from . import aws, cruise, geometry, index, model, processing, utility
-# from .model import ZarrManager
-# from .process import Process
 
 __all__ = [
     "aws",
@@ -1,7 +1,7 @@
 from .dynamodb_manager import DynamoDBManager
-from .s3_manager import S3Manager
+from .s3_manager import S3Manager, chunked
 from .s3fs_manager import S3FSManager
 from .sns_manager import SNSManager
 from .sqs_manager import SQSManager
 
-__all__ = ["DynamoDBManager", "S3Manager", "S3FSManager", "SNSManager", "SQSManager"]
+__all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
@@ -3,6 +3,8 @@ import os
 import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -14,7 +16,10 @@ GB = 1024**3
 
 
 #########################################################################
-def chunked(ll: list, n: int) -> Generator:
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
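The reflowed `chunked` helper in `aws/s3_manager.py` is now also re-exported from `water_column_sonar_processing.aws` (see the `aws/__init__.py` hunk above); `delete_nodd_objects` further down uses it to respect the 1000-keys-per-request limit of S3 `delete_objects`. A minimal usage sketch (the key names are illustrative):

```python
from water_column_sonar_processing.aws import chunked

# Batch 2500 hypothetical delete targets into groups of at most 1000 keys,
# the per-request limit of S3 delete_objects that delete_nodd_objects relies on.
objects_to_delete = [{"Key": f"level_1/file_{i}.zarr/.zattrs"} for i in range(2500)]
batches = list(chunked(ll=objects_to_delete, n=1000))
print([len(b) for b in batches])  # [1000, 1000, 500]
```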
@@ -24,16 +29,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-        # input_endpoint_url: str,
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -51,14 +49,12 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,7 +64,6 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
             service_name="s3",
@@ -78,12 +73,12 @@
         self.paginator = self.s3_client.get_paginator('list_objects_v2')
         self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self): # TODO: do i need this?
-        return self.s3_session.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
+    # def get_client(self): # TODO: do i need this?
+    #     return self.s3_session.client(
+    #         service_name="s3",
+    #         config=self.s3_client_config,
+    #         region_name=self.s3_region,
+    #     )
 
     #####################################################################
     def create_bucket(
@@ -146,18 +141,6 @@
         return all_uploads
 
     #####################################################################
-    # def upload_nodd_file2(
-    #     self,
-    #     body: str,
-    #     bucket: str,
-    #     key: str,
-    # ):
-    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
-    #         Body=body,
-    #         Bucket=bucket,
-    #         Key=key,
-    #     )
-
     # TODO: this uses resource, try to use client
     def upload_file(
         self,
@@ -190,11 +173,36 @@
             all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
+            output_bucket_name=self.output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading files to output bucket.")
         return all_uploads
 
+    #####################################################################
+    def check_if_object_exists(
+        self,
+        bucket_name,
+        key_name
+    ) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == "404":
+                # The object does not exist.
+                return False
+            elif e.response['Error']['Code'] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+        return True
+
     #####################################################################
     # used: raw-to-zarr
     def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
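The new `check_if_object_exists` drives the GeoJSON overwrite logic in `geometry_manager.py` further down. Two caveats: the `s3_manager2.list_objects(...)` call discards its result, and `e.response['Error']['Code']` is a string, so the `== 403` comparison will never match as written and presumably needs to be `== "403"`. A standalone sketch of the same HEAD-based check against a plain boto3 client (bucket and key are placeholders):

```python
import boto3
import botocore


def object_exists(s3_client, bucket_name: str, key_name: str) -> bool:
    # HEAD the object: a missing key surfaces as a 404 ClientError and
    # unauthorized access (including a bad bucket name) as a 403.
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] in ("404", "403"):
            return False
        raise  # anything else is unexpected
    return True


# Usage with hypothetical names:
# s3 = boto3.client("s3")
# object_exists(s3, "noaa-wcsd-zarr-pds", "spatial/geojson/HB0806/HB0806.json")
```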
@@ -202,6 +210,7 @@
         bucket_name,
         prefix
     ):
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         # s3_client = self.s3_client
@@ -227,7 +236,11 @@
 
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    def folder_exists_and_not_empty(
+        self,
+        bucket_name: str,
+        path: str
+    ) -> bool:
         if not path.endswith("/"):
             path = path + "/"
         s3_client = self.s3_client
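Only the signature of `folder_exists_and_not_empty` is reflowed here; the body continues beyond this hunk. For reference, a common way to implement such a prefix check (a sketch, not necessarily this package's exact code) is a single `list_objects_v2` page capped at one key:

```python
def folder_exists_and_not_empty(s3_client, bucket_name: str, path: str) -> bool:
    # S3 has no real directories: treat the "folder" as a key prefix and
    # ask for at most one key under it to see whether anything exists.
    if not path.endswith("/"):
        path = path + "/"
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=path, MaxKeys=1)
    return response["KeyCount"] > 0
```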
@@ -319,23 +332,15 @@
         print("downloaded file")
 
     #####################################################################
-    # not used
-    # def delete_nodd_object( # noaa-wcsd-model-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ): # -> dict:
-    #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
+    # TODO: need to test this!!!
     def delete_nodd_objects( # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
             print(
-                f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
+                f"Deleting {len(objects)} objects in {bucket_name} in batches."
             )
             objects_to_delete = []
             for obj in objects:
@@ -343,12 +348,28 @@
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=self.output_bucket_name, Delete={"Objects": batch}
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print(f"Deleted files.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")
 
+    #####################################################################
+    # TODO: need to test this!!!
+    def delete_nodd_object(
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(
+                f"Deleting {key_name} objects in {bucket_name}."
+            )
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
+            print(f"Deleted file.")
+        except Exception as err:
+            print(f"Problem was encountered while deleting objects: {err}")
+
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
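Both delete helpers now take an explicit `bucket_name` rather than silently targeting `self.output_bucket_name`, which is what lets `geometry_manager.py` and `raw_to_zarr.py` (later hunks) delete from the NODD bucket directly. A sketch of the calling pattern, mirroring the `raw_to_zarr.py` hunk below, with placeholder bucket, prefix, and key names:

```python
from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()

# Batch delete: remove every object under an existing Zarr store before
# re-uploading; delete_nodd_objects chunks the request into 1000-key batches.
child_objects = s3_manager.get_child_objects(
    bucket_name="noaa-wcsd-zarr-pds",  # placeholder bucket
    sub_prefix="level_1/Henry_B._Bigelow/HB0806/EK60/D20080912-T123456.zarr",
)
if len(child_objects) > 0:
    s3_manager.delete_nodd_objects(
        bucket_name="noaa-wcsd-zarr-pds",
        objects=child_objects,
    )

# Single-key delete, as used for a stale GeoJSON file:
s3_manager.delete_nodd_object(
    bucket_name="noaa-wcsd-zarr-pds",
    key_name="spatial/geojson/HB0806/HB0806.json",  # placeholder key
)
```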
@@ -3,10 +3,10 @@ import os
 import numcodecs
 import numpy as np
 
-from src.water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from src.water_column_sonar_processing.aws.s3_manager import S3Manager
-from src.water_column_sonar_processing.model.zarr_manager import ZarrManager
-from src.water_column_sonar_processing.utility.cleaner import Cleaner
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.model import ZarrManager
+from water_column_sonar_processing.utility import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -47,7 +47,7 @@ class CreateEmptyZarrStore:
         ):
             for file in files:
                 local_path = os.path.join(subdir, file)
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs'
+                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/..zattrs'
                 s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
                 all_files.append([local_path, s3_key])
                 #
@@ -7,9 +7,9 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from src.water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from src.water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-from src.water_column_sonar_processing.model.zarr_manager import ZarrManager
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.model import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -5,8 +5,8 @@ import geopandas
 import numpy as np
 import pandas as pd
 
-from src.water_column_sonar_processing.aws.s3_manager import S3Manager
-from src.water_column_sonar_processing.utility.cleaner import Cleaner
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.utility import Cleaner
 
 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -138,15 +138,13 @@ class GeometryManager:
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-        s3_objects = s3_manager.list_objects(
+        geojson_object_exists = s3_manager.check_if_object_exists(
             bucket_name=output_bucket_name,
-            prefix=f"{geo_json_prefix}/{geo_json_name}"
+            key_name=f"{geo_json_prefix}/{geo_json_name}"
         )
-        if len(s3_objects) > 0:
-            print(
-                "GeoJSON already exists in s3, deleting existing and continuing."
-            )
-            s3_manager.delete_nodd_objects(objects=s3_objects)
+        if geojson_object_exists:
+            print("GeoJSON already exists in s3, deleting existing and continuing.")
+            s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
 
         print("Upload GeoJSON to s3.")
         s3_manager.upload_nodd_file(
@@ -12,8 +12,6 @@ import pyogrio
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from shapely.geometry import LineString
 
-from src.water_column_sonar_processing.aws import S3Manager, S3FSManager
-
 MAX_POOL_CONNECTIONS = 64
 MAX_CONCURRENCY = 64
 MAX_WORKERS = 64
@@ -4,7 +4,7 @@ import pandas as pd
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import as_completed
-from src.water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.aws import S3Manager
 
 
 class IndexManager:
@@ -5,9 +5,10 @@ import xarray as xr
 import zarr
 from numcodecs import Blosc
 
-from src.water_column_sonar_processing.aws.s3fs_manager import S3FSManager
-from src.water_column_sonar_processing.utility.constants import Constants, Coordinates
-from src.water_column_sonar_processing.utility.timestamp import Timestamp
+from water_column_sonar_processing.aws import S3FSManager
+from water_column_sonar_processing.utility import Constants
+from water_column_sonar_processing.utility import Timestamp
+from water_column_sonar_processing.utility import Coordinates
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -85,8 +86,6 @@ class ZarrManager:
             data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            # Constants.TILE_SIZE.value,
-            #), # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             fill_value=np.nan, # TODO: do i want nan's?
@@ -124,14 +123,16 @@
 
         root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
         root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),
+            # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
@@ -143,14 +144,16 @@
         # Note: LATITUDE is indexed by TIME
         root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
         root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
             # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
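Seeding latitude and longitude with `np.nan` instead of `0.0` keeps unwritten pings from masquerading as real fixes at (0, 0) on the equator. A minimal zarr v2 sketch of the pattern (names, sizes, and dtypes are illustrative, not the package's `Constants`/`Coordinates` values):

```python
import numpy as np
import zarr

width = 512  # illustrative length of the time dimension
root = zarr.group()  # in-memory store

# NaN-filled coordinate: unwritten entries stay unmistakably "no data",
# whereas a 0.0 fill would read as a valid equatorial coordinate.
root.create_dataset(
    name="latitude",
    data=np.repeat(np.nan, width),
    shape=width,
    chunks=256,
    dtype=np.dtype("float32"),
    fill_value=np.nan,
)
root.latitude.attrs["_ARRAY_DIMENSIONS"] = ["time"]
print(np.isnan(root.latitude[:]).all())  # True until real fixes are written
```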
@@ -162,8 +165,9 @@
         # Note: LONGITUDE is indexed by TIME
         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
 
         #####################################################################
         # TODO: verify adding this variable for where the bottom was detected
@@ -182,8 +186,9 @@
         # BOTTOM is indexed by TIME
         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
@@ -203,11 +208,11 @@
             Coordinates.FREQUENCY.value
         ] # TODO: is this correct
 
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
         root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
         root.frequency.attrs["standard_name"] = (
             Coordinates.FREQUENCY_STANDARD_NAME.value
         )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
@@ -229,8 +234,8 @@
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
@@ -241,7 +246,7 @@
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
         root.attrs["processing_software_version"] = (
-            "0.0.6" # TODO: get programmatically
+            "0.0.9" # TODO: get programmatically, echopype>utils>prov.py
        )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
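The bumped `"0.0.9"` literal is already out of sync with the 0.0.10 release, which is exactly what the TODO (and the setuptools-scm note in the README hunk below) wants to eliminate. One hedged way to resolve the version at runtime on Python 3.8+, assuming the installed distribution name:

```python
from importlib.metadata import PackageNotFoundError, version


def processing_software_version() -> str:
    # Look up the installed distribution's version so the zarr attribute
    # cannot drift from the released package version.
    try:
        return version("water_column_sonar_processing")
    except PackageNotFoundError:
        # e.g. running from an uninstalled source checkout
        return "0.0.0+unknown"


# root.attrs["processing_software_version"] = processing_software_version()
print(processing_software_version())
```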
@@ -3,10 +3,10 @@ import os
 
 import numpy as np
 
-from src.water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from src.water_column_sonar_processing.aws.s3_manager import S3Manager
-from src.water_column_sonar_processing.aws.s3fs_manager import S3FSManager
-from src.water_column_sonar_processing.aws.sns_manager import SNSManager
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.aws import S3FSManager
+from water_column_sonar_processing.aws import SNSManager
 
 
 ###########################################################
@@ -4,7 +4,7 @@ import echopype as ep
 import numpy as np
 from numcodecs import Blosc
 
-from src.water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Cleaner
 
 TEMPDIR = "/tmp"
 
@@ -7,9 +7,9 @@ from numcodecs import Blosc
 from datetime import datetime
 from pathlib import Path # , PurePath
 
-from src.water_column_sonar_processing.aws import DynamoDBManager, S3Manager
-from src.water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-from src.water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.utility import Cleaner
 
 TEMPDIR = "/tmp"
 
@@ -149,10 +149,14 @@ class RawToZarr:
         sensor_name,
         raw_file_name,
     ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
         print(f'Opening raw: {raw_file_name} and creating zarr store.')
         geometry_manager = GeometryManager()
         cleaner = Cleaner()
-        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw
+        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw?
         try:
             gc.collect()
             print('Opening raw file with echopype.')
@@ -204,14 +208,27 @@
             store_name = f"{Path(raw_file_name).stem}.zarr"
             ds_sv.to_zarr(store=store_name)
             #################################################################
-            # TODO: do i still need this?
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file( # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
-            #################################################################
             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager()
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print('Zarr store data already exists in s3, deleting existing and continuing.')
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix
+            )
+            #################################################################
             self.__zarr_info_to_table(
                 output_bucket_name=output_bucket_name,
                 table_name=table_name,
228
245
  frequencies=frequencies,
229
246
  channels=channels
230
247
  )
231
- ###################################################################
232
- #######################################################################
233
- self.__upload_files_to_output_bucket(
234
- output_bucket_name=output_bucket_name,
235
- local_directory=store_name,
236
- object_prefix=output_zarr_prefix
237
- )
238
- #######################################################################
239
- # # TODO: verify count of objects matches
240
- # s3_objects = self.__s3.list_objects(
241
- # bucket_name=self.__output_bucket,
242
- # prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
243
- # access_key_id=self.__output_bucket_access_key,
244
- # secret_access_key=self.__output_bucket_secret_access_key
245
- # )
246
248
  #######################################################################
247
- # self.__update_processing_status(
248
- # file_name=input_file_name,
249
- # cruise_name=cruise_name,
250
- # pipeline_status='SUCCESS_RAW_TO_ZARR'
251
- # )
249
+ # TODO: verify count of objects matches, publish message, update status
252
250
  #######################################################################
253
- # self.__publish_done_message(input_message)
254
251
  print('here')
255
252
  except Exception as err:
256
253
  print(f'Exception encountered creating local Zarr store with echopype: {err}')
@@ -1,6 +1,6 @@
 from .cleaner import Cleaner
-from .constants import Constants
+from .constants import Constants, Coordinates
 from .pipeline_status import PipelineStatus
 from .timestamp import Timestamp
 
-__all__ = ["Cleaner", "Constants", "PipelineStatus", "Timestamp"]
+__all__ = ["Cleaner", "Constants", "Coordinates", "PipelineStatus", "Timestamp"]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.8
+Version: 0.0.10
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -24,7 +24,7 @@ Requires-Dist: numcodecs==0.13.1
 Requires-Dist: numpy==1.26.4
 Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: python-dotenv==1.0.0
+Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
@@ -114,6 +114,7 @@ python -m twine upload --repository pypi dist/*
 ```
 
 # Pre Commit Hook
+see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
 ```
 pre-commit install --allow-missing-config
@@ -132,3 +133,8 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 20241125
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
+
+
+# TODO:
+add https://pypi.org/project/setuptools-scm/
+for extracting the version
@@ -1,32 +1,32 @@
-water_column_sonar_processing/__init__.py,sha256=Ipl74g4btRHNYqgJ4Ro957HQC4YXBoIrGhgMmBPP0Uw,290
-water_column_sonar_processing/process.py,sha256=mb8_UpcOTy7RAAMlv-nF5hZG5zFoMsFxlT4OyvrpObk,5455
-water_column_sonar_processing/aws/__init__.py,sha256=u5J-TOEVgAQsMdc5LMo1igUESRclzV8gf-b0jUaJ9Gg,277
+water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
+water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
+water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
 water_column_sonar_processing/aws/dynamodb_manager.py,sha256=sZHn-hgCt3K3w0x5BcXfF5jLMt_F11dAtQHJToij9nU,10008
-water_column_sonar_processing/aws/s3_manager.py,sha256=ctNWMkgqMlwbwmXHgwKEV8otLwIjr-dHX6bQ2rOw1ug,14718
+water_column_sonar_processing/aws/s3_manager.py,sha256=kS48Vu_jE_fOKbwKOhCLWKDSqHzOGVEdZ_Lc4MaMCfA,15291
 water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=ev5jMZiwLyY1zrIAQhAw9X55eD3rxMrW5PpotjlOxDE,7382
-water_column_sonar_processing/cruise/resample_regrid.py,sha256=aCbY84wtQma7GK9QooMqXzrJuIpC8IPUp1cRwN8ZOoA,12368
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uQiZoKm16jD0SUuXmhuPryxdE-6bUc6BlCi2UtmzUpw,7318
+water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=xN1zyD_4Apry69BNcm5GtBKPpbHaPc_TYRYYFXjtwMc,10577
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=0Q9IRiBr6XvxUg5M2vCPtUhbnYnwa5pJI1ayfWXMgMs,10587
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
-water_column_sonar_processing/geometry/pmtile_generation.py,sha256=Hh7UP8_mSC4_vw0F-LEv0DCT0FBYzbuNWZ0bITdg3gI,12458
+water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
-water_column_sonar_processing/index/index_manager.py,sha256=k_OOpU8FNQ5pz4HZkr1xGLakLbuyeBm8LZcFVqjU2cQ,11254
+water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=hKikypzqqxGPUpjkSrf_RweHihaBuAxIQyOichf4Vhs,13637
+water_column_sonar_processing/model/zarr_manager.py,sha256=TbcVux-GWfX4XJ7UT20E7dI_h_islrKsGtjx_VwSsLg,14003
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
-water_column_sonar_processing/processing/cruise_sampler.py,sha256=d5_rwk9ucCGxNnL4gjDoY12fN6t9jOwJJenftGV4dGE,15931
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=W5BswuiHSnHj6jhVY_31qHo6OQHbYEvP2cqBliIqVZQ,16154
-water_column_sonar_processing/utility/__init__.py,sha256=nyqPobcvwftr6T4MNxNtQtfbWzW9Kgpbp6JO7Gr5IZI,206
+water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=7vvoNe0jlB34R5mBPceQjL9N_5X0GTWs9xpCqvRK1nQ,15931
+water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.8.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
-water_column_sonar_processing-0.0.8.dist-info/METADATA,sha256=N2RBgvC6W-KnKzS4CoijfHeQ5PVIVhhBNOZkIRH4Kuc,4432
-water_column_sonar_processing-0.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-water_column_sonar_processing-0.0.8.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
-water_column_sonar_processing-0.0.8.dist-info/RECORD,,
+water_column_sonar_processing-0.0.10.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.10.dist-info/METADATA,sha256=qFNeJ3GduRHKfcJRYShO9LamuMREk66qm18IUUXsMg8,4566
+water_column_sonar_processing-0.0.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.10.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.10.dist-info/RECORD,,