water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. water_column_sonar_processing/__init__.py +16 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -4
  3. water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
  4. water_column_sonar_processing/aws/s3_manager.py +112 -122
  5. water_column_sonar_processing/aws/s3fs_manager.py +13 -19
  6. water_column_sonar_processing/aws/sns_manager.py +10 -21
  7. water_column_sonar_processing/aws/sqs_manager.py +10 -18
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
  10. water_column_sonar_processing/cruise/resample_regrid.py +109 -58
  11. water_column_sonar_processing/geometry/__init__.py +5 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +79 -48
  13. water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
  14. water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
  15. water_column_sonar_processing/index/__init__.py +3 -0
  16. water_column_sonar_processing/index/index_manager.py +104 -80
  17. water_column_sonar_processing/model/__init__.py +3 -0
  18. water_column_sonar_processing/model/zarr_manager.py +113 -75
  19. water_column_sonar_processing/process.py +76 -69
  20. water_column_sonar_processing/utility/__init__.py +6 -0
  21. water_column_sonar_processing/utility/cleaner.py +6 -7
  22. water_column_sonar_processing/utility/constants.py +42 -35
  23. water_column_sonar_processing/utility/pipeline_status.py +37 -10
  24. water_column_sonar_processing/utility/timestamp.py +3 -2
  25. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
  26. water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
  27. water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
  28. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
  29. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
  30. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,16 @@
1
+ from __future__ import absolute_import
2
+
3
+ from . import aws, cruise, geometry, index, model, utility, process
4
+ from .model import ZarrManager
5
+ from .process import Process
6
+
7
+ __all__ = [
8
+ "aws",
9
+ "cruise",
10
+ "geometry",
11
+ "index",
12
+ "model",
13
+ "utility",
14
+ "process",
15
+ "Process",
16
+ ]
@@ -1,4 +1,7 @@
1
- # from .dynamodb_manager import DynamoDBManager
2
- # from .s3_manager import S3Manager
3
- # from .s3fs_manager import S3FSManager
4
- # from .sns_manager import SNSManager
1
+ from .dynamodb_manager import DynamoDBManager
2
+ from .s3_manager import S3Manager
3
+ from .s3fs_manager import S3FSManager
4
+ from .sns_manager import SNSManager
5
+ from .sqs_manager import SQSManager
6
+
7
+ __all__ = ["DynamoDBManager", "S3Manager", "S3FSManager", "SNSManager", "SQSManager"]
@@ -1,7 +1,8 @@
1
1
  import os
2
+
2
3
  import boto3
3
4
  import pandas as pd
4
- from boto3.dynamodb.types import TypeSerializer, TypeDeserializer
5
+ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
5
6
 
6
7
 
7
8
  #########################################################################
@@ -9,9 +10,9 @@ class DynamoDBManager:
9
10
  #####################################################################
10
11
  def __init__(self):
11
12
  self.__dynamodb_session = boto3.Session(
12
- aws_access_key_id=os.environ.get('ACCESS_KEY_ID'),
13
- aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
14
- region_name=os.environ.get("AWS_REGION", default="us-east-1")
13
+ aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
14
+ aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
15
+ region_name=os.environ.get("AWS_REGION", default="us-east-1"),
15
16
  )
16
17
  self.__dynamodb_resource = self.__dynamodb_session.resource(
17
18
  service_name="dynamodb",
@@ -35,10 +36,10 @@ class DynamoDBManager:
35
36
 
36
37
  #####################################################################
37
38
  def create_table(
38
- self,
39
- table_name,
40
- key_schema,
41
- attribute_definitions,
39
+ self,
40
+ table_name,
41
+ key_schema,
42
+ attribute_definitions,
42
43
  ):
43
44
  self.__dynamodb_client.create_table(
44
45
  AttributeDefinitions=attribute_definitions,
@@ -52,98 +53,118 @@ class DynamoDBManager:
52
53
  )
53
54
 
54
55
  #####################################################################
55
- def get_item(
56
- self,
57
- table_name,
58
- key
56
+ def create_water_column_sonar_table(
57
+ self,
58
+ table_name,
59
59
  ):
60
+ self.create_table(
61
+ table_name=table_name,
62
+ key_schema=[
63
+ {
64
+ "AttributeName": "FILE_NAME",
65
+ "KeyType": "HASH",
66
+ },
67
+ {
68
+ "AttributeName": "CRUISE_NAME",
69
+ "KeyType": "RANGE",
70
+ },
71
+ ],
72
+ attribute_definitions=[
73
+ {"AttributeName": "FILE_NAME", "AttributeType": "S"},
74
+ {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
75
+ ],
76
+ )
77
+
78
+ #####################################################################
79
+ def get_item(self, table_name, key):
60
80
  response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
61
81
  item = None
62
- if response['ResponseMetadata']['HTTPStatusCode'] == 200:
63
- if 'Item' in response:
64
- item = response['Item']
82
+ if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
83
+ if "Item" in response:
84
+ item = response["Item"]
65
85
  return item
66
86
 
67
87
  #####################################################################
68
88
  def update_item(
69
- self,
70
- table_name,
71
- key,
72
- expression_attribute_names,
73
- expression_attribute_values,
74
- update_expression
89
+ self,
90
+ table_name,
91
+ key,
92
+ expression_attribute_names,
93
+ expression_attribute_values,
94
+ update_expression,
75
95
  ):
76
96
  response = self.__dynamodb_client.update_item(
77
97
  TableName=table_name,
78
98
  Key=key,
79
99
  ExpressionAttributeNames=expression_attribute_names,
80
100
  ExpressionAttributeValues=expression_attribute_values,
81
- UpdateExpression=update_expression
101
+ UpdateExpression=update_expression,
82
102
  )
83
- status_code = response['ResponseMetadata']['HTTPStatusCode']
103
+ status_code = response["ResponseMetadata"]["HTTPStatusCode"]
84
104
  # TODO: change to exception
85
- assert (status_code == 200), "Problem, unable to update dynamodb table."
105
+ assert status_code == 200, "Problem, unable to update dynamodb table."
86
106
 
87
107
  #####################################################################
88
108
  def get_table_as_df(
89
- self,
90
- ship_name,
91
- cruise_name,
92
- sensor_name,
93
- table_name,
109
+ self,
110
+ ship_name,
111
+ cruise_name,
112
+ sensor_name,
113
+ table_name,
94
114
  ):
95
115
  expression_attribute_values = {
96
- ':cr': {'S': cruise_name},
97
- ':se': {'S': sensor_name},
98
- ':sh': {'S': ship_name},
116
+ ":cr": {"S": cruise_name},
117
+ ":se": {"S": sensor_name},
118
+ ":sh": {"S": ship_name},
99
119
  }
100
120
 
101
- filter_expression = 'CRUISE_NAME = :cr and SENSOR_NAME = :se and SHIP_NAME = :sh'
121
+ filter_expression = (
122
+ "CRUISE_NAME = :cr and SENSOR_NAME = :se and SHIP_NAME = :sh"
123
+ )
102
124
  response = self.__dynamodb_client.scan(
103
125
  TableName=table_name,
104
- Select='ALL_ATTRIBUTES',
126
+ Select="ALL_ATTRIBUTES",
105
127
  ExpressionAttributeValues=expression_attribute_values,
106
128
  FilterExpression=filter_expression,
107
129
  )
108
130
  # Note: table.scan() has 1 MB limit on results so pagination is used
109
- data = response['Items']
131
+ data = response["Items"]
110
132
 
111
- while 'LastEvaluatedKey' in response:
133
+ while "LastEvaluatedKey" in response:
112
134
  response = self.__dynamodb_client.scan(
113
135
  TableName=table_name,
114
- Select='ALL_ATTRIBUTES',
136
+ Select="ALL_ATTRIBUTES",
115
137
  ExpressionAttributeValues=expression_attribute_values,
116
138
  FilterExpression=filter_expression,
117
- ExclusiveStartKey=response['LastEvaluatedKey']
139
+ ExclusiveStartKey=response["LastEvaluatedKey"],
118
140
  )
119
- data.extend(response['Items'])
141
+ data.extend(response["Items"])
120
142
 
121
143
  deserializer = self.type_deserializer
122
144
  df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in data])
123
145
 
124
- return df.sort_values(by='START_TIME', ignore_index=True)
146
+ return df.sort_values(by="START_TIME", ignore_index=True)
125
147
 
126
148
  #####################################################################
127
149
  # is this used?
128
150
  def get_table_item(
129
- self,
130
- table_name,
131
- key,
151
+ self,
152
+ table_name,
153
+ key,
132
154
  ):
133
155
  # a bit more high level, uses resource to get table item
134
156
  table = self.__dynamodb_resource.Table(table_name)
135
- response = table.get_item(
136
- Key=key
137
- )
157
+ response = table.get_item(Key=key)
138
158
  return response
139
159
 
140
160
  #####################################################################
141
161
  # TODO: add helper method to delete the data
142
162
  def delete_cruise(
143
- self,
144
- table_name,
145
- cruise_name,
163
+ self,
164
+ table_name,
165
+ cruise_name,
146
166
  ):
147
167
  pass
148
168
 
169
+
149
170
  #########################################################################
@@ -1,47 +1,45 @@
1
1
  import json
2
2
  import os
3
- import boto3
4
- # import pandas as pd
5
3
  from collections.abc import Generator
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
5
 
7
- # import geopandas
8
- from botocore.config import Config
6
+ import boto3
9
7
  from boto3.s3.transfer import TransferConfig
8
+ from botocore.config import Config
10
9
  from botocore.exceptions import ClientError
11
- from concurrent.futures import ThreadPoolExecutor
12
- from concurrent.futures import as_completed
13
10
 
14
11
  MAX_POOL_CONNECTIONS = 64
15
12
  MAX_CONCURRENCY = 64
16
13
  MAX_WORKERS = 64
17
- GB = 1024 ** 3
14
+ GB = 1024**3
15
+
18
16
 
19
17
  #########################################################################
20
18
  def chunked(ll: list, n: int) -> Generator:
21
19
  # Yields successively n-sized chunks from ll.
22
20
  for i in range(0, len(ll), n):
23
- yield ll[i:i + n]
21
+ yield ll[i : i + n]
24
22
 
25
23
 
26
24
  class S3Manager:
27
25
  #####################################################################
28
26
  def __init__(
29
- self,
30
- # TODO: Need to allow passing in of credentials when writing to protected bucket
27
+ self,
28
+ # TODO: Need to allow passing in of credentials when writing to protected bucket
31
29
  ):
32
- self.input_bucket_name = os.environ.get('INPUT_BUCKET_NAME')
33
- self.output_bucket_name = os.environ.get('OUTPUT_BUCKET_NAME')
30
+ self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
31
+ self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
34
32
  self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
35
33
  self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
36
34
  self.s3_transfer_config = TransferConfig(
37
35
  max_concurrency=MAX_CONCURRENCY,
38
36
  use_threads=True,
39
37
  max_bandwidth=None,
40
- multipart_threshold=10 * GB
38
+ multipart_threshold=10 * GB,
41
39
  )
42
40
  self.s3_session = boto3.Session(
43
- aws_access_key_id=os.environ.get('ACCESS_KEY_ID'),
44
- aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
41
+ aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
42
+ aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
45
43
  region_name=self.s3_region,
46
44
  )
47
45
  self.s3_client = self.s3_session.client(
@@ -57,8 +55,8 @@ class S3Manager:
57
55
  # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
58
56
  # TODO: create both "s3_client_input" and "s3_client_output" ???
59
57
  self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
60
- aws_access_key_id=os.environ.get('OUTPUT_BUCKET_ACCESS_KEY'),
61
- aws_secret_access_key=os.environ.get('OUTPUT_BUCKET_SECRET_ACCESS_KEY'),
58
+ aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
59
+ aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
62
60
  region_name=self.s3_region,
63
61
  )
64
62
  self.s3_client_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.client(
@@ -66,15 +64,15 @@ class S3Manager:
66
64
  config=self.s3_client_config,
67
65
  region_name=self.s3_region,
68
66
  )
69
- self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
70
- service_name="s3",
71
- config=self.s3_client_config,
72
- region_name=self.s3_region,
67
+ self.s3_resource_noaa_wcsd_zarr_pds = (
68
+ self.s3_session_noaa_wcsd_zarr_pds.resource(
69
+ service_name="s3",
70
+ config=self.s3_client_config,
71
+ region_name=self.s3_region,
72
+ )
73
73
  )
74
74
 
75
- def get_client(
76
- self
77
- ):
75
+ def get_client(self):
78
76
  return self.s3_session.client(
79
77
  service_name="s3",
80
78
  config=self.__s3_client_config,
@@ -83,8 +81,8 @@ class S3Manager:
83
81
 
84
82
  #####################################################################
85
83
  def create_bucket(
86
- self,
87
- bucket_name: str,
84
+ self,
85
+ bucket_name: str,
88
86
  ):
89
87
  self.s3_client.create_bucket(
90
88
  Bucket=bucket_name,
@@ -95,18 +93,16 @@ class S3Manager:
95
93
  )
96
94
 
97
95
  #####################################################################
98
- def list_buckets(
99
- self
100
- ):
96
+ def list_buckets(self):
101
97
  # client = self.get_client()
102
98
  client = self.s3_client
103
99
  return client.list_buckets()
104
100
 
105
101
  #####################################################################
106
102
  def upload_nodd_file(
107
- self,
108
- file_name: str,
109
- key: str,
103
+ self,
104
+ file_name: str,
105
+ key: str,
110
106
  ):
111
107
  self.s3_client_noaa_wcsd_zarr_pds.upload_file(
112
108
  Filename=file_name,
@@ -117,115 +113,120 @@ class S3Manager:
117
113
 
118
114
  #####################################################################
119
115
  def upload_files_with_thread_pool_executor(
120
- self,
121
- all_files: list,
116
+ self,
117
+ all_files: list,
122
118
  ):
123
119
  # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
124
120
  all_uploads = []
125
121
  try: # TODO: problem with threadpool here, missing child files
126
122
  with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
127
- futures = [executor.submit(
128
- self.upload_nodd_file,
129
- all_file[0], # file_name
130
- all_file[1] # key
131
- ) for all_file in all_files]
123
+ futures = [
124
+ executor.submit(
125
+ self.upload_nodd_file,
126
+ all_file[0], # file_name
127
+ all_file[1], # key
128
+ )
129
+ for all_file in all_files
130
+ ]
132
131
  for future in as_completed(futures):
133
132
  result = future.result()
134
133
  if result:
135
134
  all_uploads.extend(result)
136
135
  except Exception as err:
137
136
  print(err)
138
- print('Done uploading files using threading pool.')
137
+ print("Done uploading files using threading pool.")
139
138
  return all_uploads
140
139
 
141
140
  #####################################################################
142
141
  def upload_zarr_files_to_bucket( # noaa-wcsd-model-pds
143
- self,
144
- local_directory,
145
- remote_directory,
142
+ self,
143
+ local_directory,
144
+ remote_directory,
146
145
  ):
147
146
  # Right now this is just for uploading a model store to s3
148
- print('Uploading files to output bucket.')
147
+ print("Uploading files to output bucket.")
149
148
  store_name = os.path.basename(local_directory)
150
149
  all_files = []
151
150
  for subdir, dirs, files in os.walk(local_directory):
152
151
  for file in files:
153
152
  local_path = os.path.join(subdir, file)
154
153
  # s3_key = os.path.join(object_prefix, local_path)
155
- s3_key = os.path.join(remote_directory, store_name, subdir.split(store_name)[-1].strip('/'))
154
+ s3_key = os.path.join(
155
+ remote_directory,
156
+ store_name,
157
+ subdir.split(store_name)[-1].strip("/"),
158
+ )
156
159
  all_files.append([local_path, s3_key])
157
160
 
158
161
  all_uploads = self.upload_files_with_thread_pool_executor(
159
162
  all_files=all_files,
160
163
  )
161
- print('Done uploading files to output bucket.')
164
+ print("Done uploading files to output bucket.")
162
165
  return all_uploads
163
166
 
164
167
  #####################################################################
165
168
  # used: raw-to-model
166
169
  def list_objects( # noaa-wcsd-pds and noaa-wcsd-model-pds
167
- self,
168
- bucket_name,
169
- prefix
170
+ self, bucket_name, prefix
170
171
  ):
171
172
  # analog to "find_children_objects"
172
173
  # Returns a list of key strings for each object in bucket defined by prefix
173
174
  s3_client = self.s3_client
174
175
  keys = []
175
- paginator = s3_client.get_paginator('list_objects_v2')
176
+ paginator = s3_client.get_paginator("list_objects_v2")
176
177
  page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
177
178
  for page in page_iterator:
178
- if 'Contents' in page.keys():
179
- keys.extend([k['Key'] for k in page['Contents']])
179
+ if "Contents" in page.keys():
180
+ keys.extend([k["Key"] for k in page["Contents"]])
180
181
  return keys
181
182
 
182
183
  def list_nodd_objects( # These are used by the geometry for uploading data
183
- self,
184
- prefix,
184
+ self,
185
+ prefix,
185
186
  ):
186
187
  # Returns a list of key strings for each object in bucket defined by prefix
187
188
  keys = []
188
- paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
189
+ paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
189
190
  for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
190
- if 'Contents' in page.keys():
191
- keys.extend([k['Key'] for k in page['Contents']])
191
+ if "Contents" in page.keys():
192
+ keys.extend([k["Key"] for k in page["Contents"]])
192
193
  return keys
193
194
 
194
195
  #####################################################################
195
196
  # TODO: change name to "directory"
196
- def folder_exists_and_not_empty(
197
- self,
198
- bucket_name: str,
199
- path: str
200
- ) -> bool:
201
- if not path.endswith('/'):
202
- path = path + '/'
197
+ def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
198
+ if not path.endswith("/"):
199
+ path = path + "/"
203
200
  s3_client = self.s3_client
204
- resp = self.list_objects(bucket_name=bucket_name, prefix=path) # TODO: this is returning root folder and doesn't include children or hidden folders
205
- #resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
206
- return 'Contents' in resp
201
+ resp = self.list_objects(
202
+ bucket_name=bucket_name, prefix=path
203
+ ) # TODO: this is returning root folder and doesn't include children or hidden folders
204
+ # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
205
+ return "Contents" in resp
207
206
 
208
207
  #####################################################################
209
208
  # used
210
209
  def __paginate_child_objects(
211
- self,
212
- bucket_name: str,
213
- sub_prefix: str = None,
210
+ self,
211
+ bucket_name: str,
212
+ sub_prefix: str = None,
214
213
  ) -> list:
215
- page_iterator = self.s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name, Prefix=sub_prefix)
214
+ page_iterator = self.s3_client.get_paginator("list_objects_v2").paginate(
215
+ Bucket=bucket_name, Prefix=sub_prefix
216
+ )
216
217
  objects = []
217
218
  for page in page_iterator:
218
- if 'Contents' in page.keys():
219
- objects.extend(page['Contents'])
219
+ if "Contents" in page.keys():
220
+ objects.extend(page["Contents"])
220
221
  return objects
221
222
 
222
223
  def get_child_objects(
223
- self,
224
- bucket_name: str,
225
- sub_prefix: str,
226
- file_suffix: str = None,
224
+ self,
225
+ bucket_name: str,
226
+ sub_prefix: str,
227
+ file_suffix: str = None,
227
228
  ) -> list:
228
- print('Getting child objects')
229
+ print("Getting child objects")
229
230
  raw_files = []
230
231
  try:
231
232
  children = self.__paginate_child_objects(
@@ -238,10 +239,10 @@ class S3Manager:
238
239
  for child in children:
239
240
  # Note: Any files with predicate 'NOISE' are to be ignored
240
241
  # see: "Bell_M._Shimada/SH1507" cruise for more details.
241
- if child['Key'].endswith(file_suffix) and not os.path.basename(child['Key']).startswith(
242
- 'NOISE'
243
- ):
244
- raw_files.append(child['Key'])
242
+ if child["Key"].endswith(file_suffix) and not os.path.basename(
243
+ child["Key"]
244
+ ).startswith("NOISE"):
245
+ raw_files.append(child["Key"])
245
246
  return raw_files
246
247
  except ClientError as err:
247
248
  print(f"Problem was encountered while getting s3 files: {err}")
@@ -251,10 +252,10 @@ class S3Manager:
251
252
 
252
253
  #####################################################################
253
254
  def get_object( # TODO: Move this to index.py
254
- # noaa-wcsd-pds or noaa-wcsd-model-pds
255
- self,
256
- bucket_name,
257
- key_name,
255
+ # noaa-wcsd-pds or noaa-wcsd-model-pds
256
+ self,
257
+ bucket_name,
258
+ key_name,
258
259
  ):
259
260
  # Meant for getting singular objects from a bucket, used by indexing lambda
260
261
  print(f"Getting object {key_name} from {bucket_name}")
@@ -274,18 +275,14 @@ class S3Manager:
274
275
  #####################################################################
275
276
  # used raw-to-model
276
277
  def download_file( # TODO: change to download_object
277
- # noaa-wcsd-pds or noaa-wcsd-model-pds
278
- self,
279
- bucket_name,
280
- key,
281
- file_name,
278
+ # noaa-wcsd-pds or noaa-wcsd-model-pds
279
+ self,
280
+ bucket_name,
281
+ key,
282
+ file_name,
282
283
  ):
283
- self.s3_client.download_file(
284
- Bucket=bucket_name,
285
- Key=key,
286
- Filename=file_name
287
- )
288
- print('downloaded file')
284
+ self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
285
+ print("downloaded file")
289
286
 
290
287
  #####################################################################
291
288
  # not used
@@ -299,19 +296,20 @@ class S3Manager:
299
296
 
300
297
  #####################################################################
301
298
  def delete_nodd_objects( # nodd-bucket
302
- self,
303
- objects: list,
299
+ self,
300
+ objects: list,
304
301
  ):
305
302
  try:
306
- print(f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches.")
303
+ print(
304
+ f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
305
+ )
307
306
  objects_to_delete = []
308
307
  for obj in objects:
309
- objects_to_delete.append({'Key': obj['Key']})
308
+ objects_to_delete.append({"Key": obj["Key"]})
310
309
  # Note: request can contain a list of up to 1000 keys
311
310
  for batch in chunked(ll=objects_to_delete, n=1000):
312
311
  self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
313
- Bucket=self.output_bucket_name,
314
- Delete={'Objects': batch}
312
+ Bucket=self.output_bucket_name, Delete={"Objects": batch}
315
313
  )
316
314
  print(f"Deleted files.")
317
315
  except Exception as err:
@@ -319,38 +317,30 @@ class S3Manager:
319
317
 
320
318
  #####################################################################
321
319
  # not used TODO: remove
322
- def put( # noaa-wcsd-model-pds
323
- self,
324
- bucket_name,
325
- key,
326
- body
327
- ):
328
- self.s3_client.put_object(
329
- Bucket=bucket_name,
330
- Key=key,
331
- Body=body
332
- )
320
+ def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
321
+ self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
333
322
 
334
323
  #####################################################################
335
324
  def read_s3_json(
336
- self,
337
- ship_name,
338
- cruise_name,
339
- sensor_name,
340
- file_name_stem,
325
+ self,
326
+ ship_name,
327
+ cruise_name,
328
+ sensor_name,
329
+ file_name_stem,
341
330
  ) -> str:
342
331
  try:
343
332
  content_object = self.s3_resource_noaa_wcsd_zarr_pds.Object(
344
333
  bucket_name=self.output_bucket_name,
345
- key=f'spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json'
334
+ key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
346
335
  ).get()
347
- file_content = content_object['Body'].read().decode('utf-8')
336
+ file_content = content_object["Body"].read().decode("utf-8")
348
337
  json_content = json.loads(file_content)
349
338
  return json_content
350
339
  except Exception as err: # Failure
351
- print(f'Exception encountered reading s3 GeoJSON: {err}')
340
+ print(f"Exception encountered reading s3 GeoJSON: {err}")
352
341
  raise
353
342
 
354
343
  #####################################################################
355
344
 
345
+
356
346
  #########################################################################