water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff shows the contents of the two publicly released package versions as they appear in their public registry and is provided for informational purposes only.
Potentially problematic release: this version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +418 -0
- water_column_sonar_processing/aws/s3fs_manager.py +64 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +323 -0
- water_column_sonar_processing/geometry/__init__.py +13 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +241 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +381 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +741 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
water_column_sonar_processing/aws/s3_manager.py
@@ -0,0 +1,418 @@
+import json
+import os
+from collections.abc import Generator
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
+from typing import Optional
+
+import boto3
+import botocore
+from boto3.s3.transfer import TransferConfig
+from botocore.config import Config
+from botocore.exceptions import ClientError
+
+MAX_POOL_CONNECTIONS = 64
+MAX_CONCURRENCY = 64
+MAX_WORKERS = 64
+GB = 1024**3
+
+
+#########################################################################
+def chunked(ll: list, n: int) -> Generator:
+    # Yields successively n-sized chunks from ll.
+    for i in range(0, len(ll), n):
+        yield ll[i : i + n]
+
+
+class S3Manager:
+    #####################################################################
+    def __init__(
+        self,
+        endpoint_url: Optional[str] = None,
+    ):
+        self.endpoint_url = endpoint_url
+        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
+        self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
+        self.s3_transfer_config = TransferConfig(
+            max_concurrency=MAX_CONCURRENCY,
+            use_threads=True,
+            max_bandwidth=None,
+            multipart_threshold=10 * GB,
+        )
+        self.s3_session = boto3.Session(
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.s3_region,
+        )
+        self.s3_client = self.s3_session.client(
+            service_name="s3",
+            config=self.s3_client_config,
+            region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
+        )
+        self.s3_resource = boto3.resource(
+            service_name="s3",
+            config=self.s3_client_config,
+            region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
+        )
+        self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
+            aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
+            region_name=self.s3_region,
+        )
+        self.s3_client_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.client(
+            service_name="s3",
+            config=self.s3_client_config,
+            region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
+        )
+        self.s3_resource_noaa_wcsd_zarr_pds = (
+            self.s3_session_noaa_wcsd_zarr_pds.resource(
+                service_name="s3",
+                config=self.s3_client_config,
+                region_name=self.s3_region,
+                endpoint_url=self.endpoint_url,
+            )
+        )
+        #
+        self.paginator = self.s3_client.get_paginator("list_objects_v2")
+        self.paginator_noaa_wcsd_zarr_pds = (
+            self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
+        )
+
+    #####################################################################
+    # tested
+    def create_bucket(
+        self,
+        bucket_name: str,
+    ):
+        """
+        Note: this function is only really meant to be used for creating test
+        buckets. It allows public read of all objects.
+        """
+        # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+        # public_policy = {
+        #     "Version": "2012-10-17",
+        #     "Statement": [
+        #         {
+        #             "Effect": "Allow",
+        #             "Principal": "*",
+        #             "Action": "s3:GetObject",
+        #             "Resource": f"arn:aws:s3:::{bucket_name}/*",
+        #         }
+        #     ],
+        # }
+        response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
+        print(response1)
+        # response = self.s3_client.put_bucket_policy(
+        #     Bucket=bucket_name, Policy=json.dumps(public_policy)
+        # )
+        # print(response)
+
+    #####################################################################
+    # tested
+    def list_buckets(self):
+        client = self.s3_client
+        return client.list_buckets()
+
+    #####################################################################
+    def upload_nodd_file(
+        self,
+        file_name: str,
+        key: str,
+        output_bucket_name: str,
+    ):
+        """
+        Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
+        """
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+            Filename=file_name, Key=key
+        )
+        return key
+
+    #####################################################################
+    def upload_files_with_thread_pool_executor(
+        self,
+        output_bucket_name: str,
+        all_files: list,
+    ):
+        # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+        all_uploads = []
+        try:  # TODO: problem with threadpool here, missing child files
+            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+                futures = [
+                    executor.submit(
+                        self.upload_nodd_file,  # TODO: verify which one is using this
+                        all_file[0],  # file_name
+                        all_file[1],  # key
+                        output_bucket_name,  # output_bucket_name
+                    )
+                    for all_file in all_files
+                ]
+                for future in as_completed(futures):
+                    result = future.result()
+                    if result:
+                        all_uploads.extend([result])
+        except Exception as err:
+            raise RuntimeError(f"Problem, {err}")
+
+        print("Done uploading files using threading pool.")
+        return all_uploads
+
+    #####################################################################
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
+    #####################################################################
+    # tested
+    def upload_file(
+        self,
+        filename: str,
+        bucket_name: str,
+        key: str,
+    ):
+        self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
+
+    #####################################################################
+    # tested
+    def check_if_object_exists(self, bucket_name, key_name) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
+        except botocore.exceptions.ClientError as e:
+            if e.response["Error"]["Code"] == "404":
+                # The object does not exist.
+                return False
+            elif e.response["Error"]["Code"] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+
+    #####################################################################
+    # tested
+    def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
+        # analog to "find_children_objects"
+        # Returns a list of key strings for each object in bucket defined by prefix
+        # s3_client = self.s3_client
+        keys = []
+        # paginator = s3_client.get_paginator("list_objects_v2")
+        page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+        for page in page_iterator:
+            if "Contents" in page.keys():
+                keys.extend([k["Key"] for k in page["Contents"]])
+        return keys
+
+    #####################################################################
+    # TODO: change name to "directory"
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp
+
+    #####################################################################
+    # private
+    def __paginate_child_objects(
+        self,
+        bucket_name: str,
+        sub_prefix: str = None,
+    ) -> list:
+        page_iterator = self.s3_client.get_paginator("list_objects_v2").paginate(
+            Bucket=bucket_name, Prefix=sub_prefix
+        )
+        objects = []
+        for page in page_iterator:
+            if "Contents" in page.keys():
+                objects.extend(page["Contents"])
+        return objects
+
+    #####################################################################
+    # tested
+    def get_child_objects(
+        self,
+        bucket_name: str,
+        sub_prefix: str,
+        file_suffix: str = None,
+    ) -> list:
+        print("Getting child objects")
+        raw_files = []
+        try:
+            children = self.__paginate_child_objects(
+                bucket_name=bucket_name,
+                sub_prefix=sub_prefix,
+            )
+            if file_suffix is None:
+                raw_files = children
+            else:
+                for child in children:
+                    # Note: Any files with predicate 'NOISE' are to be ignored
+                    # see: "Bell_M._Shimada/SH1507" cruise for more details.
+                    if child["Key"].endswith(file_suffix) and not os.path.basename(
+                        child["Key"]
+                    ).startswith("NOISE"):
+                        raw_files.append(child["Key"])
+                return raw_files
+        except ClientError as err:
+            print(f"Problem was encountered while getting s3 files: {err}")
+            raise
+        print(f"Found {len(raw_files)} files.")
+        return raw_files
+
+    #####################################################################
+    # tested
+    def get_object(  # noaa-wcsd-pds or noaa-wcsd-zarr-pds
+        self,
+        bucket_name,
+        key_name,
+    ):
+        # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
+        print(f"Getting object {key_name} from {bucket_name}")
+        try:
+            response = self.s3_client.get_object(
+                Bucket=bucket_name,
+                Key=key_name,
+            )
+            # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
+            # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
+        except ClientError as err:
+            print(f"Problem was encountered while getting s3 file: {err}")
+            raise
+
+    #####################################################################
+    # tested
+    def download_file(
+        self,
+        bucket_name,
+        key,
+        file_name,  # path to where the file will be saved
+    ):
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")
+
+    #####################################################################
+    # tested
+    def delete_nodd_objects(  # nodd-bucket
+        self,
+        bucket_name,
+        objects: list,
+    ):
+        try:
+            print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
+            objects_to_delete = []
+            for obj in objects:
+                objects_to_delete.append({"Key": obj["Key"]})
+            # Note: request can contain a list of up to 1000 keys
+            for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
+                self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
+                    Bucket=bucket_name, Delete={"Objects": batch}
+                )
+            print("Deleted files.")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+    #####################################################################
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(f"Deleting {key_name} objects in {bucket_name}.")
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+                Bucket=bucket_name, Key=key_name
+            )
+            print("Deleted file.")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+    #####################################################################
+    # tested
+    def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")
+
+    #####################################################################
+    # tested
+    def read_s3_json(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name_stem,
+        output_bucket_name,  # TODO: change to just bucket_name
+    ) -> str:
+        try:
+            resource = self.s3_resource_noaa_wcsd_zarr_pds
+            content_object = resource.Object(
+                bucket_name=output_bucket_name,
+                key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
+            ).get()
+            file_content = content_object["Body"].read().decode("utf-8")
+            json_content = json.loads(file_content)
+            return json_content
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
+
+
+#########################################################################
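
As a rough usage sketch of the S3Manager class added above (illustrative only: the bucket name, prefixes, and local paths below are hypothetical, and the constructor reads ACCESS_KEY_ID/SECRET_ACCESS_KEY plus the OUTPUT_BUCKET_* credentials from the environment, as shown in __init__):

from water_column_sonar_processing.aws.s3_manager import S3Manager

# Hypothetical bucket and prefix, for illustration only.
bucket = "example-test-bucket"
prefix = "data/raw/Henry_B._Bigelow/HB0806/EK60"

s3_manager = S3Manager()  # or S3Manager(endpoint_url=...) against a mocked S3

# List every key under the prefix, then only the .raw files (NOISE* files are skipped).
keys = s3_manager.list_objects(bucket_name=bucket, prefix=prefix)
raw_files = s3_manager.get_child_objects(
    bucket_name=bucket, sub_prefix=prefix, file_suffix=".raw"
)

# Upload a local Zarr store; keys are assembled as {object_prefix}/{cruise_name}.zarr/...
s3_manager.upload_zarr_store_to_s3(
    output_bucket_name=bucket,
    local_directory="/tmp/output",
    object_prefix="level_2/Henry_B._Bigelow/HB0806/EK60",
    cruise_name="HB0806",
)

Note that upload_zarr_store_to_s3 ultimately writes through the s3_resource_noaa_wcsd_zarr_pds session, so the OUTPUT_BUCKET_* credentials must be set for that path.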
water_column_sonar_processing/aws/s3fs_manager.py
@@ -0,0 +1,64 @@
+import os
+from typing import Optional
+
+import s3fs
+
+
+# TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG
+
+
+class S3FSManager:
+    #####################################################################
+    def __init__(
+        self,
+        endpoint_url: Optional[str] = None,
+    ):
+        self.endpoint_url = endpoint_url
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
+        self.s3fs = s3fs.S3FileSystem(
+            endpoint_url=endpoint_url,
+            key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
+        )
+
+    #####################################################################
+    def s3_map(
+        self,
+        s3_zarr_store_path,  # f's3://{bucket}/{input_zarr_path}'
+    ):
+        # The "s3_zarr_store_path" is defined as f's3://{bucket}/{input_zarr_path}'
+        # create=False, not false because will be writing
+        # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
+        return s3fs.S3Map(
+            root=s3_zarr_store_path, s3=self.s3fs
+        )  # create=False, not false because will be writing
+
+    #####################################################################
+    # def add_file(self, filename):
+    #     full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+    #     print(full_path)
+    #
+    #     self.s3fs.touch(full_path)
+    #     ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+    #
+    #     print(ff)
+
+    #####################################################################
+    def upload_data(self, bucket_name, file_path, prefix):
+        # TODO: this works in theory but use boto3 to upload files
+        s3_path = f"s3://{bucket_name}/{prefix}/"
+        s3_file_system = self.s3fs
+        s3_file_system.put(file_path, s3_path, recursive=True)
+
+    #####################################################################
+    def exists(
+        self,
+        s3_path,
+    ):
+        # s3_file_system =
+        return self.s3fs.exists(s3_path)
+
+    #####################################################################
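
A minimal sketch of how the S3FSManager above might be combined with a Zarr-aware reader (xarray is not part of this diff and is used here only as an assumed example; the bucket, store path, and credentials are hypothetical):

import xarray as xr

from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

# OUTPUT_BUCKET_ACCESS_KEY / OUTPUT_BUCKET_SECRET_ACCESS_KEY must be set, per __init__ above.
s3fs_manager = S3FSManager()

# Wrap an existing Zarr store in an S3Map and hand it to a Zarr-aware reader.
store = s3fs_manager.s3_map(
    s3_zarr_store_path="s3://example-bucket/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"
)
ds = xr.open_zarr(store, consolidated=False)

# The same filesystem object also backs simple existence checks and recursive uploads.
print(s3fs_manager.exists("s3://example-bucket/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs"))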
{model → water_column_sonar_processing}/aws/sns_manager.py
@@ -1,5 +1,5 @@
 import os
-
+
 import boto3
 
 
@@ -7,32 +7,22 @@ import boto3
 class SNSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sns_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sns_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sns_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sns_region,
         )
         self.__sns_resource = self.__sns_session.resource(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
         self.__sns_client = self.__sns_session.client(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
 
     #######################################################
-    # TODO: pick one
-    # def publish_message(self, topic_arn, message):
-    #     response = self.__sns_client.publish(
-    #         TopicArn=topic_arn,
-    #         Message=message
-    #     )
-    #     print(f"Topic Response: {topic_arn} : '{message}' => {response}")
-
     # TODO: pick one
     def publish(self, topic_arn, message):
         response = self.__sns_client.publish(
@@ -55,13 +45,12 @@ class SNSManager:
     #######################################################
     def subscribe(self, topic_arn, endpoint):
         self.__sns_client.subscribe(
-            TopicArn=topic_arn,
-            Protocol='sqs',
-            Endpoint=endpoint
+            TopicArn=topic_arn, Protocol="sqs", Endpoint=endpoint
         )
 
     #######################################################
     def list_topics(self):
         print(self.__sns_client.list_topics())
 
-
+
+###########################################################
{model → water_column_sonar_processing}/aws/sqs_manager.py
@@ -1,34 +1,29 @@
 import os
+
 import boto3
-import time
 
 
 ###########################################################
 class SQSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sqs_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sqs_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sqs_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sqs_region,
         )
         self.__sqs_resource = self.__sqs_session.resource(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
         self.__sqs_client = self.__sqs_session.client(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
 
     #######################################################
-    def create_queue(
-        self,
-        queue_name
-    ):
+    def create_queue(self, queue_name):
         response = self.__sqs_client.create_queue(QueueName=queue_name)
         return response
 
@@ -38,13 +33,10 @@ class SQSManager:
         return sqs_queue
 
     #######################################################
-    def list_queues(
-        self,
-        queue_name_prefix
-    ):
+    def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 
-    #######################################################
+    #######################################################
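
Finally, a small sketch of how the refactored SNSManager and SQSManager might be used together (the queue name, topic ARN, and queue ARN below are hypothetical; both constructors read ACCESS_KEY_ID, SECRET_ACCESS_KEY, and AWS_REGION from the environment):

from water_column_sonar_processing.aws.sns_manager import SNSManager
from water_column_sonar_processing.aws.sqs_manager import SQSManager

# Create a queue; boto3's create_queue response includes the QueueUrl.
sqs_manager = SQSManager()
response = sqs_manager.create_queue(queue_name="example-processing-queue")
print(response["QueueUrl"])

sns_manager = SNSManager()
topic_arn = "arn:aws:sns:us-east-1:123456789012:example-topic"  # hypothetical ARN
queue_arn = "arn:aws:sqs:us-east-1:123456789012:example-processing-queue"  # hypothetical ARN

# subscribe() wires the queue to the topic with Protocol="sqs", so the endpoint is the queue ARN.
sns_manager.subscribe(topic_arn=topic_arn, endpoint=queue_arn)
sns_manager.publish(topic_arn=topic_arn, message='{"ship": "Henry_B._Bigelow", "cruise": "HB0806"}')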