water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py

@@ -1,8 +1,12 @@
 import json
 import os
-import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
+from typing import Optional
+
+import boto3
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -24,16 +28,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
+        endpoint_url: Optional[str] = None,
     ):
-        self.
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
+        self.endpoint_url = endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -51,14 +48,14 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-
+            endpoint_url=self.endpoint_url,
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
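The point of threading an optional `endpoint_url` through the session, client, and resource objects above is testability: the same code path can target a local S3 stand-in such as a moto server or LocalStack instead of AWS. A minimal sketch of the pattern, assuming a local endpoint is running; the URL, pool size, and helper name are illustrative, not part of the package:

```python
import os
from typing import Optional

import boto3
from botocore.config import Config


def make_s3_client(endpoint_url: Optional[str] = None):
    """Build an S3 client; endpoint_url=None falls back to the real AWS endpoint."""
    return boto3.client(
        service_name="s3",
        config=Config(max_pool_connections=64),
        region_name=os.environ.get("AWS_REGION", "us-east-1"),
        endpoint_url=endpoint_url,  # e.g. "http://127.0.0.1:5000" for a local moto server
    )


# Hypothetical test usage against a local endpoint:
# client = make_s3_client(endpoint_url="http://127.0.0.1:5000")
# client.create_bucket(Bucket="my-test-bucket")
```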
@@ -68,39 +65,54 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-
+            endpoint_url=self.endpoint_url,
         )
-        self.s3_resource_noaa_wcsd_zarr_pds =
-
-
-
+        self.s3_resource_noaa_wcsd_zarr_pds = (
+            self.s3_session_noaa_wcsd_zarr_pds.resource(
+                service_name="s3",
+                config=self.s3_client_config,
+                region_name=self.s3_region,
+                endpoint_url=self.endpoint_url,
+            )
         )
-
-        self.
-
-
-        return self.s3_session.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
+        #
+        self.paginator = self.s3_client.get_paginator("list_objects_v2")
+        self.paginator_noaa_wcsd_zarr_pds = (
+            self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )

     #####################################################################
+    # tested
     def create_bucket(
         self,
         bucket_name: str,
     ):
-
-
-
-
-
-
-
+        """
+        Note: this function is only really meant to be used for creating test
+        buckets. It allows public read of all objects.
+        """
+        # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+        # public_policy = {
+        #     "Version": "2012-10-17",
+        #     "Statement": [
+        #         {
+        #             "Effect": "Allow",
+        #             "Principal": "*",
+        #             "Action": "s3:GetObject",
+        #             "Resource": f"arn:aws:s3:::{bucket_name}/*",
+        #         }
+        #     ],
+        # }
+        response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
+        print(response1)
+        # response = self.s3_client.put_bucket_policy(
+        #     Bucket=bucket_name, Policy=json.dumps(public_policy)
+        # )
+        # print(response)

     #####################################################################
+    # tested
     def list_buckets(self):
-        # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()

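The two paginators created in `__init__` matter because `list_objects_v2` returns at most 1,000 keys per call; a paginator follows the continuation tokens automatically. A sketch of how such a paginator is typically consumed (the `list_keys` helper is illustrative and not part of the package):

```python
from typing import List


def list_keys(s3_client, bucket_name: str, prefix: str) -> List[str]:
    """Collect every key under a prefix using the list_objects_v2 paginator."""
    paginator = s3_client.get_paginator("list_objects_v2")
    keys: List[str] = []
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # Pages with no matching objects omit the "Contents" entry entirely.
        keys.extend(obj["Key"] for obj in page.get("Contents", []))
    return keys
```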
@@ -114,7 +126,9 @@ class S3Manager:
         """
         Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
         """
-        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+            Filename=file_name, Key=key
+        )
         return key

     #####################################################################
@@ -141,67 +155,81 @@ class S3Manager:
                 if result:
                     all_uploads.extend([result])
         except Exception as err:
-
+            raise RuntimeError(f"Problem, {err}")
+
         print("Done uploading files using threading pool.")
         return all_uploads

     #####################################################################
-    #
-
-
-
-
-
-
-
-
-
-
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")

-
+    #####################################################################
+    # tested
     def upload_file(
-
-
-
-
+        self,
+        filename: str,
+        bucket_name: str,
+        key: str,
     ):
-        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
         self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)

     #####################################################################
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        all_files.append([local_path, s3_key])
-
-        all_uploads = self.upload_files_with_thread_pool_executor(
-            all_files=all_files,
-        )
-        print("Done uploading files to output bucket.")
-        return all_uploads
+    # tested
+    def check_if_object_exists(self, bucket_name, key_name) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
+        except botocore.exceptions.ClientError as e:
+            if e.response["Error"]["Code"] == "404":
+                # The object does not exist.
+                return False
+            elif e.response["Error"]["Code"] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise

     #####################################################################
-    #
-    def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
-
-        bucket_name,
-        prefix
-    ):
+    # tested
+    def list_objects(self, bucket_name, prefix): # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         # s3_client = self.s3_client
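The key-building loop in the new `upload_zarr_store_to_s3` (and its TODO about string splitting) amounts to: keep everything from `<cruise_name>.zarr` onward and prepend the object prefix, so `.../HB0806.zarr/.zattrs` becomes `<object_prefix>/HB0806.zarr/.zattrs`. A sketch of the same mapping using `os.path.relpath`, assuming a POSIX filesystem and a store located directly under `local_directory`; this is an alternative formulation, not the package's code:

```python
import os
from typing import List


def zarr_upload_manifest(local_directory: str, cruise_name: str, object_prefix: str) -> List[List[str]]:
    """Pair each file under <local_directory>/<cruise_name>.zarr with its S3 key."""
    store_root = os.path.join(local_directory, f"{cruise_name}.zarr")
    manifest = []
    for subdir, _dirs, files in os.walk(store_root):
        for file in files:
            local_path = os.path.join(subdir, file)
            # Relative to local_directory, the path already starts with "<cruise_name>.zarr/".
            relative_key = os.path.relpath(local_path, start=local_directory)
            manifest.append([local_path, os.path.join(object_prefix, relative_key)])
    return manifest
```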
@@ -213,32 +241,20 @@ class S3Manager:
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys

-    # def list_nodd_objects( # These are used by the geometry for uploading data
-    #     self,
-    #     prefix,
-    # ):
-    #     # Returns a list of key strings for each object in bucket defined by prefix
-    #     keys = []
-    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-    #     for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-    #         if "Contents" in page.keys():
-    #             keys.extend([k["Key"] for k in page["Contents"]])
-    #     return keys
-
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
-
-
-
-
-
-
-
-
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp

     #####################################################################
-    #
+    # private
     def __paginate_child_objects(
         self,
         bucket_name: str,
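With `folder_exists_and_not_empty` commented out, existence checks go through the `head_object`-based `check_if_object_exists` added earlier in this file. One caveat when reusing that pattern: botocore reports `e.response["Error"]["Code"]` as a string, so the comparison against the integer `403` in the new code will never match. A hedged sketch of the same check with string comparisons throughout:

```python
import botocore.exceptions


def object_exists(s3_client, bucket_name: str, key_name: str) -> bool:
    """Return True if HEAD succeeds, False on 404/403, and re-raise anything else."""
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
        return True
    except botocore.exceptions.ClientError as err:
        code = err.response["Error"]["Code"]  # a string such as "404" or "403"
        if code in ("404", "403"):
            return False
        raise
```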
@@ -253,6 +269,8 @@ class S3Manager:
                 objects.extend(page["Contents"])
         return objects

+    #####################################################################
+    # tested
     def get_child_objects(
         self,
         bucket_name: str,
@@ -284,13 +302,14 @@ class S3Manager:
         return raw_files

     #####################################################################
-
-
+    # tested
+    def get_object( # noaa-wcsd-pds or noaa-wcsd-zarr-pds
         self,
         bucket_name,
         key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
         print(f"Getting object {key_name} from {bucket_name}")
         try:
             response = self.s3_client.get_object(
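The new comment on `get_object` notes that a byte range could also be returned; S3's GetObject supports an HTTP `Range` header for exactly that, which is useful for peeking at the start of large raw files. A small sketch assuming an existing boto3 client; the helper name and range value are illustrative:

```python
from typing import Optional


def get_object_bytes(s3_client, bucket_name: str, key_name: str, byte_range: Optional[str] = None) -> bytes:
    """Fetch an object, or just a slice of it, and return the raw bytes."""
    kwargs = {"Bucket": bucket_name, "Key": key_name}
    if byte_range:
        kwargs["Range"] = byte_range  # e.g. "bytes=0-1023" reads only the first KiB
    response = s3_client.get_object(**kwargs)
    return response["Body"].read()
```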
@@ -299,82 +318,101 @@ class S3Manager:
             )
             # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
             # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
         except ClientError as err:
             print(f"Problem was encountered while getting s3 file: {err}")
             raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response

     #####################################################################
-    #
-    def download_file(
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def download_file(
         self,
         bucket_name,
         key,
-        file_name,
+        file_name, # path to where the file will be saved
     ):
-
-
-
-
-
-
-
-
-        # bucket_name,
-        # key
-        # ): # -> dict:
-        # #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-        # self.s3_client.delete_object(Bucket=bucket_name, Key=key)
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")

     #####################################################################
+    # tested
     def delete_nodd_objects(  # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
-            print(
-                f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
-            )
+            print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
             objects_to_delete = []
             for obj in objects:
                 objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
-            print(
+            print("Deleted files.")
         except Exception as err:
-
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")

     #####################################################################
-    #
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(f"Deleting {key_name} objects in {bucket_name}.")
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+                Bucket=bucket_name, Key=key_name
+            )
+            print("Deleted file.")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+    #####################################################################
+    # tested
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")

     #####################################################################
+    # tested
     def read_s3_json(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         file_name_stem,
+        output_bucket_name,  # TODO: change to just bucket_name
     ) -> str:
         try:
-
-
+            resource = self.s3_resource_noaa_wcsd_zarr_pds
+            content_object = resource.Object(
+                bucket_name=output_bucket_name,
                 key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
             ).get()
             file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
-        except Exception as err:
-
-            raise
-
-        #####################################################################
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")


 #########################################################################
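`delete_nodd_objects` now pauses half a second between `delete_objects` calls because large cleanups were hitting S3 `SlowDown` throttling, and each request is capped at the API limit of 1,000 keys. The `chunked` helper comes from the package's utility module; a self-contained sketch of the same batching-and-backoff pattern:

```python
from time import sleep
from typing import List


def delete_keys_in_batches(s3_client, bucket_name: str, keys: List[str], batch_size: int = 1000) -> None:
    """Delete keys in batches of at most 1,000, pausing between requests to avoid SlowDown errors."""
    for start in range(0, len(keys), batch_size):
        batch = [{"Key": key} for key in keys[start:start + batch_size]]
        sleep(0.5)  # throttle, mirroring the backoff added in the diff
        s3_client.delete_objects(Bucket=bucket_name, Delete={"Objects": batch})
```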
water_column_sonar_processing/aws/s3fs_manager.py

@@ -1,45 +1,29 @@
 import os
+from typing import Optional

 import s3fs

+
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG


 class S3FSManager:
     #####################################################################
     def __init__(
         self,
+        endpoint_url: Optional[str] = None,
     ):
-        self.
+        self.endpoint_url = endpoint_url
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
+            endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True
-            # use_ssl=False,
-            # skip_instance_cache=True,
-            # default_block_size='100MB', # if no specific value is given at all time. The built-in default is 5MB
-            # client_kwargs={
-            #     "region_name": self.__s3_region
-            # }
         )

-    #####################################################################
-    def add_file(self, filename):
-        full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
-        print(full_path)
-
-        self.s3fs.touch(full_path)
-        ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
-
-        print(ff)
-
-    #####################################################################
-    def upload_data(self, bucket_name, file_path, prefix):
-        # TODO: this works in theory but use boto3 to upload files
-        s3_path = f"s3://{bucket_name}/{prefix}/"
-        s3_file_system = self.s3fs
-        s3_file_system.put(file_path, s3_path, recursive=True)
-
     #####################################################################
     def s3_map(
         self,
@@ -52,17 +36,29 @@ class S3FSManager:
             root=s3_zarr_store_path, s3=self.s3fs
         )  # create=False, not false because will be writing

+    #####################################################################
+    # def add_file(self, filename):
+    #     full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+    #     print(full_path)
+    #
+    #     self.s3fs.touch(full_path)
+    #     ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+    #
+    #     print(ff)
+
+    #####################################################################
+    def upload_data(self, bucket_name, file_path, prefix):
+        # TODO: this works in theory but use boto3 to upload files
+        s3_path = f"s3://{bucket_name}/{prefix}/"
+        s3_file_system = self.s3fs
+        s3_file_system.put(file_path, s3_path, recursive=True)
+
     #####################################################################
     def exists(
         self,
-
+        s3_path,
     ):
-        s3_file_system =
-        return
+        # s3_file_system =
+        return self.s3fs.exists(s3_path)

     #####################################################################
-    # def put(
-    #     self
-    # ):
-    #     s3_file_system = self.s3fs
-    #     return
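`s3_map` stays the centerpiece of `S3FSManager`: it wraps an S3 prefix in an `S3Map` so a Zarr reader can treat the bucket as a key/value store. A minimal sketch of how such a mapping is typically consumed, assuming anonymous read access to the public `noaa-wcsd-zarr-pds` bucket; the store path is taken from a comment in the diff, and the `xarray` usage is an assumption rather than something this module does itself:

```python
import s3fs


def open_public_zarr_store(bucket_name: str, store_path: str):
    """Return a key/value mapping over s3://<bucket_name>/<store_path>."""
    fs = s3fs.S3FileSystem(anon=True)  # anonymous read access to a public bucket
    return s3fs.S3Map(root=f"s3://{bucket_name}/{store_path}", s3=fs)


# Hypothetical usage with xarray/zarr:
# store = open_public_zarr_store(
#     "noaa-wcsd-zarr-pds", "level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"
# )
# import xarray as xr
# ds = xr.open_zarr(store, consolidated=True)
```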
water_column_sonar_processing/aws/sqs_manager.py

@@ -35,7 +35,7 @@ class SQSManager:
     #######################################################
     def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
