water-column-sonar-processing 0.0.6__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +2 -5
- water_column_sonar_processing/aws/__init__.py +2 -2
- water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
- water_column_sonar_processing/aws/s3_manager.py +184 -112
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +144 -129
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +60 -44
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +157 -27
- water_column_sonar_processing/model/zarr_manager.py +663 -258
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -0
- water_column_sonar_processing/utility/constants.py +69 -14
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
- water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py:

```diff
@@ -2,8 +2,11 @@ import json
 import os
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
+from typing import Optional
 
 import boto3
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
```
```diff
@@ -25,10 +28,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-
+        endpoint_url: Optional[str] = None,
     ):
-        self.
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.endpoint_url = endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
```
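The constructor now takes an optional `endpoint_url` that gets threaded into every client, resource, and paginator below, so the whole class can be pointed at a local S3 stand-in during tests. A minimal usage sketch (the URL is illustrative, not something this package ships):

```python
# Hypothetical wiring: point S3Manager at a local S3 test double
# (e.g. a moto server or LocalStack); URL and region are invented.
import os
from water_column_sonar_processing.aws.s3_manager import S3Manager

os.environ["AWS_REGION"] = "us-east-1"
test_manager = S3Manager(endpoint_url="http://127.0.0.1:5555")  # test double
prod_manager = S3Manager()  # endpoint_url=None falls back to real AWS endpoints
```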
```diff
@@ -46,14 +48,14 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
-        # TODO: create both "s3_client_input" and "s3_client_output" ???
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
```
```diff
@@ -63,38 +65,54 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_resource_noaa_wcsd_zarr_pds = (
             self.s3_session_noaa_wcsd_zarr_pds.resource(
                 service_name="s3",
                 config=self.s3_client_config,
                 region_name=self.s3_region,
+                endpoint_url=self.endpoint_url,
             )
         )
-
-
-
-
-                config=self.__s3_client_config,
-                region_name=self.s3_region,
+        #
+        self.paginator = self.s3_client.get_paginator("list_objects_v2")
+        self.paginator_noaa_wcsd_zarr_pds = (
+            self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )
 
     #####################################################################
+    # tested
     def create_bucket(
         self,
         bucket_name: str,
     ):
-
-
-
-
-
-
-
+        """
+        Note: this function is only really meant to be used for creating test
+        buckets. It allows public read of all objects.
+        """
+        # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+        # public_policy = {
+        #     "Version": "2012-10-17",
+        #     "Statement": [
+        #         {
+        #             "Effect": "Allow",
+        #             "Principal": "*",
+        #             "Action": "s3:GetObject",
+        #             "Resource": f"arn:aws:s3:::{bucket_name}/*",
+        #         }
+        #     ],
+        # }
+        response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
+        print(response1)
+        # response = self.s3_client.put_bucket_policy(
+        #     Bucket=bucket_name, Policy=json.dumps(public_policy)
+        # )
+        # print(response)
 
     #####################################################################
+    # tested
     def list_buckets(self):
-        # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()
 
```
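Because the docstring marks `create_bucket` as test-only, here is a hedged sketch of how it might be exercised against moto's in-memory S3 (assumes moto >= 5 is installed; the bucket name and fake credentials are invented):

```python
import os

from moto import mock_aws

from water_column_sonar_processing.aws.s3_manager import S3Manager


@mock_aws  # all boto3 calls below hit moto's in-memory S3, not AWS
def test_create_bucket():
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_REGION"] = "us-east-1"
    s3_manager = S3Manager()
    s3_manager.create_bucket(bucket_name="example-test-bucket")
    names = [b["Name"] for b in s3_manager.list_buckets()["Buckets"]]
    assert "example-test-bucket" in names
```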
```diff
@@ -103,17 +121,20 @@ class S3Manager:
         self,
         file_name: str,
         key: str,
+        output_bucket_name: str,
     ):
-
-
-
-
+        """
+        Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
+        """
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+            Filename=file_name, Key=key
         )
         return key
 
     #####################################################################
     def upload_files_with_thread_pool_executor(
         self,
+        output_bucket_name: str,
         all_files: list,
     ):
         # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
```
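For reference, the call shape these two signatures imply, with `output_bucket_name` now passed explicitly rather than read from the environment; the bucket, paths, and keys here are invented:

```python
# Illustrative call only; 'all_files' is a list of [local_path, s3_key] pairs.
s3_manager = S3Manager()
s3_manager.upload_files_with_thread_pool_executor(
    output_bucket_name="example-output-bucket",
    all_files=[
        ["/tmp/HB0806.zarr/.zattrs", "level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs"],
        ["/tmp/HB0806.zarr/.zgroup", "level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zgroup"],
    ],
)
```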
```diff
@@ -122,90 +143,118 @@
         with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
             futures = [
                 executor.submit(
-                    self.upload_nodd_file,
+                    self.upload_nodd_file,  # TODO: verify which one is using this
                     all_file[0],  # file_name
                     all_file[1],  # key
+                    output_bucket_name,  # output_bucket_name
                 )
                 for all_file in all_files
             ]
             for future in as_completed(futures):
                 result = future.result()
                 if result:
-                    all_uploads.extend(result)
+                    all_uploads.extend([result])
         except Exception as err:
-
+            raise RuntimeError(f"Problem, {err}")
+
         print("Done uploading files using threading pool.")
         return all_uploads
 
     #####################################################################
-
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
+    #####################################################################
+    # tested
+    def upload_file(
         self,
-
-
+        filename: str,
+        bucket_name: str,
+        key: str,
     ):
-
-        print("Uploading files to output bucket.")
-        store_name = os.path.basename(local_directory)
-        all_files = []
-        for subdir, dirs, files in os.walk(local_directory):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(
-                    remote_directory,
-                    store_name,
-                    subdir.split(store_name)[-1].strip("/"),
-                )
-                all_files.append([local_path, s3_key])
+        self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
 
-
-
-
-
-
+    #####################################################################
+    # tested
+    def check_if_object_exists(self, bucket_name, key_name) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
+        except botocore.exceptions.ClientError as e:
+            if e.response["Error"]["Code"] == "404":
+                # The object does not exist.
+                return False
+            elif e.response["Error"]["Code"] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
 
     #####################################################################
-    #
-    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-
-
-    ):
+    # tested
+    def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
-        s3_client = self.s3_client
+        # s3_client = self.s3_client
         keys = []
-        paginator = s3_client.get_paginator("list_objects_v2")
-        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+        # paginator = s3_client.get_paginator("list_objects_v2")
+        page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
         for page in page_iterator:
             if "Contents" in page.keys():
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    def list_nodd_objects(  # These are used by the geometry for uploading data
-        self,
-        prefix,
-    ):
-        # Returns a list of key strings for each object in bucket defined by prefix
-        keys = []
-        paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
-        for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-            if "Contents" in page.keys():
-                keys.extend([k["Key"] for k in page["Contents"]])
-        return keys
-
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
-
-
-
-
-
-
-
-
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp
 
     #####################################################################
-    #
+    # private
     def __paginate_child_objects(
         self,
         bucket_name: str,
```
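The `subdir[subdir.find(f"{cruise_name}.zarr") :]` slice in the new `upload_zarr_store_to_s3` keeps everything from the store name onward, so the local scratch prefix drops out of the S3 key. A worked example with invented paths:

```python
import os

cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"
subdir = "/tmp/scratch/HB0806.zarr/Sv"  # one directory yielded by os.walk
file = "0.0.0"                          # one chunk file inside that directory

# subdir.find("HB0806.zarr") == 13, so the slice is "HB0806.zarr/Sv"
s3_key = os.path.join(
    object_prefix,
    os.path.join(subdir[subdir.find(f"{cruise_name}.zarr") :], file),
)
print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/Sv/0.0.0
```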
```diff
@@ -220,6 +269,8 @@
             objects.extend(page["Contents"])
         return objects
 
+    #####################################################################
+    # tested
     def get_child_objects(
         self,
         bucket_name: str,
```
```diff
@@ -251,13 +302,14 @@
         return raw_files
 
     #####################################################################
-
-
+    # tested
+    def get_object(  # noaa-wcsd-pds or noaa-wcsd-zarr-pds
         self,
         bucket_name,
         key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
         print(f"Getting object {key_name} from {bucket_name}")
         try:
             response = self.s3_client.get_object(
```
```diff
@@ -266,81 +318,101 @@
             )
             # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
             # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
         except ClientError as err:
             print(f"Problem was encountered while getting s3 file: {err}")
             raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response
 
     #####################################################################
-    #
-    def download_file(
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def download_file(
         self,
         bucket_name,
         key,
-        file_name,
+        file_name,  # path to where the file will be saved
     ):
-
-
-
-
-
-
-
-
-        # key
-        # ):  # -> dict:
-        #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-        #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")
 
     #####################################################################
+    # tested
     def delete_nodd_objects(  # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
-            print(
-                f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
-            )
+            print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
             objects_to_delete = []
             for obj in objects:
                 objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
-            print(
+            print("Deleted files.")
         except Exception as err:
-
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
 
     #####################################################################
-    #
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(f"Deleting {key_name} objects in {bucket_name}.")
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+                Bucket=bucket_name, Key=key_name
+            )
+            print("Deleted file.")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+    #####################################################################
+    # tested
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")
 
     #####################################################################
+    # tested
     def read_s3_json(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         file_name_stem,
+        output_bucket_name,  # TODO: change to just bucket_name
     ) -> str:
         try:
-
-
+            resource = self.s3_resource_noaa_wcsd_zarr_pds
+            content_object = resource.Object(
+                bucket_name=output_bucket_name,
                 key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
             ).get()
             file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
-        except Exception as err:
-
-            raise
-
-        #####################################################################
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
 
 
 #########################################################################
```
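`delete_nodd_objects` batches deletions through a `chunked(ll=..., n=...)` helper imported from elsewhere in the package, since S3 `DeleteObjects` accepts at most 1000 keys per request. A hypothetical sketch of such a helper (the package's real implementation may differ):

```python
from collections.abc import Generator


def chunked(ll: list, n: int) -> Generator[list, None, None]:
    """Yield successive n-sized batches from ll."""
    for i in range(0, len(ll), n):
        yield ll[i : i + n]


# e.g. 2500 keys split into batches of 1000, 1000, and 500
batches = list(chunked(ll=[{"Key": str(i)} for i in range(2500)], n=1000))
assert [len(b) for b in batches] == [1000, 1000, 500]
```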
water_column_sonar_processing/aws/s3fs_manager.py:

```diff
@@ -1,45 +1,29 @@
 import os
+from typing import Optional
 
 import s3fs
 
+
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG
 
 
 class S3FSManager:
     #####################################################################
     def __init__(
         self,
+        endpoint_url: Optional[str] = None,
     ):
-        self.
+        self.endpoint_url = endpoint_url
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
+            endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True
-            # use_ssl=False,
-            # skip_instance_cache=True,
-            # default_block_size='100MB',  # if no specific value is given at all time. The built-in default is 5MB
-            # client_kwargs={
-            #     "region_name": self.__s3_region
-            # }
         )
 
-    #####################################################################
-    def add_file(self, filename):
-        full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
-        print(full_path)
-
-        self.s3fs.touch(full_path)
-        ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
-
-        print(ff)
-
-    #####################################################################
-    def upload_data(self, bucket_name, file_path, prefix):
-        # TODO: this works in theory but use boto3 to upload files
-        s3_path = f"s3://{bucket_name}/{prefix}/"
-        s3_file_system = self.s3fs
-        s3_file_system.put(file_path, s3_path, recursive=True)
-
     #####################################################################
     def s3_map(
         self,
```
```diff
@@ -52,17 +36,29 @@
             root=s3_zarr_store_path, s3=self.s3fs
         )  # create=False, not false because will be writing
 
+    #####################################################################
+    # def add_file(self, filename):
+    #     full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+    #     print(full_path)
+    #
+    #     self.s3fs.touch(full_path)
+    #     ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+    #
+    #     print(ff)
+
+    #####################################################################
+    def upload_data(self, bucket_name, file_path, prefix):
+        # TODO: this works in theory but use boto3 to upload files
+        s3_path = f"s3://{bucket_name}/{prefix}/"
+        s3_file_system = self.s3fs
+        s3_file_system.put(file_path, s3_path, recursive=True)
+
     #####################################################################
     def exists(
         self,
-
+        s3_path,
     ):
-        s3_file_system =
-        return
+        # s3_file_system =
+        return self.s3fs.exists(s3_path)
 
     #####################################################################
-    # def put(
-    #     self
-    # ):
-    #     s3_file_system = self.s3fs
-    #     return
```
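`s3_map` wraps a store path in an `s3fs.S3Map` for reading and writing Zarr data. A hedged usage sketch, assuming `s3_map` returns the map it constructs and that the parameter is named `s3_zarr_store_path` as the body suggests; the store path and the use of `xarray` are illustrative, not shown by this diff:

```python
import xarray as xr

from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

s3fs_manager = S3FSManager()  # or S3FSManager(endpoint_url=...) against a test double
store = s3fs_manager.s3_map(
    s3_zarr_store_path="s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"
)
ds = xr.open_zarr(store)  # lazy open of the cruise-level Zarr store
```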
water_column_sonar_processing/aws/sqs_manager.py:

```diff
@@ -35,7 +35,7 @@ class SQSManager:
     #######################################################
     def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 
```