water-column-sonar-processing 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- {aws_manager → water_column_sonar_processing/aws}/dynamodb_manager.py +71 -50
- {aws_manager → water_column_sonar_processing/aws}/s3_manager.py +120 -130
- {aws_manager → water_column_sonar_processing/aws}/s3fs_manager.py +13 -19
- {aws_manager → water_column_sonar_processing/aws}/sns_manager.py +10 -21
- {aws_manager → water_column_sonar_processing/aws}/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- {cruise → water_column_sonar_processing/cruise}/create_empty_zarr_store.py +62 -44
- {cruise → water_column_sonar_processing/cruise}/resample_regrid.py +117 -66
- water_column_sonar_processing/geometry/__init__.py +5 -0
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_manager.py +80 -49
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_simplification.py +13 -12
- {geometry_manager → water_column_sonar_processing/geometry}/pmtile_generation.py +25 -24
- water_column_sonar_processing/index/__init__.py +3 -0
- {index_manager → water_column_sonar_processing/index}/index_manager.py +106 -82
- water_column_sonar_processing/model/__init__.py +3 -0
- {zarr_manager → water_column_sonar_processing/model}/zarr_manager.py +119 -83
- water_column_sonar_processing/process.py +147 -0
- water_column_sonar_processing/utility/__init__.py +6 -0
- {utility → water_column_sonar_processing/utility}/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +63 -0
- {utility → water_column_sonar_processing/utility}/pipeline_status.py +37 -10
- {utility → water_column_sonar_processing/utility}/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.6.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- aws_manager/__init__.py +0 -4
- cruise/__init__.py +0 -0
- geometry_manager/__init__.py +0 -0
- index_manager/__init__.py +0 -0
- model.py +0 -140
- utility/__init__.py +0 -0
- utility/constants.py +0 -56
- water_column_sonar_processing-0.0.4.dist-info/RECORD +0 -29
- water_column_sonar_processing-0.0.4.dist-info/top_level.txt +0 -8
- zarr_manager/__init__.py +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
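Taken together, the file list is mostly a repackaging: the flat top-level modules of 0.0.4 (`aws_manager`, `cruise`, `geometry_manager`, `index_manager`, `utility`, `zarr_manager`, plus the deleted `model.py`) move under a single `water_column_sonar_processing` package in 0.0.6, with new `__init__.py` files and a new `process.py`. A minimal sketch of how imports shift, assuming the new `__init__.py` files re-export the manager classes (the `+7`/`+16` line counts suggest re-exports, but the exact exported names are an assumption):

```python
# 0.0.4: flat top-level modules shipped in the wheel
# from aws_manager.s3_manager import S3Manager

# 0.0.6: one importable package; assumes water_column_sonar_processing/aws/__init__.py
# re-exports the class (hypothetical, inferred from the +7 line __init__.py)
from water_column_sonar_processing.aws import S3Manager
```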
{aws_manager → water_column_sonar_processing/aws}/s3_manager.py

```diff
@@ -1,47 +1,45 @@
 import json
 import os
-import boto3
-# import pandas as pd
 from collections.abc import Generator
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
-
-from botocore.config import Config
+import boto3
 from boto3.s3.transfer import TransferConfig
+from botocore.config import Config
 from botocore.exceptions import ClientError
-from concurrent.futures import ThreadPoolExecutor
-from concurrent.futures import as_completed
 
 MAX_POOL_CONNECTIONS = 64
 MAX_CONCURRENCY = 64
 MAX_WORKERS = 64
-GB = 1024
+GB = 1024**3
+
 
 #########################################################################
 def chunked(ll: list, n: int) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
-        yield ll[i:i + n]
+        yield ll[i : i + n]
 
 
 class S3Manager:
     #####################################################################
     def __init__(
-
-
+        self,
+        # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
-        self.input_bucket_name = os.environ.get(
-        self.output_bucket_name = os.environ.get(
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
             max_concurrency=MAX_CONCURRENCY,
             use_threads=True,
             max_bandwidth=None,
-            multipart_threshold=10 * GB
+            multipart_threshold=10 * GB,
        )
         self.s3_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
             region_name=self.s3_region,
         )
         self.s3_client = self.s3_session.client(
```
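Two fixes in this hunk are easy to miss: the imports are regrouped with third-party modules after the stdlib block, and `GB = 1024` becomes `GB = 1024**3`, which changes the meaning of `multipart_threshold=10 * GB` from roughly 10 KiB to 10 GiB. A quick standalone check of the threshold arithmetic (a sketch, not part of the package):

```python
from boto3.s3.transfer import TransferConfig

GB = 1024**3  # bytes per GiB

# 0.0.4: GB = 1024 made the threshold 10 * 1024 = 10_240 bytes (~10 KiB),
# so effectively every object went through a multipart upload.
# 0.0.6: 10 * 1024**3 = 10_737_418_240 bytes, i.e. multipart only above 10 GiB.
config = TransferConfig(
    max_concurrency=64,
    use_threads=True,
    max_bandwidth=None,
    multipart_threshold=10 * GB,
)
print(config.multipart_threshold)  # 10737418240
```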
```diff
@@ -57,8 +55,8 @@ class S3Manager:
         # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         # TODO: create both "s3_client_input" and "s3_client_output" ???
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
+            aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
             region_name=self.s3_region,
         )
         self.s3_client_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.client(
@@ -66,15 +64,15 @@ class S3Manager:
             config=self.s3_client_config,
             region_name=self.s3_region,
         )
-        self.s3_resource_noaa_wcsd_zarr_pds =
-
-
-
+        self.s3_resource_noaa_wcsd_zarr_pds = (
+            self.s3_session_noaa_wcsd_zarr_pds.resource(
+                service_name="s3",
+                config=self.s3_client_config,
+                region_name=self.s3_region,
+            )
         )
 
-    def get_client(
-        self
-    ):
+    def get_client(self):
         return self.s3_session.client(
             service_name="s3",
             config=self.__s3_client_config,
@@ -83,8 +81,8 @@ class S3Manager:
 
     #####################################################################
     def create_bucket(
-
-
+        self,
+        bucket_name: str,
     ):
         self.s3_client.create_bucket(
             Bucket=bucket_name,
@@ -95,18 +93,16 @@ class S3Manager:
         )
 
     #####################################################################
-    def list_buckets(
-        self
-    ):
+    def list_buckets(self):
         # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()
 
     #####################################################################
     def upload_nodd_file(
-
-
-
+        self,
+        file_name: str,
+        key: str,
     ):
         self.s3_client_noaa_wcsd_zarr_pds.upload_file(
             Filename=file_name,
```
```diff
@@ -117,115 +113,120 @@ class S3Manager:
 
     #####################################################################
     def upload_files_with_thread_pool_executor(
-
-
+        self,
+        all_files: list,
     ):
         # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_uploads = []
         try:  # TODO: problem with threadpool here, missing child files
             with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-                futures = [
-
-
-
-
+                futures = [
+                    executor.submit(
+                        self.upload_nodd_file,
+                        all_file[0],  # file_name
+                        all_file[1],  # key
+                    )
+                    for all_file in all_files
+                ]
                 for future in as_completed(futures):
                     result = future.result()
                     if result:
                         all_uploads.extend(result)
         except Exception as err:
             print(err)
-        print(
+        print("Done uploading files using threading pool.")
         return all_uploads
 
     #####################################################################
-    def upload_zarr_files_to_bucket(  # noaa-wcsd-
-
-
-
+    def upload_zarr_files_to_bucket(  # noaa-wcsd-model-pds
+        self,
+        local_directory,
+        remote_directory,
     ):
-        # Right now this is just for uploading a
-        print(
+        # Right now this is just for uploading a model store to s3
+        print("Uploading files to output bucket.")
         store_name = os.path.basename(local_directory)
         all_files = []
         for subdir, dirs, files in os.walk(local_directory):
             for file in files:
                 local_path = os.path.join(subdir, file)
                 # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(
+                s3_key = os.path.join(
+                    remote_directory,
+                    store_name,
+                    subdir.split(store_name)[-1].strip("/"),
+                )
                 all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
             all_files=all_files,
         )
-        print(
+        print("Done uploading files to output bucket.")
         return all_uploads
 
     #####################################################################
-    # used: raw-to-
-    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-
-
-        bucket_name,
-        prefix
+    # used: raw-to-model
+    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-model-pds
+        self, bucket_name, prefix
     ):
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         s3_client = self.s3_client
         keys = []
-        paginator = s3_client.get_paginator(
+        paginator = s3_client.get_paginator("list_objects_v2")
         page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
         for page in page_iterator:
-            if
-                keys.extend([k[
+            if "Contents" in page.keys():
+                keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    def list_nodd_objects(  # These are used by the
-
-
+    def list_nodd_objects(  # These are used by the geometry for uploading data
+        self,
+        prefix,
     ):
         # Returns a list of key strings for each object in bucket defined by prefix
         keys = []
-        paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator(
+        paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-            if
-                keys.extend([k[
+            if "Contents" in page.keys():
+                keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(
-
-
-        path: str
-    ) -> bool:
-        if not path.endswith('/'):
-            path = path + '/'
+    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+        if not path.endswith("/"):
+            path = path + "/"
         s3_client = self.s3_client
-        resp = self.list_objects(
-
-
+        resp = self.list_objects(
+            bucket_name=bucket_name, prefix=path
+        )  # TODO: this is returning root folder and doesn't include children or hidden folders
+        # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+        return "Contents" in resp
 
     #####################################################################
     # used
     def __paginate_child_objects(
-
-
-
+        self,
+        bucket_name: str,
+        sub_prefix: str = None,
     ) -> list:
-        page_iterator = self.s3_client.get_paginator(
+        page_iterator = self.s3_client.get_paginator("list_objects_v2").paginate(
+            Bucket=bucket_name, Prefix=sub_prefix
+        )
         objects = []
         for page in page_iterator:
-            if
-                objects.extend(page[
+            if "Contents" in page.keys():
+                objects.extend(page["Contents"])
         return objects
 
     def get_child_objects(
-
-
-
-
+        self,
+        bucket_name: str,
+        sub_prefix: str,
+        file_suffix: str = None,
     ) -> list:
-        print(
+        print("Getting child objects")
         raw_files = []
         try:
             children = self.__paginate_child_objects(
```
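The threading refactor above replaces the two separate `concurrent.futures` imports and a multi-line `futures` assignment with a single comprehension over `executor.submit(...)`. The pattern in isolation (a sketch with a stand-in upload function and made-up paths; note that an exception raised in a worker only surfaces when `future.result()` is called, which is relevant to the "missing child files" TODO):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

def upload_one(file_name: str, key: str) -> str:
    # stand-in for S3Manager.upload_nodd_file
    print(f"uploading {file_name} -> {key}")
    return key

# same shape as 'all_files': a list of [local_path, s3_key] pairs (paths illustrative)
all_files = [
    ["/tmp/store.zarr/.zattrs", "data/store.zarr/.zattrs"],
    ["/tmp/store.zarr/0.0.0", "data/store.zarr/0.0.0"],
]

with ThreadPoolExecutor(max_workers=64) as executor:
    futures = [
        executor.submit(upload_one, all_file[0], all_file[1])
        for all_file in all_files
    ]
    for future in as_completed(futures):
        print(future.result())  # re-raises any worker exception here
```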
```diff
@@ -238,10 +239,10 @@ class S3Manager:
             for child in children:
                 # Note: Any files with predicate 'NOISE' are to be ignored
                 # see: "Bell_M._Shimada/SH1507" cruise for more details.
-                if child[
-
-                ):
-                    raw_files.append(child[
+                if child["Key"].endswith(file_suffix) and not os.path.basename(
+                    child["Key"]
+                ).startswith("NOISE"):
+                    raw_files.append(child["Key"])
             return raw_files
         except ClientError as err:
             print(f"Problem was encountered while getting s3 files: {err}")
@@ -250,11 +251,11 @@ class S3Manager:
         return raw_files
 
     #####################################################################
-    def get_object(  # TODO: Move this to
-
-
-
-
+    def get_object(  # TODO: Move this to index.py
+        # noaa-wcsd-pds or noaa-wcsd-model-pds
+        self,
+        bucket_name,
+        key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
         print(f"Getting object {key_name} from {bucket_name}")
@@ -272,24 +273,20 @@ class S3Manager:
         return response
 
     #####################################################################
-    # used raw-to-
+    # used raw-to-model
     def download_file(  # TODO: change to download_object
-
-
-
-
-
+        # noaa-wcsd-pds or noaa-wcsd-model-pds
+        self,
+        bucket_name,
+        key,
+        file_name,
     ):
-        self.s3_client.download_file(
-
-            Key=key,
-            Filename=file_name
-        )
-        print('downloaded file')
+        self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
+        print("downloaded file")
 
     #####################################################################
     # not used
-    # def delete_nodd_object(  # noaa-wcsd-
+    # def delete_nodd_object(  # noaa-wcsd-model-pds
     # self,
     # bucket_name,
     # key
@@ -299,19 +296,20 @@ class S3Manager:
 
     #####################################################################
     def delete_nodd_objects(  # nodd-bucket
-
-
+        self,
+        objects: list,
     ):
         try:
-            print(
+            print(
+                f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
+            )
             objects_to_delete = []
             for obj in objects:
-                objects_to_delete.append({
+                objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=self.output_bucket_name,
-                    Delete={'Objects': batch}
+                    Bucket=self.output_bucket_name, Delete={"Objects": batch}
                 )
             print(f"Deleted files.")
         except Exception as err:
```
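`delete_nodd_objects` leans on the module-level `chunked` generator from the first hunk, because the S3 `DeleteObjects` call accepts at most 1000 keys per request (the limit the inline comment cites). The batching in isolation (key names are made up):

```python
from collections.abc import Generator

def chunked(ll: list, n: int) -> Generator:
    # same helper defined at the top of s3_manager.py
    for i in range(0, len(ll), n):
        yield ll[i : i + n]

objects_to_delete = [{"Key": f"store.zarr/{i}"} for i in range(2500)]  # illustrative keys
for batch in chunked(ll=objects_to_delete, n=1000):
    # each batch is what gets passed as Delete={"Objects": batch}
    print(len(batch))  # 1000, 1000, 500
```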
```diff
@@ -319,38 +317,30 @@ class S3Manager:
 
     #####################################################################
     # not used TODO: remove
-    def put(  # noaa-wcsd-
-
-        bucket_name,
-        key,
-        body
-    ):
-        self.s3_client.put_object(
-            Bucket=bucket_name,
-            Key=key,
-            Body=body
-        )
+    def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
+        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
 
     #####################################################################
     def read_s3_json(
-
-
-
-
-
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name_stem,
     ) -> str:
         try:
             content_object = self.s3_resource_noaa_wcsd_zarr_pds.Object(
                 bucket_name=self.output_bucket_name,
-                key=f
+                key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
             ).get()
-            file_content = content_object[
+            file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
         except Exception as err:  # Failure
-            print(f
+            print(f"Exception encountered reading s3 GeoJSON: {err}")
             raise
 
     #####################################################################
 
+
 #########################################################################
```
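The completed f-string in `read_s3_json` also documents where the per-file GeoJSON lands in the output bucket. A hypothetical call (the ship and cruise follow the `Bell_M._Shimada/SH1507` naming seen in the NOISE comment above; the sensor and file stem are invented for illustration):

```python
s3_manager = S3Manager()  # requires the OUTPUT_BUCKET_* environment variables
geojson = s3_manager.read_s3_json(
    ship_name="Bell_M._Shimada",
    cruise_name="SH1507",
    sensor_name="EK60",                  # hypothetical sensor directory
    file_name_stem="D20150728-T000000",  # hypothetical raw-file stem
)
# reads key: spatial/geojson/Bell_M._Shimada/SH1507/EK60/D20150728-T000000.json
```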
{aws_manager → water_column_sonar_processing/aws}/s3fs_manager.py

```diff
@@ -1,6 +1,6 @@
 import os
-import s3fs
 
+import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 
@@ -8,12 +8,12 @@ import s3fs
 class S3FSManager:
     #####################################################################
     def __init__(
-
+        self,
     ):
         self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            key=os.environ.get(
-            secret=os.environ.get(
+            key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
             # asynchronous=True
             # use_ssl=False,
             # skip_instance_cache=True,
@@ -24,10 +24,7 @@ class S3FSManager:
         )
 
     #####################################################################
-    def add_file(
-        self,
-        filename
-    ):
+    def add_file(self, filename):
         full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
         print(full_path)
 
@@ -37,12 +34,7 @@ class S3FSManager:
         print(ff)
 
     #####################################################################
-    def upload_data(
-        self,
-        bucket_name,
-        file_path,
-        prefix
-    ):
+    def upload_data(self, bucket_name, file_path, prefix):
         # TODO: this works in theory but use boto3 to upload files
         s3_path = f"s3://{bucket_name}/{prefix}/"
         s3_file_system = self.s3fs
@@ -50,18 +42,20 @@ class S3FSManager:
 
     #####################################################################
     def s3_map(
-
-
+        self,
+        s3_zarr_store_path,  # f's3://{bucket}/{input_zarr_path}'
     ):
         # The "s3_zarr_store_path" is defined as f's3://{bucket}/{input_zarr_path}'
         # create=False, not false because will be writing
         # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
-        return s3fs.S3Map(
+        return s3fs.S3Map(
+            root=s3_zarr_store_path, s3=self.s3fs
+        )  # create=False, not false because will be writing
 
     #####################################################################
     def exists(
-
-
+        self,
+        geo_json_s3_path,
     ):
         s3_file_system = self.s3fs
         return s3_file_system.exists(path=geo_json_s3_path)
```
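`s3_map()` returns an `s3fs.S3Map`, i.e. a `MutableMapping` over the keys under the store path, which is the interface Zarr readers and writers expect. A read-only sketch against a public bucket (the store path is illustrative, and `xarray` as the consumer is an assumption based on the `resample_regrid.py` module in the file list):

```python
import s3fs
import xarray as xr  # assumed consumer; not imported by s3fs_manager.py itself

fs = s3fs.S3FileSystem(anon=True)  # anonymous read access to a public bucket
store = s3fs.S3Map(
    root="s3://example-bucket/level_2/example_cruise.zarr",  # illustrative path
    s3=fs,
)
ds = xr.open_zarr(store)  # the S3Map behaves like a dict of chunk-key -> bytes
print(ds)
```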
{aws_manager → water_column_sonar_processing/aws}/sns_manager.py

```diff
@@ -1,5 +1,5 @@
 import os
-
+
 import boto3
 
 
@@ -7,32 +7,22 @@ import boto3
 class SNSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sns_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sns_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sns_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sns_region,
         )
         self.__sns_resource = self.__sns_session.resource(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
         self.__sns_client = self.__sns_session.client(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
 
     #######################################################
-    # TODO: pick one
-    # def publish_message(self, topic_arn, message):
-    #     response = self.__sns_client.publish(
-    #         TopicArn=topic_arn,
-    #         Message=message
-    #     )
-    #     print(f"Topic Response: {topic_arn} : '{message}' => {response}")
-
     # TODO: pick one
     def publish(self, topic_arn, message):
         response = self.__sns_client.publish(
@@ -55,13 +45,12 @@ class SNSManager:
     #######################################################
     def subscribe(self, topic_arn, endpoint):
         self.__sns_client.subscribe(
-            TopicArn=topic_arn,
-            Protocol='sqs',
-            Endpoint=endpoint
+            TopicArn=topic_arn, Protocol="sqs", Endpoint=endpoint
         )
 
     #######################################################
     def list_topics(self):
         print(self.__sns_client.list_topics())
 
-
+
+###########################################################
```
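Since `subscribe()` hard-codes `Protocol="sqs"`, the endpoint must be an SQS queue ARN rather than a URL. A hedged sketch of wiring it to the `SQSManager` from the next diff (the ARNs, names, and message body are placeholders; both managers also expect the `ACCESS_KEY_ID`/`SECRET_ACCESS_KEY` environment variables used in their constructors):

```python
sns_manager = SNSManager()
topic_arn = "arn:aws:sns:us-east-1:123456789012:raw-files"     # placeholder ARN
queue_arn = "arn:aws:sqs:us-east-1:123456789012:raw-to-model"  # placeholder ARN

# Protocol="sqs" means the endpoint is the queue's ARN, not its URL.
sns_manager.subscribe(topic_arn=topic_arn, endpoint=queue_arn)
sns_manager.publish(topic_arn=topic_arn, message='{"ship": "placeholder"}')
```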
{aws_manager → water_column_sonar_processing/aws}/sqs_manager.py

```diff
@@ -1,34 +1,29 @@
 import os
+
 import boto3
-# import time
 
 
 ###########################################################
 class SQSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sqs_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sqs_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sqs_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sqs_region,
         )
         self.__sqs_resource = self.__sqs_session.resource(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
         self.__sqs_client = self.__sqs_session.client(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
 
     #######################################################
-    def create_queue(
-        self,
-        queue_name
-    ):
+    def create_queue(self, queue_name):
         response = self.__sqs_client.create_queue(QueueName=queue_name)
         return response
 
@@ -38,13 +33,10 @@ class SQSManager:
         return sqs_queue
 
     #######################################################
-    def list_queues(
-        self,
-        queue_name_prefix
-    ):
+    def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
         # takes a while to propagate the data accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 
-    #######################################################
+    #######################################################
```