water_column_sonar_processing-0.0.7-py3-none-any.whl → water_column_sonar_processing-0.0.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +2 -5
- water_column_sonar_processing/aws/__init__.py +2 -2
- water_column_sonar_processing/aws/dynamodb_manager.py +149 -43
- water_column_sonar_processing/aws/s3_manager.py +71 -37
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +6 -4
- water_column_sonar_processing/cruise/resample_regrid.py +3 -3
- water_column_sonar_processing/geometry/geometry_manager.py +21 -6
- water_column_sonar_processing/geometry/pmtile_generation.py +200 -13
- water_column_sonar_processing/index/index_manager.py +25 -13
- water_column_sonar_processing/model/zarr_manager.py +27 -25
- water_column_sonar_processing/process.py +4 -4
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/cruise_sampler.py +342 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +349 -0
- water_column_sonar_processing/utility/__init__.py +2 -2
- water_column_sonar_processing/utility/cleaner.py +1 -0
- water_column_sonar_processing/utility/constants.py +6 -2
- {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/METADATA +20 -10
- water_column_sonar_processing-0.0.9.dist-info/RECORD +32 -0
- {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-0.0.7.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/__init__.py
@@ -1,8 +1,6 @@
 from __future__ import absolute_import
 
-from . import aws, cruise, geometry, index, model,
-from .model import ZarrManager
-from .process import Process
+from . import aws, cruise, geometry, index, model, processing, utility
 
 __all__ = [
     "aws",
@@ -10,7 +8,6 @@ __all__ = [
     "geometry",
     "index",
     "model",
+    "processing",
     "utility",
-    "process",
-    "Process",
 ]
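The net change to the package root: the `Process` entry point and the top-level `ZarrManager` re-export are removed, and the new `processing` subpackage is exported instead. A minimal import sketch against 0.0.9, based only on the lines above:

import water_column_sonar_processing as wcsp

print(wcsp.__all__)
# ['aws', 'cruise', 'geometry', 'index', 'model', 'processing', 'utility']

# ZarrManager is no longer re-exported at the top level, but it remains
# importable from its subpackage (other modules in this diff do exactly this):
from water_column_sonar_processing.model import ZarrManager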
water_column_sonar_processing/aws/__init__.py
@@ -1,7 +1,7 @@
 from .dynamodb_manager import DynamoDBManager
-from .s3_manager import S3Manager
+from .s3_manager import S3Manager, chunked
 from .s3fs_manager import S3FSManager
 from .sns_manager import SNSManager
 from .sqs_manager import SQSManager
 
-__all__ = ["DynamoDBManager", "S3Manager", "S3FSManager", "SNSManager", "SQSManager"]
+__all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
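`chunked` is now re-exported from `s3_manager`. Its implementation is not shown in this diff; the sketch below assumes it is a fixed-size batching generator with an `(iterable, size)` signature, which is an assumption rather than confirmed behavior:

from water_column_sonar_processing.aws import chunked

# Assumed signature: chunked(iterable, size), yielding successive batches.
for batch in chunked(["a.raw", "b.raw", "c.raw"], 2):
    print(batch)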
water_column_sonar_processing/aws/dynamodb_manager.py
@@ -8,7 +8,11 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
 #########################################################################
 class DynamoDBManager:
     #####################################################################
-    def __init__(
+    def __init__(
+        self,
+        # endpoint_url
+    ):
+        # self.endpoint_url = endpoint_url
         self.__dynamodb_session = boto3.Session(
             aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
             aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
@@ -16,9 +20,11 @@ class DynamoDBManager:
         )
         self.__dynamodb_resource = self.__dynamodb_session.resource(
             service_name="dynamodb",
+            # endpoint_url=self.endpoint_url
         )
         self.__dynamodb_client = self.__dynamodb_session.client(
             service_name="dynamodb",
+            # endpoint_url=self.endpoint_url
         )
         self.type_serializer = TypeSerializer() # https://stackoverflow.com/a/46738251
         self.type_deserializer = TypeDeserializer()
@@ -35,31 +41,14 @@ class DynamoDBManager:
         # assert (status_code == 200), "Problem, unable to update dynamodb table."
 
     #####################################################################
-    def create_table(
-        self,
-        table_name,
-        key_schema,
-        attribute_definitions,
-    ):
-        self.__dynamodb_client.create_table(
-            AttributeDefinitions=attribute_definitions,
-            TableName=table_name,
-            KeySchema=key_schema,
-            BillingMode="PAY_PER_REQUEST", # "PROVISIONED",
-            # ProvisionedThroughput={
-            #     'ReadCapacityUnits': 1_000,
-            #     'WriteCapacityUnits': 1_000
-            # }
-        )
-
     #####################################################################
     def create_water_column_sonar_table(
         self,
         table_name,
     ):
-        self.create_table(
-
-
+        self.__dynamodb_client.create_table(
+            TableName=table_name,
+            KeySchema=[
                 {
                     "AttributeName": "FILE_NAME",
                     "KeyType": "HASH",
@@ -69,20 +58,50 @@
                     "KeyType": "RANGE",
                 },
             ],
-
+            AttributeDefinitions=[
                 {"AttributeName": "FILE_NAME", "AttributeType": "S"},
                 {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
             ],
+            BillingMode="PAY_PER_REQUEST"
+            # ProvisionedThroughput={
+            #     'ReadCapacityUnits': 1_000,
+            #     'WriteCapacityUnits': 1_000
+            # }
         )
+        # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
+        response = self.__dynamodb_client.describe_table(TableName=table_name)
+        print(response) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
+        # sleep then response['Table']['TableStatus'] == 'ACTIVE'
+
+    #####################################################################
+    # don't think this is used?
+    # def get_item(
+    #     self,
+    #     table_name,
+    #     key
+    # ):
+    #     response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
+    #     item = None
+    #     if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
+    #         if "Item" in response:
+    #             item = response["Item"]
+    #     return item
 
     #####################################################################
-    def
-
-
-
-
-
+    def get_table_item(
+        self,
+        table_name,
+        key,
+    ):
+        """
+        Gets a single row from the db.
+        """
+        table = self.__dynamodb_resource.Table(table_name)
+        response = table.get_item(Key=key)
+        # TODO:
+        # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+        #     throw error
+        return response
 
     #####################################################################
     def update_item(
@@ -101,17 +120,22 @@
             UpdateExpression=update_expression,
         )
         status_code = response["ResponseMetadata"]["HTTPStatusCode"]
-
+        assert response['ConsumedCapacity']['TableName'] == table_name
         assert status_code == 200, "Problem, unable to update dynamodb table."
 
     #####################################################################
+    # TODO: change to "get_cruise_as_df"
     def get_table_as_df(
         self,
         ship_name,
        cruise_name,
         sensor_name,
         table_name,
-    ):
+    ) -> pd.DataFrame:
+        """
+        To be used to initialize a cruise, deletes all entries associated with that cruise
+        in the database.
+        """
         expression_attribute_values = {
             ":cr": {"S": cruise_name},
             ":se": {"S": sensor_name},
@@ -128,6 +152,9 @@
             FilterExpression=filter_expression,
         )
         # Note: table.scan() has 1 MB limit on results so pagination is used
+        if len(response["Items"]) == 0:
+            return pd.DataFrame() # If no results, return empty dataframe
+
         data = response["Items"]
 
         while "LastEvaluatedKey" in response:
@@ -146,25 +173,104 @@
         return df.sort_values(by="START_TIME", ignore_index=True)
 
     #####################################################################
-    #
-    def
+    # TODO: WIP
+    def delete_item(
         self,
         table_name,
-
+        cruise_name,
+        file_name,
     ):
-
-
-
+        """
+        Finds all rows associated with a cruise and deletes them.
+        """
+        response = self.__dynamodb_client.delete_item(
+            Key={
+                "CRUISE_NAME": {
+                    "S": cruise_name
+                },
+                "FILE_NAME": {
+                    "S": file_name
+                }
+            },
+            TableName=table_name,
+            ReturnConsumedCapacity="TOTALS",
+        )
+        # TODO: there should be attributes included in response but they are missing
+        # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+        #     throw error
         return response
 
     #####################################################################
-
-
-
-        table_name,
-        cruise_name,
+    def describe_table(
+        self,
+        table_name,
     ):
-
+        """
+        Get a description of the table. Used to verify that records were added/removed.
+        """
+        response = self.__dynamodb_client.describe_table(TableName=table_name)
+        print(response)
+        return response
+
+
 
+    #####################################################################
+    # TODO: from test_raw_to_zarr get enum and use here
+    # def __update_processing_status(
+    #     self,
+    #     file_name: str,
+    #     cruise_name: str,
+    #     pipeline_status: str,
+    #     error_message: str = None,
+    # ):
+    #     print(f"Updating processing status to {pipeline_status}.")
+    #     if error_message:
+    #         print(f"Error message: {error_message}")
+    #         self.__dynamo.update_item(
+    #             table_name=self.__table_name,
+    #             key={
+    #                 'FILE_NAME': {'S': file_name}, # Partition Key
+    #                 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
+    #             },
+    #             attribute_names={
+    #                 '#PT': 'PIPELINE_TIME',
+    #                 '#PS': 'PIPELINE_STATUS',
+    #                 '#EM': 'ERROR_MESSAGE',
+    #             },
+    #             expression='SET #PT = :pt, #PS = :ps, #EM = :em',
+    #             attribute_values={
+    #                 ':pt': {
+    #                     'S': datetime.now().isoformat(timespec="seconds") + "Z"
+    #                 },
+    #                 ':ps': {
+    #                     'S': pipeline_status
+    #                 },
+    #                 ':em': {
+    #                     'S': error_message
+    #                 }
+    #             }
+    #         )
+    #     else:
+    #         self.__dynamo.update_item(
+    #             table_name=self.__table_name,
+    #             key={
+    #                 'FILE_NAME': {'S': file_name}, # Partition Key
+    #                 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
+    #             },
+    #             attribute_names={
+    #                 '#PT': 'PIPELINE_TIME',
+    #                 '#PS': 'PIPELINE_STATUS',
+    #             },
+    #             expression='SET #PT = :pt, #PS = :ps',
+    #             attribute_values={
+    #                 ':pt': {
+    #                     'S': datetime.now().isoformat(timespec="seconds") + "Z"
+    #                 },
+    #                 ':ps': {
+    #                     'S': pipeline_status
+    #                 }
+    #             }
+    #         )
+    #     print("Done updating processing status.")
 
 #########################################################################
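Pulling the visible 0.0.9 signatures together, the reworked DynamoDBManager can be exercised roughly as follows. This is a minimal sketch based only on the method signatures shown above: the table name, keys, ship, and cruise values are placeholders, and credentials come from the ACCESS_KEY_ID/SECRET_ACCESS_KEY environment variables read in __init__.

from water_column_sonar_processing.aws import DynamoDBManager

dynamo_manager = DynamoDBManager()

# create_water_column_sonar_table now inlines the old create_table logic
# and prints a describe_table response (status starts as 'CREATING').
dynamo_manager.create_water_column_sonar_table(table_name="example-table")

# New in 0.0.9: resource-based single-row fetch.
response = dynamo_manager.get_table_item(
    table_name="example-table",
    key={"FILE_NAME": "example.raw", "CRUISE_NAME": "EX0000"},  # placeholder key
)

# Returns an empty DataFrame when the scan finds no matching rows.
df = dynamo_manager.get_table_as_df(
    ship_name="Example_Ship",  # placeholder values
    cruise_name="EX0000",
    sensor_name="EK60",
    table_name="example-table",
)

# Still marked "TODO: WIP" above: deletes one row by its composite key.
dynamo_manager.delete_item(
    table_name="example-table",
    cruise_name="EX0000",
    file_name="example.raw",
)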
water_column_sonar_processing/aws/s3_manager.py
@@ -1,9 +1,8 @@
 import json
 import os
+import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
-
-import boto3
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -25,10 +24,16 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
+        # input_endpoint_url: str,
+        # output_endpoint_url: str,
+        # endpoint_url
         # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.endpoint_url = endpoint_url
+        # self.input_endpoint_url = input_endpoint_url
+        # self.output_endpoint_url = output_endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -46,6 +51,7 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
@@ -53,7 +59,6 @@
             region_name=self.s3_region,
         )
         # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
-        # TODO: create both "s3_client_input" and "s3_client_output" ???
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -63,19 +68,20 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            # endpoint_url=endpoint_url, # TODO: temporary
         )
-        self.s3_resource_noaa_wcsd_zarr_pds = (
-
-
-
-            region_name=self.s3_region,
-        )
+        self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
+            service_name="s3",
+            config=self.s3_client_config,
+            region_name=self.s3_region,
         )
+        self.paginator = self.s3_client.get_paginator('list_objects_v2')
+        self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self):
+    def get_client(self): # TODO: do i need this?
         return self.s3_session.client(
             service_name="s3",
-            config=self.
+            config=self.s3_client_config,
             region_name=self.s3_region,
         )
 
@@ -103,17 +109,18 @@
         self,
         file_name: str,
         key: str,
+        output_bucket_name: str,
     ):
-
-
-
-
-        )
+        """
+        Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
+        """
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(Filename=file_name, Key=key)
         return key
 
     #####################################################################
     def upload_files_with_thread_pool_executor(
         self,
+        output_bucket_name: str,
         all_files: list,
     ):
         # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
@@ -122,21 +129,45 @@
         with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
             futures = [
                 executor.submit(
-                    self.upload_nodd_file,
+                    self.upload_nodd_file, # TODO: verify which one is using this
                     all_file[0], # file_name
                     all_file[1], # key
+                    output_bucket_name, # output_bucket_name
                 )
                 for all_file in all_files
             ]
             for future in as_completed(futures):
                 result = future.result()
                 if result:
-                    all_uploads.extend(result)
+                    all_uploads.extend([result])
         except Exception as err:
             print(err)
         print("Done uploading files using threading pool.")
         return all_uploads
 
+    #####################################################################
+    # def upload_nodd_file2(
+    #     self,
+    #     body: str,
+    #     bucket: str,
+    #     key: str,
+    # ):
+    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
+    #         Body=body,
+    #         Bucket=bucket,
+    #         Key=key,
+    #     )
+
+    # TODO: this uses resource, try to use client
+    def upload_file(
+        self,
+        filename: str,
+        bucket_name: str,
+        key: str,
+    ):
+        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
+        self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
+
     #####################################################################
     def upload_zarr_files_to_bucket( # noaa-wcsd-model-pds
         self,
@@ -165,32 +196,34 @@
         return all_uploads
 
     #####################################################################
-    # used: raw-to-
-    def list_objects( # noaa-wcsd-pds and noaa-wcsd-
-        self,
+    # used: raw-to-zarr
+    def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+        self,
+        bucket_name,
+        prefix
     ):
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
-        s3_client = self.s3_client
+        # s3_client = self.s3_client
         keys = []
-        paginator = s3_client.get_paginator("list_objects_v2")
-        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+        # paginator = s3_client.get_paginator("list_objects_v2")
+        page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
         for page in page_iterator:
             if "Contents" in page.keys():
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    def list_nodd_objects( # These are used by the geometry for uploading data
-
-
-    ):
-
-
-
-
-
-
+    # def list_nodd_objects( # These are used by the geometry for uploading data
+    #     self,
+    #     prefix,
+    # ):
+    #     # Returns a list of key strings for each object in bucket defined by prefix
+    #     keys = []
+    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
+    #     for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
+    #         if "Contents" in page.keys():
+    #             keys.extend([k["Key"] for k in page["Contents"]])
+    #     return keys
 
     #####################################################################
     # TODO: change name to "directory"
@@ -279,9 +312,10 @@
         self,
         bucket_name,
         key,
-        file_name,
+        file_name, # where the file will be saved
     ):
         self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
+        # TODO: if bottom file doesn't exist, don't fail downloader
         print("downloaded file")
 
     #####################################################################
@@ -318,7 +352,7 @@
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
-        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
+        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body) # "Body" can be a file
 
     #####################################################################
     def read_s3_json(
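Pulling the visible S3Manager changes together: list_objects now takes the bucket explicitly, and both NODD upload paths require an output_bucket_name. A minimal usage sketch using only signatures shown above; bucket names and paths are placeholders, and credentials plus default bucket names still come from environment variables:

from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()

# list_objects(bucket_name, prefix) now reuses the paginator cached in __init__.
keys = s3_manager.list_objects(
    bucket_name="example-output-bucket",
    prefix="spatial/geojson/",
)

# upload_nodd_file gained an output_bucket_name parameter in 0.0.9.
s3_manager.upload_nodd_file(
    file_name="cruise.json",
    key="spatial/geojson/cruise.json",
    output_bucket_name="example-output-bucket",
)

# The thread-pool variant forwards the same bucket to every upload.
s3_manager.upload_files_with_thread_pool_executor(
    output_bucket_name="example-output-bucket",
    all_files=[["local/a.json", "prefix/a.json"], ["local/b.json", "prefix/b.json"]],
)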
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -3,10 +3,10 @@ import os
 import numcodecs
 import numpy as np
 
-from water_column_sonar_processing.aws
-from water_column_sonar_processing.aws
-from water_column_sonar_processing.model
-from water_column_sonar_processing.utility
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.model import ZarrManager
+from water_column_sonar_processing.utility import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -17,6 +17,7 @@ numcodecs.blosc.set_nthreads(1)
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
 
+# TODO: change name to "CreateLocalEmptyZarrStore"
 class CreateEmptyZarrStore:
     #######################################################
     def __init__(
@@ -28,6 +29,7 @@ class CreateEmptyZarrStore:
 
     #######################################################
 
+    # TODO: move this to the s3_manager
     def upload_zarr_store_to_s3(
         self,
         local_directory: str,
water_column_sonar_processing/cruise/resample_regrid.py
@@ -7,9 +7,9 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from water_column_sonar_processing.aws
-from water_column_sonar_processing.geometry
-from water_column_sonar_processing.model
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.model import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
water_column_sonar_processing/geometry/geometry_manager.py
@@ -1,11 +1,12 @@
+import os
 from pathlib import Path
 
 import geopandas
 import numpy as np
 import pandas as pd
 
-from water_column_sonar_processing.aws
-from water_column_sonar_processing.utility
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.utility import Cleaner
 
 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -26,12 +27,13 @@ class GeometryManager:
         self,
     ):
         self.DECIMAL_PRECISION = 5 # precision for GPS coordinates
-        self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to street level
+        self.SIMPLIFICATION_TOLERANCE = 0.0001 # RDP simplification to "street level"
 
     #######################################################
     def read_echodata_gps_data(
         self,
         echodata,
+        output_bucket_name,
         ship_name,
         cruise_name,
         sensor_name,
@@ -123,12 +125,12 @@
             crs="epsg:4326",
         )
         # Note: We set np.nan to 0,0 so downstream missing values can be omitted
-
+        # TODO: so what ends up here is data with corruption at null island!!!
         geo_json_line = gps_gdf.to_json()
         if write_geojson:
             print("Creating local copy of geojson file.")
             with open(geo_json_name, "w") as write_file:
-                write_file.write(geo_json_line)
+                write_file.write(geo_json_line) # NOTE: this file can include zeros for lat lon
 
         geo_json_prefix = (
             f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
@@ -136,7 +138,8 @@
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-        s3_objects = s3_manager.
+        s3_objects = s3_manager.list_objects(
+            bucket_name=output_bucket_name,
             prefix=f"{geo_json_prefix}/{geo_json_name}"
         )
         if len(s3_objects) > 0:
@@ -149,6 +152,7 @@
             s3_manager.upload_nodd_file(
                 file_name=geo_json_name, # file_name
                 key=f"{geo_json_prefix}/{geo_json_name}", # key
+                output_bucket_name=output_bucket_name,
             )
 
             # TODO: delete geo_json file
@@ -221,5 +225,16 @@
             print(f"Exception encountered reading s3 GeoJSON: {err}")
             raise
 
+    ############################################################################
+    # COMES from the raw-to-zarr conversion
+    def __write_geojson_to_file(
+        self,
+        store_name,
+        data
+    ) -> None:
+        print('Writing GeoJSON to file.')
+        with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
+            outfile.write(data)
+
 
 ###########################################################