water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +420 -0
- water_column_sonar_processing/aws/s3fs_manager.py +72 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +339 -0
- water_column_sonar_processing/geometry/__init__.py +11 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +243 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +384 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +722 -0
- water_column_sonar_processing/process.py +149 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
- water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
from .dynamodb_manager import DynamoDBManager
|
|
2
|
+
from .s3_manager import S3Manager, chunked
|
|
3
|
+
from .s3fs_manager import S3FSManager
|
|
4
|
+
from .sns_manager import SNSManager
|
|
5
|
+
from .sqs_manager import SQSManager
|
|
6
|
+
|
|
7
|
+
__all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import boto3
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
#########################################################################
|
|
9
|
+
class DynamoDBManager:
    """Convenience wrapper around the boto3 DynamoDB client and resource.

    Credentials are read from the ``ACCESS_KEY_ID`` and ``SECRET_ACCESS_KEY``
    environment variables; the region comes from ``AWS_REGION`` and falls
    back to ``us-east-1`` when that variable is unset.
    """

    #####################################################################
    def __init__(
        self,
        # endpoint_url
    ):
        # NOTE: a custom endpoint_url (e.g. for local testing) is not wired in;
        # the parameter was left disabled by the original author.
        self.dynamodb_session = boto3.Session(
            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
            region_name=os.environ.get("AWS_REGION", default="us-east-1"),
        )
        self.dynamodb_resource = self.dynamodb_session.resource(
            service_name="dynamodb",
        )
        self.dynamodb_client = self.dynamodb_session.client(
            service_name="dynamodb",
        )
        self.type_serializer = TypeSerializer()  # https://stackoverflow.com/a/46738251
        self.type_deserializer = TypeDeserializer()

    #####################################################################
    # NOTE: a put_item helper is defined in raw-to-model; it was not used
    # here and its commented-out copy has been dropped.

    #####################################################################
    def create_water_column_sonar_table(
        self,
        table_name,
    ):
        """Create the pay-per-request table keyed on FILE_NAME / CRUISE_NAME.

        ``FILE_NAME`` is the partition (HASH) key and ``CRUISE_NAME`` the
        sort (RANGE) key; both are string attributes. The table description
        returned by ``describe_table`` is printed after the create call.

        Args:
            table_name: Name of the DynamoDB table to create.
        """
        self.dynamodb_client.create_table(
            TableName=table_name,
            KeySchema=[
                {"AttributeName": "FILE_NAME", "KeyType": "HASH"},
                {"AttributeName": "CRUISE_NAME", "KeyType": "RANGE"},
            ],
            AttributeDefinitions=[
                {"AttributeName": "FILE_NAME", "AttributeType": "S"},
                {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
            ],
            BillingMode="PAY_PER_REQUEST",
        )
        # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
        # (sleep/poll until response['Table']['TableStatus'] == 'ACTIVE').
        response = self.dynamodb_client.describe_table(TableName=table_name)
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
        print(response)

    #####################################################################
    def get_table_item(
        self,
        table_name,
        key,
    ):
        """Get a single row from the table.

        Args:
            table_name: Name of the DynamoDB table.
            key: Primary-key mapping passed straight to ``Table.get_item``.

        Returns:
            The unmodified ``get_item`` response.
        """
        table = self.dynamodb_resource.Table(table_name)
        # TODO: raise when response["ResponseMetadata"]["HTTPStatusCode"] != 200
        return table.get_item(Key=key)

    #####################################################################
    def update_item(
        self,
        table_name,
        key,
        expression_attribute_names,
        expression_attribute_values,
        update_expression,
    ):  # TODO: convert to boolean
        """Update one item and return the HTTP status code of the call.

        Args:
            table_name: Name of the DynamoDB table.
            key: Primary key of the item, in DynamoDB wire format.
            expression_attribute_names: Placeholder-to-attribute-name mapping.
            expression_attribute_values: Placeholder-to-value mapping.
            update_expression: The DynamoDB update expression to apply.

        Returns:
            ``response["ResponseMetadata"]["HTTPStatusCode"]`` (TODO: should
            be 200).

        Raises:
            RuntimeError: If the update call or reading its response fails;
                the underlying exception is attached as ``__cause__``.
        """
        try:
            response = self.dynamodb_client.update_item(
                TableName=table_name,
                Key=key,
                ExpressionAttributeNames=expression_attribute_names,
                ExpressionAttributeValues=expression_attribute_values,
                UpdateExpression=update_expression,
            )
            return response["ResponseMetadata"]["HTTPStatusCode"]
        except Exception as err:
            # Chain explicitly so the original failure stays in the traceback.
            raise RuntimeError(
                f"Problem was encountered while updating item, {err}"
            ) from err

    #####################################################################
    # TODO: change to "get_cruise_as_df"
    def get_table_as_df(
        self,
        # ship_name,
        cruise_name,
        # sensor_name,  # TODO: need to add this back for EK80
        table_name,
    ) -> pd.DataFrame:
        """Return every row of a cruise as a DataFrame sorted by START_TIME.

        The table is scanned with a ``CRUISE_NAME = :cr`` filter. Scan results
        are paginated, so pages are followed through ``LastEvaluatedKey``
        until the scan is exhausted.

        TODO: cruise name alone isn't good enough, there could be two
        instruments for a cruise.

        Args:
            cruise_name: Value the ``CRUISE_NAME`` attribute must equal.
            table_name: Name of the DynamoDB table to scan.

        Returns:
            A DataFrame with one row per matching item, sorted by
            ``START_TIME`` with a fresh index; an empty DataFrame when no
            item matches on any page.
        """
        # Single source of truth for the scan arguments; only the start key
        # changes between pages.
        scan_kwargs = {
            "TableName": table_name,
            "Select": "ALL_ATTRIBUTES",
            "FilterExpression": "CRUISE_NAME = :cr",
            "ExpressionAttributeValues": {":cr": {"S": cruise_name}},
            "ConsistentRead": True,
        }

        items = []
        while True:
            response = self.dynamodb_client.scan(**scan_kwargs)
            items.extend(response.get("Items", []))
            last_key = response.get("LastEvaluatedKey")
            if not last_key:
                break
            scan_kwargs["ExclusiveStartKey"] = last_key

        # A filtered scan can return empty pages that still carry a
        # LastEvaluatedKey, so emptiness can only be decided after the last
        # page; sorting a column-less frame by START_TIME would raise.
        if not items:
            return pd.DataFrame()

        deserialize = self.type_deserializer.deserialize
        df = pd.DataFrame([deserialize({"M": item}) for item in items])

        return df.sort_values(by="START_TIME", ignore_index=True)

    #####################################################################
    # NOTE: an experimental get_cruise_list (all cruise names as a list) and
    # an unused get_item were previously kept here as commented-out code.

    #####################################################################
    # TODO: WIP
    def delete_item(
        self,
        table_name,
        cruise_name,
        file_name,
    ):
        """Delete the single item identified by cruise name and file name.

        Args:
            table_name: Name of the DynamoDB table.
            cruise_name: Value of the item's ``CRUISE_NAME`` key.
            file_name: Value of the item's ``FILE_NAME`` key.

        Returns:
            The unmodified ``delete_item`` response.
        """
        response = self.dynamodb_client.delete_item(
            Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
            TableName=table_name,
            ReturnConsumedCapacity="TOTAL",
        )
        # TODO: there should be attributes included in response but they are missing
        # TODO: raise when response["ResponseMetadata"]["HTTPStatusCode"] != 200
        return response

    #####################################################################
    def describe_table(
        self,
        table_name,
    ):
        """Print and return the table description.

        Used to verify that records were added/removed.

        Args:
            table_name: Name of the DynamoDB table.

        Returns:
            The unmodified ``describe_table`` response.
        """
        response = self.dynamodb_client.describe_table(TableName=table_name)
        print(response)
        return response

    #####################################################################
    # TODO: from test_raw_to_zarr get enum and use here — add an
    # update-processing-status helper that sets PIPELINE_TIME,
    # PIPELINE_STATUS and (optionally) ERROR_MESSAGE through update_item.


#########################################################################
|