water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
|
@@ -9,20 +9,20 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
|
|
|
9
9
|
class DynamoDBManager:
|
|
10
10
|
#####################################################################
|
|
11
11
|
def __init__(
|
|
12
|
-
|
|
13
|
-
|
|
12
|
+
self,
|
|
13
|
+
# endpoint_url
|
|
14
14
|
):
|
|
15
15
|
# self.endpoint_url = endpoint_url
|
|
16
|
-
self.
|
|
16
|
+
self.dynamodb_session = boto3.Session(
|
|
17
17
|
aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
|
|
18
18
|
aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
|
|
19
19
|
region_name=os.environ.get("AWS_REGION", default="us-east-1"),
|
|
20
20
|
)
|
|
21
|
-
self.
|
|
21
|
+
self.dynamodb_resource = self.dynamodb_session.resource(
|
|
22
22
|
service_name="dynamodb",
|
|
23
23
|
# endpoint_url=self.endpoint_url
|
|
24
24
|
)
|
|
25
|
-
self.
|
|
25
|
+
self.dynamodb_client = self.dynamodb_session.client(
|
|
26
26
|
service_name="dynamodb",
|
|
27
27
|
# endpoint_url=self.endpoint_url
|
|
28
28
|
)
|
|
@@ -46,7 +46,7 @@ class DynamoDBManager:
|
|
|
46
46
|
self,
|
|
47
47
|
table_name,
|
|
48
48
|
):
|
|
49
|
-
self.
|
|
49
|
+
self.dynamodb_client.create_table(
|
|
50
50
|
TableName=table_name,
|
|
51
51
|
KeySchema=[
|
|
52
52
|
{
|
|
@@ -62,15 +62,17 @@ class DynamoDBManager:
|
|
|
62
62
|
{"AttributeName": "FILE_NAME", "AttributeType": "S"},
|
|
63
63
|
{"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
|
|
64
64
|
],
|
|
65
|
-
BillingMode="PAY_PER_REQUEST"
|
|
65
|
+
BillingMode="PAY_PER_REQUEST",
|
|
66
66
|
# ProvisionedThroughput={
|
|
67
67
|
# 'ReadCapacityUnits': 1_000,
|
|
68
68
|
# 'WriteCapacityUnits': 1_000
|
|
69
69
|
# }
|
|
70
70
|
)
|
|
71
71
|
# TODO: after creating status is 'CREATING', wait until 'ACTIVE'
|
|
72
|
-
response = self.
|
|
73
|
-
print(
|
|
72
|
+
response = self.dynamodb_client.describe_table(TableName=table_name)
|
|
73
|
+
print(
|
|
74
|
+
response
|
|
75
|
+
) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
|
|
74
76
|
# sleep then response['Table']['TableStatus'] == 'ACTIVE'
|
|
75
77
|
|
|
76
78
|
#####################################################################
|
|
@@ -80,7 +82,7 @@ class DynamoDBManager:
|
|
|
80
82
|
# table_name,
|
|
81
83
|
# key
|
|
82
84
|
# ):
|
|
83
|
-
# response = self.
|
|
85
|
+
# response = self.dynamodb_client.get_item(TableName=table_name, Key=key)
|
|
84
86
|
# item = None
|
|
85
87
|
# if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
|
|
86
88
|
# if "Item" in response:
|
|
@@ -96,7 +98,7 @@ class DynamoDBManager:
|
|
|
96
98
|
"""
|
|
97
99
|
Gets a single row from the db.
|
|
98
100
|
"""
|
|
99
|
-
table = self.
|
|
101
|
+
table = self.dynamodb_resource.Table(table_name)
|
|
100
102
|
response = table.get_item(Key=key)
|
|
101
103
|
# TODO:
|
|
102
104
|
# if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
|
@@ -111,58 +113,83 @@ class DynamoDBManager:
|
|
|
111
113
|
expression_attribute_names,
|
|
112
114
|
expression_attribute_values,
|
|
113
115
|
update_expression,
|
|
114
|
-
):
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
116
|
+
): # TODO: convert to boolean
|
|
117
|
+
try:
|
|
118
|
+
response = self.dynamodb_client.update_item(
|
|
119
|
+
TableName=table_name,
|
|
120
|
+
Key=key,
|
|
121
|
+
ExpressionAttributeNames=expression_attribute_names,
|
|
122
|
+
ExpressionAttributeValues=expression_attribute_values,
|
|
123
|
+
UpdateExpression=update_expression,
|
|
124
|
+
)
|
|
125
|
+
return response["ResponseMetadata"]["HTTPStatusCode"] # TODO: should be 200
|
|
126
|
+
# print(f"HTTPStatusCode: {status_code}")
|
|
127
|
+
# assert status_code == 200, "Problem, unable to update dynamodb table."
|
|
128
|
+
# assert response['ConsumedCapacity']['TableName'] == table_name
|
|
129
|
+
except Exception as err:
|
|
130
|
+
raise RuntimeError(f"Problem was encountered while updating item, {err}")
|
|
125
131
|
|
|
126
132
|
#####################################################################
|
|
127
133
|
# TODO: change to "get_cruise_as_df"
|
|
128
134
|
def get_table_as_df(
|
|
129
135
|
self,
|
|
130
|
-
ship_name,
|
|
136
|
+
# ship_name,
|
|
131
137
|
cruise_name,
|
|
132
|
-
sensor_name,
|
|
138
|
+
# sensor_name, # TODO: need to add this back for EK80
|
|
133
139
|
table_name,
|
|
134
140
|
) -> pd.DataFrame:
|
|
135
141
|
"""
|
|
136
142
|
Scans the table for all entries associated with the given cruise and returns them
|
|
137
143
|
as a DataFrame sorted by START_TIME (an empty DataFrame if there are no matches).
|
|
144
|
+
# TODO: the cruise name alone isn't good enough; there could be two instruments for a cruise...
|
|
138
145
|
"""
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
":se": {"S": sensor_name},
|
|
142
|
-
":sh": {"S": ship_name},
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
filter_expression = (
|
|
146
|
-
"CRUISE_NAME = :cr and SENSOR_NAME = :se and SHIP_NAME = :sh"
|
|
147
|
-
)
|
|
148
|
-
response = self.__dynamodb_client.scan(
|
|
146
|
+
filter_expression = "CRUISE_NAME = :cr"
|
|
147
|
+
response = self.dynamodb_client.scan(
|
|
149
148
|
TableName=table_name,
|
|
150
|
-
|
|
151
|
-
|
|
149
|
+
# Limit=1000,
|
|
150
|
+
Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
|
|
151
|
+
# ExclusiveStartKey=where to pick up
|
|
152
|
+
# ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
|
|
153
|
+
# ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
|
|
152
154
|
FilterExpression=filter_expression,
|
|
155
|
+
# ExpressionAttributeNames={
|
|
156
|
+
# '#SH': 'SHIP_NAME',
|
|
157
|
+
# '#CR': 'CRUISE_NAME',
|
|
158
|
+
# '#FN': 'FILE_NAME',
|
|
159
|
+
# },
|
|
160
|
+
ExpressionAttributeValues={ # criteria
|
|
161
|
+
":cr": {
|
|
162
|
+
"S": cruise_name,
|
|
163
|
+
},
|
|
164
|
+
},
|
|
165
|
+
ConsistentRead=True,
|
|
166
|
+
# ExclusiveStartKey=response["LastEvaluatedKey"],
|
|
153
167
|
)
|
|
154
168
|
# Note: table.scan() has 1 MB limit on results so pagination is used
|
|
155
|
-
|
|
156
|
-
|
|
169
|
+
|
|
170
|
+
if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
|
|
171
|
+
return pd.DataFrame() # If no results, return empty dataframe
|
|
157
172
|
|
|
158
173
|
data = response["Items"]
|
|
159
174
|
|
|
160
|
-
while "LastEvaluatedKey" in response:
|
|
161
|
-
response = self.
|
|
175
|
+
while response.get("LastEvaluatedKey"): # "LastEvaluatedKey" in response:
|
|
176
|
+
response = self.dynamodb_client.scan(
|
|
162
177
|
TableName=table_name,
|
|
163
|
-
Select
|
|
164
|
-
|
|
178
|
+
### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
|
|
179
|
+
Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
|
|
165
180
|
FilterExpression=filter_expression,
|
|
181
|
+
# ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
|
|
182
|
+
# ExpressionAttributeNames={ # would need to specify all cols in df
|
|
183
|
+
# '#SH': 'SHIP_NAME',
|
|
184
|
+
# '#CR': 'CRUISE_NAME',
|
|
185
|
+
# '#FN': 'FILE_NAME',
|
|
186
|
+
# },
|
|
187
|
+
ExpressionAttributeValues={ # criteria
|
|
188
|
+
":cr": {
|
|
189
|
+
"S": cruise_name,
|
|
190
|
+
},
|
|
191
|
+
},
|
|
192
|
+
ConsistentRead=True,
|
|
166
193
|
ExclusiveStartKey=response["LastEvaluatedKey"],
|
|
167
194
|
)
|
|
168
195
|
data.extend(response["Items"])
|
|
@@ -172,6 +199,66 @@ class DynamoDBManager:
|
|
|
172
199
|
|
|
173
200
|
return df.sort_values(by="START_TIME", ignore_index=True)
|
|
174
201
|
|
|
202
|
+
#####################################################################
|
|
203
|
+
# def get_cruise_list(
|
|
204
|
+
# self,
|
|
205
|
+
# table_name,
|
|
206
|
+
# ) -> list:
|
|
207
|
+
# """
|
|
208
|
+
# Experimental, gets all cruise names as list
|
|
209
|
+
# """
|
|
210
|
+
# filter_expression = "CRUISE_NAME = :cr"
|
|
211
|
+
# response = self.dynamodb_client.scan(
|
|
212
|
+
# TableName=table_name,
|
|
213
|
+
# Select='SPECIFIC_ATTRIBUTES',
|
|
214
|
+
# #ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
|
|
215
|
+
# # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
|
|
216
|
+
# FilterExpression=filter_expression,
|
|
217
|
+
# # ExpressionAttributeNames={
|
|
218
|
+
# # '#SH': 'SHIP_NAME',
|
|
219
|
+
# # '#CR': 'CRUISE_NAME',
|
|
220
|
+
# # '#FN': 'FILE_NAME',
|
|
221
|
+
# # },
|
|
222
|
+
# # ExpressionAttributeValues={ # criteria
|
|
223
|
+
# # ':cr': {
|
|
224
|
+
# # 'S': cruise_name,
|
|
225
|
+
# # },
|
|
226
|
+
# # },
|
|
227
|
+
# )
|
|
228
|
+
# # Note: table.scan() has 1 MB limit on results so pagination is used
|
|
229
|
+
#
|
|
230
|
+
# if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
|
|
231
|
+
# return pd.DataFrame() # If no results, return empty dataframe
|
|
232
|
+
#
|
|
233
|
+
# dataset = response["Items"]
|
|
234
|
+
#
|
|
235
|
+
# while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
|
|
236
|
+
# response = self.dynamodb_client.scan(
|
|
237
|
+
# TableName=table_name,
|
|
238
|
+
# ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
|
|
239
|
+
# Select='ALL_ATTRIBUTES', # or 'SPECIFIC_ATTRIBUTES',
|
|
240
|
+
# FilterExpression=filter_expression,
|
|
241
|
+
# #ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
|
|
242
|
+
# # ExpressionAttributeNames={ # would need to specify all cols in df
|
|
243
|
+
# # '#SH': 'SHIP_NAME',
|
|
244
|
+
# # '#CR': 'CRUISE_NAME',
|
|
245
|
+
# # '#FN': 'FILE_NAME',
|
|
246
|
+
# # },
|
|
247
|
+
# ExpressionAttributeValues={ # criteria
|
|
248
|
+
# ':cr': {
|
|
249
|
+
# 'S': cruise_name,
|
|
250
|
+
# },
|
|
251
|
+
# },
|
|
252
|
+
# ConsistentRead=True,
|
|
253
|
+
# ExclusiveStartKey=response["LastEvaluatedKey"],
|
|
254
|
+
# )
|
|
255
|
+
# dataset.extend(response["Items"])
|
|
256
|
+
#
|
|
257
|
+
# deserializer = self.type_deserializer
|
|
258
|
+
# df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
|
|
259
|
+
#
|
|
260
|
+
# return df.sort_values(by="START_TIME", ignore_index=True)
|
|
261
|
+
|
|
175
262
|
#####################################################################
|
|
176
263
|
# TODO: WIP
|
|
177
264
|
def delete_item(
|
|
@@ -183,17 +270,10 @@ class DynamoDBManager:
|
|
|
183
270
|
"""
|
|
184
271
|
Finds all rows associated with a cruise and deletes them.
|
|
185
272
|
"""
|
|
186
|
-
response = self.
|
|
187
|
-
Key={
|
|
188
|
-
"CRUISE_NAME": {
|
|
189
|
-
"S": cruise_name
|
|
190
|
-
},
|
|
191
|
-
"FILE_NAME": {
|
|
192
|
-
"S": file_name
|
|
193
|
-
}
|
|
194
|
-
},
|
|
273
|
+
response = self.dynamodb_client.delete_item(
|
|
274
|
+
Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
|
|
195
275
|
TableName=table_name,
|
|
196
|
-
ReturnConsumedCapacity="
|
|
276
|
+
ReturnConsumedCapacity="TOTAL",
|
|
197
277
|
)
|
|
198
278
|
# TODO: there should be attributes included in response but they are missing
|
|
199
279
|
# if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
|
@@ -202,18 +282,16 @@ class DynamoDBManager:
|
|
|
202
282
|
|
|
203
283
|
#####################################################################
|
|
204
284
|
def describe_table(
|
|
205
|
-
|
|
206
|
-
|
|
285
|
+
self,
|
|
286
|
+
table_name,
|
|
207
287
|
):
|
|
208
288
|
"""
|
|
209
289
|
Get a description of the table. Used to verify that records were added/removed.
|
|
210
290
|
"""
|
|
211
|
-
response = self.
|
|
291
|
+
response = self.dynamodb_client.describe_table(TableName=table_name)
|
|
212
292
|
print(response)
|
|
213
293
|
return response
|
|
214
294
|
|
|
215
|
-
|
|
216
|
-
|
|
217
295
|
#####################################################################
|
|
218
296
|
# TODO: from test_raw_to_zarr get enum and use here
|
|
219
297
|
# def __update_processing_status(
|
|
@@ -226,7 +304,7 @@ class DynamoDBManager:
|
|
|
226
304
|
# print(f"Updating processing status to {pipeline_status}.")
|
|
227
305
|
# if error_message:
|
|
228
306
|
# print(f"Error message: {error_message}")
|
|
229
|
-
# self.
|
|
307
|
+
# self.dynamo.update_item(
|
|
230
308
|
# table_name=self.__table_name,
|
|
231
309
|
# key={
|
|
232
310
|
# 'FILE_NAME': {'S': file_name}, # Partition Key
|
|
@@ -251,7 +329,7 @@ class DynamoDBManager:
|
|
|
251
329
|
# }
|
|
252
330
|
# )
|
|
253
331
|
# else:
|
|
254
|
-
# self.
|
|
332
|
+
# self.dynamo.update_item(
|
|
255
333
|
# table_name=self.__table_name,
|
|
256
334
|
# key={
|
|
257
335
|
# 'FILE_NAME': {'S': file_name}, # Partition Key
|
|
@@ -273,4 +351,5 @@ class DynamoDBManager:
|
|
|
273
351
|
# )
|
|
274
352
|
# print("Done updating processing status.")
|
|
275
353
|
|
|
354
|
+
|
|
276
355
|
#########################################################################
|