water-column-sonar-processing 0.0.6__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. water_column_sonar_processing/__init__.py +2 -5
  2. water_column_sonar_processing/aws/__init__.py +2 -2
  3. water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
  4. water_column_sonar_processing/aws/s3_manager.py +184 -112
  5. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  6. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  7. water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  9. water_column_sonar_processing/cruise/resample_regrid.py +144 -129
  10. water_column_sonar_processing/geometry/__init__.py +10 -2
  11. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +60 -44
  13. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  14. water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
  15. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  16. water_column_sonar_processing/index/index_manager.py +157 -27
  17. water_column_sonar_processing/model/zarr_manager.py +663 -258
  18. water_column_sonar_processing/processing/__init__.py +4 -0
  19. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  20. water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
  21. water_column_sonar_processing/utility/__init__.py +9 -2
  22. water_column_sonar_processing/utility/cleaner.py +1 -0
  23. water_column_sonar_processing/utility/constants.py +69 -14
  24. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  25. water_column_sonar_processing/utility/timestamp.py +3 -4
  26. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  27. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  28. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  29. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  30. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  31. water_column_sonar_processing/process.py +0 -147
  32. water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
  33. water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
  34. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,6 @@
1
1
  from __future__ import absolute_import
2
2
 
3
- from . import aws, cruise, geometry, index, model, utility, process
4
- from .model import ZarrManager
5
- from .process import Process
3
+ from . import aws, cruise, geometry, index, model, processing, utility
6
4
 
7
5
  __all__ = [
8
6
  "aws",
@@ -10,7 +8,6 @@ __all__ = [
10
8
  "geometry",
11
9
  "index",
12
10
  "model",
11
+ "processing",
13
12
  "utility",
14
- "process",
15
- "Process",
16
13
  ]
@@ -1,7 +1,7 @@
1
1
  from .dynamodb_manager import DynamoDBManager
2
- from .s3_manager import S3Manager
2
+ from .s3_manager import S3Manager, chunked
3
3
  from .s3fs_manager import S3FSManager
4
4
  from .sns_manager import SNSManager
5
5
  from .sqs_manager import SQSManager
6
6
 
7
- __all__ = ["DynamoDBManager", "S3Manager", "S3FSManager", "SNSManager", "SQSManager"]
7
+ __all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
@@ -8,17 +8,23 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
8
8
  #########################################################################
9
9
  class DynamoDBManager:
10
10
  #####################################################################
11
- def __init__(self):
12
- self.__dynamodb_session = boto3.Session(
11
+ def __init__(
12
+ self,
13
+ # endpoint_url
14
+ ):
15
+ # self.endpoint_url = endpoint_url
16
+ self.dynamodb_session = boto3.Session(
13
17
  aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
14
18
  aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
15
19
  region_name=os.environ.get("AWS_REGION", default="us-east-1"),
16
20
  )
17
- self.__dynamodb_resource = self.__dynamodb_session.resource(
21
+ self.dynamodb_resource = self.dynamodb_session.resource(
18
22
  service_name="dynamodb",
23
+ # endpoint_url=self.endpoint_url
19
24
  )
20
- self.__dynamodb_client = self.__dynamodb_session.client(
25
+ self.dynamodb_client = self.dynamodb_session.client(
21
26
  service_name="dynamodb",
27
+ # endpoint_url=self.endpoint_url
22
28
  )
23
29
  self.type_serializer = TypeSerializer() # https://stackoverflow.com/a/46738251
24
30
  self.type_deserializer = TypeDeserializer()
@@ -35,31 +41,14 @@ class DynamoDBManager:
35
41
  # assert (status_code == 200), "Problem, unable to update dynamodb table."
36
42
 
37
43
  #####################################################################
38
- def create_table(
39
- self,
40
- table_name,
41
- key_schema,
42
- attribute_definitions,
43
- ):
44
- self.__dynamodb_client.create_table(
45
- AttributeDefinitions=attribute_definitions,
46
- TableName=table_name,
47
- KeySchema=key_schema,
48
- BillingMode="PAY_PER_REQUEST", # "PROVISIONED",
49
- # ProvisionedThroughput={
50
- # 'ReadCapacityUnits': 1_000,
51
- # 'WriteCapacityUnits': 1_000
52
- # }
53
- )
54
-
55
44
  #####################################################################
56
45
  def create_water_column_sonar_table(
57
46
  self,
58
47
  table_name,
59
48
  ):
60
- self.create_table(
61
- table_name=table_name,
62
- key_schema=[
49
+ self.dynamodb_client.create_table(
50
+ TableName=table_name,
51
+ KeySchema=[
63
52
  {
64
53
  "AttributeName": "FILE_NAME",
65
54
  "KeyType": "HASH",
@@ -69,20 +58,52 @@ class DynamoDBManager:
69
58
  "KeyType": "RANGE",
70
59
  },
71
60
  ],
72
- attribute_definitions=[
61
+ AttributeDefinitions=[
73
62
  {"AttributeName": "FILE_NAME", "AttributeType": "S"},
74
63
  {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
75
64
  ],
65
+ BillingMode="PAY_PER_REQUEST",
66
+ # ProvisionedThroughput={
67
+ # 'ReadCapacityUnits': 1_000,
68
+ # 'WriteCapacityUnits': 1_000
69
+ # }
76
70
  )
71
+ # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
72
+ response = self.dynamodb_client.describe_table(TableName=table_name)
73
+ print(
74
+ response
75
+ ) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
76
+ # sleep then response['Table']['TableStatus'] == 'ACTIVE'
77
+
78
+ #####################################################################
79
+ # don't think this is used?
80
+ # def get_item(
81
+ # self,
82
+ # table_name,
83
+ # key
84
+ # ):
85
+ # response = self.dynamodb_client.get_item(TableName=table_name, Key=key)
86
+ # item = None
87
+ # if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
88
+ # if "Item" in response:
89
+ # item = response["Item"]
90
+ # return item
77
91
 
78
92
  #####################################################################
79
- def get_item(self, table_name, key):
80
- response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
81
- item = None
82
- if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
83
- if "Item" in response:
84
- item = response["Item"]
85
- return item
93
+ def get_table_item(
94
+ self,
95
+ table_name,
96
+ key,
97
+ ):
98
+ """
99
+ Gets a single row from the db.
100
+ """
101
+ table = self.dynamodb_resource.Table(table_name)
102
+ response = table.get_item(Key=key)
103
+ # TODO:
104
+ # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
105
+ # throw error
106
+ return response
86
107
 
87
108
  #####################################################################
88
109
  def update_item(
@@ -92,50 +113,83 @@ class DynamoDBManager:
92
113
  expression_attribute_names,
93
114
  expression_attribute_values,
94
115
  update_expression,
95
- ):
96
- response = self.__dynamodb_client.update_item(
97
- TableName=table_name,
98
- Key=key,
99
- ExpressionAttributeNames=expression_attribute_names,
100
- ExpressionAttributeValues=expression_attribute_values,
101
- UpdateExpression=update_expression,
102
- )
103
- status_code = response["ResponseMetadata"]["HTTPStatusCode"]
104
- # TODO: change to exception
105
- assert status_code == 200, "Problem, unable to update dynamodb table."
116
+ ): # TODO: convert to boolean
117
+ try:
118
+ response = self.dynamodb_client.update_item(
119
+ TableName=table_name,
120
+ Key=key,
121
+ ExpressionAttributeNames=expression_attribute_names,
122
+ ExpressionAttributeValues=expression_attribute_values,
123
+ UpdateExpression=update_expression,
124
+ )
125
+ return response["ResponseMetadata"]["HTTPStatusCode"] # TODO: should be 200
126
+ # print(f"HTTPStatusCode: {status_code}")
127
+ # assert status_code == 200, "Problem, unable to update dynamodb table."
128
+ # assert response['ConsumedCapacity']['TableName'] == table_name
129
+ except Exception as err:
130
+ raise RuntimeError(f"Problem was encountered while updating item, {err}")
106
131
 
107
132
  #####################################################################
133
+ # TODO: change to "get_cruise_as_df"
108
134
  def get_table_as_df(
109
135
  self,
110
- ship_name,
136
+ # ship_name,
111
137
  cruise_name,
112
- sensor_name,
138
+ # sensor_name, # TODO: need to add this back for EK80
113
139
  table_name,
114
- ):
115
- expression_attribute_values = {
116
- ":cr": {"S": cruise_name},
117
- ":se": {"S": sensor_name},
118
- ":sh": {"S": ship_name},
119
- }
120
-
121
- filter_expression = (
122
- "CRUISE_NAME = :cr and SENSOR_NAME = :se and SHIP_NAME = :sh"
123
- )
124
- response = self.__dynamodb_client.scan(
140
+ ) -> pd.DataFrame:
141
+ """
142
+ Scans the table for all entries associated with the given cruise and returns
143
+ them as a dataframe sorted by START_TIME.
144
+ #TODO: cruise name alone isn't good enough, there could be two instruments for a cruise...
145
+ """
146
+ filter_expression = "CRUISE_NAME = :cr"
147
+ response = self.dynamodb_client.scan(
125
148
  TableName=table_name,
126
- Select="ALL_ATTRIBUTES",
127
- ExpressionAttributeValues=expression_attribute_values,
149
+ # Limit=1000,
150
+ Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
151
+ # ExclusiveStartKey=where to pick up
152
+ # ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
153
+ # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
128
154
  FilterExpression=filter_expression,
155
+ # ExpressionAttributeNames={
156
+ # '#SH': 'SHIP_NAME',
157
+ # '#CR': 'CRUISE_NAME',
158
+ # '#FN': 'FILE_NAME',
159
+ # },
160
+ ExpressionAttributeValues={ # criteria
161
+ ":cr": {
162
+ "S": cruise_name,
163
+ },
164
+ },
165
+ ConsistentRead=True,
166
+ # ExclusiveStartKey=response["LastEvaluatedKey"],
129
167
  )
130
168
  # Note: table.scan() has 1 MB limit on results so pagination is used
169
+
170
+ if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
171
+ return pd.DataFrame() # If no results, return empty dataframe
172
+
131
173
  data = response["Items"]
132
174
 
133
- while "LastEvaluatedKey" in response:
134
- response = self.__dynamodb_client.scan(
175
+ while response.get("LastEvaluatedKey"): # "LastEvaluatedKey" in response:
176
+ response = self.dynamodb_client.scan(
135
177
  TableName=table_name,
136
- Select="ALL_ATTRIBUTES",
137
- ExpressionAttributeValues=expression_attribute_values,
178
+ ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
179
+ Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
138
180
  FilterExpression=filter_expression,
181
+ # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
182
+ # ExpressionAttributeNames={ # would need to specify all cols in df
183
+ # '#SH': 'SHIP_NAME',
184
+ # '#CR': 'CRUISE_NAME',
185
+ # '#FN': 'FILE_NAME',
186
+ # },
187
+ ExpressionAttributeValues={ # criteria
188
+ ":cr": {
189
+ "S": cruise_name,
190
+ },
191
+ },
192
+ ConsistentRead=True,
139
193
  ExclusiveStartKey=response["LastEvaluatedKey"],
140
194
  )
141
195
  data.extend(response["Items"])
@@ -146,25 +200,156 @@ class DynamoDBManager:
146
200
  return df.sort_values(by="START_TIME", ignore_index=True)
147
201
 
148
202
  #####################################################################
149
- # is this used?
150
- def get_table_item(
203
+ # def get_cruise_list(
204
+ # self,
205
+ # table_name,
206
+ # ) -> list:
207
+ # """
208
+ # Experimental, gets all cruise names as list
209
+ # """
210
+ # filter_expression = "CRUISE_NAME = :cr"
211
+ # response = self.dynamodb_client.scan(
212
+ # TableName=table_name,
213
+ # Select='SPECIFIC_ATTRIBUTES',
214
+ # #ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
215
+ # # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
216
+ # FilterExpression=filter_expression,
217
+ # # ExpressionAttributeNames={
218
+ # # '#SH': 'SHIP_NAME',
219
+ # # '#CR': 'CRUISE_NAME',
220
+ # # '#FN': 'FILE_NAME',
221
+ # # },
222
+ # # ExpressionAttributeValues={ # criteria
223
+ # # ':cr': {
224
+ # # 'S': cruise_name,
225
+ # # },
226
+ # # },
227
+ # )
228
+ # # Note: table.scan() has 1 MB limit on results so pagination is used
229
+ #
230
+ # if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
231
+ # return pd.DataFrame() # If no results, return empty dataframe
232
+ #
233
+ # dataset = response["Items"]
234
+ #
235
+ # while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
236
+ # response = self.dynamodb_client.scan(
237
+ # TableName=table_name,
238
+ # ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
239
+ # Select='ALL_ATTRIBUTES', # or 'SPECIFIC_ATTRIBUTES',
240
+ # FilterExpression=filter_expression,
241
+ # #ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
242
+ # # ExpressionAttributeNames={ # would need to specify all cols in df
243
+ # # '#SH': 'SHIP_NAME',
244
+ # # '#CR': 'CRUISE_NAME',
245
+ # # '#FN': 'FILE_NAME',
246
+ # # },
247
+ # ExpressionAttributeValues={ # criteria
248
+ # ':cr': {
249
+ # 'S': cruise_name,
250
+ # },
251
+ # },
252
+ # ConsistentRead=True,
253
+ # ExclusiveStartKey=response["LastEvaluatedKey"],
254
+ # )
255
+ # dataset.extend(response["Items"])
256
+ #
257
+ # deserializer = self.type_deserializer
258
+ # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
259
+ #
260
+ # return df.sort_values(by="START_TIME", ignore_index=True)
261
+
262
+ #####################################################################
263
+ # TODO: WIP
264
+ def delete_item(
151
265
  self,
152
266
  table_name,
153
- key,
267
+ cruise_name,
268
+ file_name,
154
269
  ):
155
- # a bit more high level, uses resource to get table item
156
- table = self.__dynamodb_resource.Table(table_name)
157
- response = table.get_item(Key=key)
270
+ """
271
+ Deletes the single row identified by the given cruise name and file name.
272
+ """
273
+ response = self.dynamodb_client.delete_item(
274
+ Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
275
+ TableName=table_name,
276
+ ReturnConsumedCapacity="TOTAL",
277
+ )
278
+ # TODO: there should be attributes included in response but they are missing
279
+ # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
280
+ # throw error
158
281
  return response
159
282
 
160
283
  #####################################################################
161
- # TODO: add helper method to delete the data
162
- def delete_cruise(
284
+ def describe_table(
163
285
  self,
164
286
  table_name,
165
- cruise_name,
166
287
  ):
167
- pass
288
+ """
289
+ Get a description of the table. Used to verify that records were added/removed.
290
+ """
291
+ response = self.dynamodb_client.describe_table(TableName=table_name)
292
+ print(response)
293
+ return response
294
+
295
+ #####################################################################
296
+ # TODO: from test_raw_to_zarr get enum and use here
297
+ # def __update_processing_status(
298
+ # self,
299
+ # file_name: str,
300
+ # cruise_name: str,
301
+ # pipeline_status: str,
302
+ # error_message: str = None,
303
+ # ):
304
+ # print(f"Updating processing status to {pipeline_status}.")
305
+ # if error_message:
306
+ # print(f"Error message: {error_message}")
307
+ # self.dynamo.update_item(
308
+ # table_name=self.__table_name,
309
+ # key={
310
+ # 'FILE_NAME': {'S': file_name}, # Partition Key
311
+ # 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
312
+ # },
313
+ # attribute_names={
314
+ # '#PT': 'PIPELINE_TIME',
315
+ # '#PS': 'PIPELINE_STATUS',
316
+ # '#EM': 'ERROR_MESSAGE',
317
+ # },
318
+ # expression='SET #PT = :pt, #PS = :ps, #EM = :em',
319
+ # attribute_values={
320
+ # ':pt': {
321
+ # 'S': datetime.now().isoformat(timespec="seconds") + "Z"
322
+ # },
323
+ # ':ps': {
324
+ # 'S': pipeline_status
325
+ # },
326
+ # ':em': {
327
+ # 'S': error_message
328
+ # }
329
+ # }
330
+ # )
331
+ # else:
332
+ # self.dynamo.update_item(
333
+ # table_name=self.__table_name,
334
+ # key={
335
+ # 'FILE_NAME': {'S': file_name}, # Partition Key
336
+ # 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
337
+ # },
338
+ # attribute_names={
339
+ # '#PT': 'PIPELINE_TIME',
340
+ # '#PS': 'PIPELINE_STATUS',
341
+ # },
342
+ # expression='SET #PT = :pt, #PS = :ps',
343
+ # attribute_values={
344
+ # ':pt': {
345
+ # 'S': datetime.now().isoformat(timespec="seconds") + "Z"
346
+ # },
347
+ # ':ps': {
348
+ # 'S': pipeline_status
349
+ # }
350
+ # }
351
+ # )
352
+ # print("Done updating processing status.")
168
353
 
169
354
 
170
355
  #########################################################################