water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of water-column-sonar-processing might be problematic. Click here for more details.

Files changed (60) hide show
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +420 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +72 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +339 -0
  12. water_column_sonar_processing/geometry/__init__.py +11 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +243 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
  17. water_column_sonar_processing/index/__init__.py +3 -0
  18. water_column_sonar_processing/index/index_manager.py +384 -0
  19. water_column_sonar_processing/model/__init__.py +3 -0
  20. water_column_sonar_processing/model/zarr_manager.py +722 -0
  21. water_column_sonar_processing/process.py +149 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
  31. water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
@@ -0,0 +1,13 @@
1
+ from __future__ import absolute_import
2
+
3
+ from . import aws, cruise, geometry, index, model, processing, utility
4
+
5
+ __all__ = [
6
+ "aws",
7
+ "cruise",
8
+ "geometry",
9
+ "index",
10
+ "model",
11
+ "processing",
12
+ "utility",
13
+ ]
@@ -0,0 +1,7 @@
1
+ from .dynamodb_manager import DynamoDBManager
2
+ from .s3_manager import S3Manager, chunked
3
+ from .s3fs_manager import S3FSManager
4
+ from .sns_manager import SNSManager
5
+ from .sqs_manager import SQSManager
6
+
7
+ __all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
@@ -0,0 +1,355 @@
1
+ import os
2
+
3
+ import boto3
4
+ import pandas as pd
5
+ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
6
+
7
+
8
+ #########################################################################
9
class DynamoDBManager:
    """Convenience wrapper around the boto3 DynamoDB client and resource.

    Credentials are read from the ``ACCESS_KEY_ID`` and ``SECRET_ACCESS_KEY``
    environment variables; the region is read from ``AWS_REGION`` and falls
    back to ``us-east-1``.
    """

    #####################################################################
    def __init__(
        self,
        # endpoint_url
    ):
        # self.endpoint_url = endpoint_url
        self.dynamodb_session = boto3.Session(
            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
            region_name=os.environ.get("AWS_REGION", default="us-east-1"),
        )
        self.dynamodb_resource = self.dynamodb_session.resource(
            service_name="dynamodb",
            # endpoint_url=self.endpoint_url
        )
        self.dynamodb_client = self.dynamodb_session.client(
            service_name="dynamodb",
            # endpoint_url=self.endpoint_url
        )
        # Converters between Python values and the DynamoDB wire format,
        # see https://stackoverflow.com/a/46738251
        self.type_serializer = TypeSerializer()
        self.type_deserializer = TypeDeserializer()

    #####################################################################
    def create_water_column_sonar_table(
        self,
        table_name,
    ):
        """Create an on-demand table keyed by FILE_NAME (hash) and CRUISE_NAME (range).

        Blocks until the table is reported as existing, then prints and
        discards the table description.

        Args:
            table_name: Name of the table to create.
        """
        self.dynamodb_client.create_table(
            TableName=table_name,
            KeySchema=[
                {
                    "AttributeName": "FILE_NAME",
                    "KeyType": "HASH",
                },
                {
                    "AttributeName": "CRUISE_NAME",
                    "KeyType": "RANGE",
                },
            ],
            AttributeDefinitions=[
                {"AttributeName": "FILE_NAME", "AttributeType": "S"},
                {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
            ],
            BillingMode="PAY_PER_REQUEST",
        )
        # A freshly created table starts in status 'CREATING'; the waiter
        # polls describe_table until the table is usable ('ACTIVE').
        self.dynamodb_client.get_waiter("table_exists").wait(TableName=table_name)
        response = self.dynamodb_client.describe_table(TableName=table_name)
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
        print(response)

    #####################################################################
    def get_table_item(
        self,
        table_name,
        key,
    ):
        """Fetch a single row from the table.

        Args:
            table_name: Name of the table to read from.
            key: Primary key of the row, passed to ``Table.get_item`` as ``Key``.

        Returns:
            The raw ``get_item`` response, returned without any status check.
        """
        table = self.dynamodb_resource.Table(table_name)
        # TODO: raise if response["ResponseMetadata"]["HTTPStatusCode"] != 200
        return table.get_item(Key=key)

    #####################################################################
    def update_item(
        self,
        table_name,
        key,
        expression_attribute_names,
        expression_attribute_values,
        update_expression,
    ):  # TODO: convert to boolean
        """Apply an update expression to one row.

        Args:
            table_name: Name of the table to update.
            key: Primary key of the row in DynamoDB attribute-value format.
            expression_attribute_names: Placeholder-to-attribute-name mapping.
            expression_attribute_values: Placeholder-to-value mapping.
            update_expression: The DynamoDB update expression to run.

        Returns:
            The HTTP status code of the response (200 is expected on success).

        Raises:
            RuntimeError: If the update or the response lookup fails; the
                original exception is attached as ``__cause__``.
        """
        try:
            response = self.dynamodb_client.update_item(
                TableName=table_name,
                Key=key,
                ExpressionAttributeNames=expression_attribute_names,
                ExpressionAttributeValues=expression_attribute_values,
                UpdateExpression=update_expression,
            )
            return response["ResponseMetadata"]["HTTPStatusCode"]  # TODO: should be 200
        except Exception as err:
            # Chain explicitly so the underlying failure stays in the traceback.
            raise RuntimeError(
                f"Problem was encountered while updating item, {err}"
            ) from err

    #####################################################################
    # TODO: change to "get_cruise_as_df"
    def get_table_as_df(
        self,
        # ship_name,
        cruise_name,
        # sensor_name, # TODO: need to add this back for EK80
        table_name,
    ) -> pd.DataFrame:
        """Return every row of one cruise as a DataFrame sorted by START_TIME.

        The table is scanned with a filter on CRUISE_NAME and all result
        pages are collected. This method only reads; nothing is deleted.

        TODO: cruise name isn't good enough, there could be two instruments
        for a cruise.

        Args:
            cruise_name: Value that CRUISE_NAME must equal.
            table_name: Name of the table to scan.

        Returns:
            A DataFrame with one row per matching item, sorted by START_TIME
            with a fresh index, or an empty DataFrame when nothing matches.
        """
        scan_kwargs = {
            "TableName": table_name,
            # Either 'Select' or 'ProjectionExpression' may be given, not both.
            "Select": "ALL_ATTRIBUTES",
            "FilterExpression": "CRUISE_NAME = :cr",
            "ExpressionAttributeValues": {":cr": {"S": cruise_name}},
            "ConsistentRead": True,
        }

        # A scan returns at most 1 MB per call, so follow LastEvaluatedKey
        # until the service stops returning one.
        items = []
        while True:
            response = self.dynamodb_client.scan(**scan_kwargs)
            items.extend(response.get("Items", []))
            last_key = response.get("LastEvaluatedKey")
            if not last_key:
                break
            scan_kwargs["ExclusiveStartKey"] = last_key

        # The filter is applied after each page is read, so every page can be
        # empty; sorting an empty frame by START_TIME would raise KeyError.
        if not items:
            return pd.DataFrame()

        deserialize = self.type_deserializer.deserialize
        df = pd.DataFrame([deserialize({"M": item}) for item in items])

        return df.sort_values(by="START_TIME", ignore_index=True)

    #####################################################################
    # TODO: WIP
    def delete_item(
        self,
        table_name,
        cruise_name,
        file_name,
    ):
        """Delete the single row identified by cruise name and file name.

        Args:
            table_name: Name of the table to delete from.
            cruise_name: CRUISE_NAME part of the row's key.
            file_name: FILE_NAME part of the row's key.

        Returns:
            The raw ``delete_item`` response, returned without any status check.
        """
        response = self.dynamodb_client.delete_item(
            Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
            TableName=table_name,
            ReturnConsumedCapacity="TOTAL",
        )
        # TODO: there should be attributes included in response but they are missing
        # TODO: raise if response["ResponseMetadata"]["HTTPStatusCode"] != 200
        return response

    #####################################################################
    def describe_table(
        self,
        table_name,
    ):
        """Print and return the table description.

        Used to verify that records were added/removed.

        Args:
            table_name: Name of the table to describe.

        Returns:
            The raw ``describe_table`` response.
        """
        response = self.dynamodb_client.describe_table(TableName=table_name)
        print(response)
        return response
294
+
295
+ #####################################################################
296
+ # TODO: from test_raw_to_zarr get enum and use here
297
+ # def __update_processing_status(
298
+ # self,
299
+ # file_name: str,
300
+ # cruise_name: str,
301
+ # pipeline_status: str,
302
+ # error_message: str = None,
303
+ # ):
304
+ # print(f"Updating processing status to {pipeline_status}.")
305
+ # if error_message:
306
+ # print(f"Error message: {error_message}")
307
+ # self.dynamo.update_item(
308
+ # table_name=self.__table_name,
309
+ # key={
310
+ # 'FILE_NAME': {'S': file_name}, # Partition Key
311
+ # 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
312
+ # },
313
+ # attribute_names={
314
+ # '#PT': 'PIPELINE_TIME',
315
+ # '#PS': 'PIPELINE_STATUS',
316
+ # '#EM': 'ERROR_MESSAGE',
317
+ # },
318
+ # expression='SET #PT = :pt, #PS = :ps, #EM = :em',
319
+ # attribute_values={
320
+ # ':pt': {
321
+ # 'S': datetime.now().isoformat(timespec="seconds") + "Z"
322
+ # },
323
+ # ':ps': {
324
+ # 'S': pipeline_status
325
+ # },
326
+ # ':em': {
327
+ # 'S': error_message
328
+ # }
329
+ # }
330
+ # )
331
+ # else:
332
+ # self.dynamo.update_item(
333
+ # table_name=self.__table_name,
334
+ # key={
335
+ # 'FILE_NAME': {'S': file_name}, # Partition Key
336
+ # 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
337
+ # },
338
+ # attribute_names={
339
+ # '#PT': 'PIPELINE_TIME',
340
+ # '#PS': 'PIPELINE_STATUS',
341
+ # },
342
+ # expression='SET #PT = :pt, #PS = :ps',
343
+ # attribute_values={
344
+ # ':pt': {
345
+ # 'S': datetime.now().isoformat(timespec="seconds") + "Z"
346
+ # },
347
+ # ':ps': {
348
+ # 'S': pipeline_status
349
+ # }
350
+ # }
351
+ # )
352
+ # print("Done updating processing status.")
353
+
354
+
355
+ #########################################################################