water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
@@ -9,20 +9,20 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
9
9
  class DynamoDBManager:
10
10
  #####################################################################
11
11
  def __init__(
12
- self,
13
- # endpoint_url
12
+ self,
13
+ # endpoint_url
14
14
  ):
15
15
  # self.endpoint_url = endpoint_url
16
- self.__dynamodb_session = boto3.Session(
16
+ self.dynamodb_session = boto3.Session(
17
17
  aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
18
18
  aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
19
19
  region_name=os.environ.get("AWS_REGION", default="us-east-1"),
20
20
  )
21
- self.__dynamodb_resource = self.__dynamodb_session.resource(
21
+ self.dynamodb_resource = self.dynamodb_session.resource(
22
22
  service_name="dynamodb",
23
23
  # endpoint_url=self.endpoint_url
24
24
  )
25
- self.__dynamodb_client = self.__dynamodb_session.client(
25
+ self.dynamodb_client = self.dynamodb_session.client(
26
26
  service_name="dynamodb",
27
27
  # endpoint_url=self.endpoint_url
28
28
  )
@@ -46,7 +46,7 @@ class DynamoDBManager:
46
46
  self,
47
47
  table_name,
48
48
  ):
49
- self.__dynamodb_client.create_table(
49
+ self.dynamodb_client.create_table(
50
50
  TableName=table_name,
51
51
  KeySchema=[
52
52
  {
@@ -62,15 +62,17 @@ class DynamoDBManager:
62
62
  {"AttributeName": "FILE_NAME", "AttributeType": "S"},
63
63
  {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
64
64
  ],
65
- BillingMode="PAY_PER_REQUEST"
65
+ BillingMode="PAY_PER_REQUEST",
66
66
  # ProvisionedThroughput={
67
67
  # 'ReadCapacityUnits': 1_000,
68
68
  # 'WriteCapacityUnits': 1_000
69
69
  # }
70
70
  )
71
71
  # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
72
- response = self.__dynamodb_client.describe_table(TableName=table_name)
73
- print(response) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
72
+ response = self.dynamodb_client.describe_table(TableName=table_name)
73
+ print(
74
+ response
75
+ ) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
74
76
  # sleep then response['Table']['TableStatus'] == 'ACTIVE'
75
77
 
76
78
  #####################################################################
@@ -80,7 +82,7 @@ class DynamoDBManager:
80
82
  # table_name,
81
83
  # key
82
84
  # ):
83
- # response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
85
+ # response = self.dynamodb_client.get_item(TableName=table_name, Key=key)
84
86
  # item = None
85
87
  # if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
86
88
  # if "Item" in response:
@@ -96,7 +98,7 @@ class DynamoDBManager:
96
98
  """
97
99
  Gets a single row from the db.
98
100
  """
99
- table = self.__dynamodb_resource.Table(table_name)
101
+ table = self.dynamodb_resource.Table(table_name)
100
102
  response = table.get_item(Key=key)
101
103
  # TODO:
102
104
  # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
@@ -111,58 +113,83 @@ class DynamoDBManager:
111
113
  expression_attribute_names,
112
114
  expression_attribute_values,
113
115
  update_expression,
114
- ):
115
- response = self.__dynamodb_client.update_item(
116
- TableName=table_name,
117
- Key=key,
118
- ExpressionAttributeNames=expression_attribute_names,
119
- ExpressionAttributeValues=expression_attribute_values,
120
- UpdateExpression=update_expression,
121
- )
122
- status_code = response["ResponseMetadata"]["HTTPStatusCode"]
123
- assert response['ConsumedCapacity']['TableName'] == table_name
124
- assert status_code == 200, "Problem, unable to update dynamodb table."
116
+ ): # TODO: convert to boolean
117
+ try:
118
+ response = self.dynamodb_client.update_item(
119
+ TableName=table_name,
120
+ Key=key,
121
+ ExpressionAttributeNames=expression_attribute_names,
122
+ ExpressionAttributeValues=expression_attribute_values,
123
+ UpdateExpression=update_expression,
124
+ )
125
+ return response["ResponseMetadata"]["HTTPStatusCode"] # TODO: should be 200
126
+ # print(f"HTTPStatusCode: {status_code}")
127
+ # assert status_code == 200, "Problem, unable to update dynamodb table."
128
+ # assert response['ConsumedCapacity']['TableName'] == table_name
129
+ except Exception as err:
130
+ raise RuntimeError(f"Problem was encountered while updating item, {err}")
125
131
 
126
132
  #####################################################################
127
133
  # TODO: change to "get_cruise_as_df"
128
134
  def get_table_as_df(
129
135
  self,
130
- ship_name,
136
+ # ship_name,
131
137
  cruise_name,
132
- sensor_name,
138
+ # sensor_name, # TODO: need to add this back for EK80
133
139
  table_name,
134
140
  ) -> pd.DataFrame:
135
141
  """
136
142
  To be used to initialize a cruise, deletes all entries associated with that cruise
137
143
  in the database.
144
+ #TODO: cruise names isn't good enough, there could be two instrument for a cruise...
138
145
  """
139
- expression_attribute_values = {
140
- ":cr": {"S": cruise_name},
141
- ":se": {"S": sensor_name},
142
- ":sh": {"S": ship_name},
143
- }
144
-
145
- filter_expression = (
146
- "CRUISE_NAME = :cr and SENSOR_NAME = :se and SHIP_NAME = :sh"
147
- )
148
- response = self.__dynamodb_client.scan(
146
+ filter_expression = "CRUISE_NAME = :cr"
147
+ response = self.dynamodb_client.scan(
149
148
  TableName=table_name,
150
- Select="ALL_ATTRIBUTES",
151
- ExpressionAttributeValues=expression_attribute_values,
149
+ # Limit=1000,
150
+ Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
151
+ # ExclusiveStartKey=where to pick up
152
+ # ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
153
+ # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
152
154
  FilterExpression=filter_expression,
155
+ # ExpressionAttributeNames={
156
+ # '#SH': 'SHIP_NAME',
157
+ # '#CR': 'CRUISE_NAME',
158
+ # '#FN': 'FILE_NAME',
159
+ # },
160
+ ExpressionAttributeValues={ # criteria
161
+ ":cr": {
162
+ "S": cruise_name,
163
+ },
164
+ },
165
+ ConsistentRead=True,
166
+ # ExclusiveStartKey=response["LastEvaluatedKey"],
153
167
  )
154
168
  # Note: table.scan() has 1 MB limit on results so pagination is used
155
- if len(response["Items"]) == 0:
156
- return pd.DataFrame() # If no results, return empty dataframe
169
+
170
+ if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
171
+ return pd.DataFrame() # If no results, return empty dataframe
157
172
 
158
173
  data = response["Items"]
159
174
 
160
- while "LastEvaluatedKey" in response:
161
- response = self.__dynamodb_client.scan(
175
+ while response.get("LastEvaluatedKey"): # "LastEvaluatedKey" in response:
176
+ response = self.dynamodb_client.scan(
162
177
  TableName=table_name,
163
- Select="ALL_ATTRIBUTES",
164
- ExpressionAttributeValues=expression_attribute_values,
178
+ ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
179
+ Select="ALL_ATTRIBUTES", # or 'SPECIFIC_ATTRIBUTES',
165
180
  FilterExpression=filter_expression,
181
+ # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
182
+ # ExpressionAttributeNames={ # would need to specify all cols in df
183
+ # '#SH': 'SHIP_NAME',
184
+ # '#CR': 'CRUISE_NAME',
185
+ # '#FN': 'FILE_NAME',
186
+ # },
187
+ ExpressionAttributeValues={ # criteria
188
+ ":cr": {
189
+ "S": cruise_name,
190
+ },
191
+ },
192
+ ConsistentRead=True,
166
193
  ExclusiveStartKey=response["LastEvaluatedKey"],
167
194
  )
168
195
  data.extend(response["Items"])
@@ -172,6 +199,66 @@ class DynamoDBManager:
172
199
 
173
200
  return df.sort_values(by="START_TIME", ignore_index=True)
174
201
 
202
+ #####################################################################
203
+ # def get_cruise_list(
204
+ # self,
205
+ # table_name,
206
+ # ) -> list:
207
+ # """
208
+ # Experimental, gets all cruise names as list
209
+ # """
210
+ # filter_expression = "CRUISE_NAME = :cr"
211
+ # response = self.dynamodb_client.scan(
212
+ # TableName=table_name,
213
+ # Select='SPECIFIC_ATTRIBUTES',
214
+ # #ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
215
+ # # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
216
+ # FilterExpression=filter_expression,
217
+ # # ExpressionAttributeNames={
218
+ # # '#SH': 'SHIP_NAME',
219
+ # # '#CR': 'CRUISE_NAME',
220
+ # # '#FN': 'FILE_NAME',
221
+ # # },
222
+ # # ExpressionAttributeValues={ # criteria
223
+ # # ':cr': {
224
+ # # 'S': cruise_name,
225
+ # # },
226
+ # # },
227
+ # )
228
+ # # Note: table.scan() has 1 MB limit on results so pagination is used
229
+ #
230
+ # if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
231
+ # return pd.DataFrame() # If no results, return empty dataframe
232
+ #
233
+ # dataset = response["Items"]
234
+ #
235
+ # while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
236
+ # response = self.dynamodb_client.scan(
237
+ # TableName=table_name,
238
+ # ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
239
+ # Select='ALL_ATTRIBUTES', # or 'SPECIFIC_ATTRIBUTES',
240
+ # FilterExpression=filter_expression,
241
+ # #ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
242
+ # # ExpressionAttributeNames={ # would need to specify all cols in df
243
+ # # '#SH': 'SHIP_NAME',
244
+ # # '#CR': 'CRUISE_NAME',
245
+ # # '#FN': 'FILE_NAME',
246
+ # # },
247
+ # ExpressionAttributeValues={ # criteria
248
+ # ':cr': {
249
+ # 'S': cruise_name,
250
+ # },
251
+ # },
252
+ # ConsistentRead=True,
253
+ # ExclusiveStartKey=response["LastEvaluatedKey"],
254
+ # )
255
+ # dataset.extend(response["Items"])
256
+ #
257
+ # deserializer = self.type_deserializer
258
+ # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
259
+ #
260
+ # return df.sort_values(by="START_TIME", ignore_index=True)
261
+
175
262
  #####################################################################
176
263
  # TODO: WIP
177
264
  def delete_item(
@@ -183,17 +270,10 @@ class DynamoDBManager:
183
270
  """
184
271
  Finds all rows associated with a cruise and deletes them.
185
272
  """
186
- response = self.__dynamodb_client.delete_item(
187
- Key={
188
- "CRUISE_NAME": {
189
- "S": cruise_name
190
- },
191
- "FILE_NAME": {
192
- "S": file_name
193
- }
194
- },
273
+ response = self.dynamodb_client.delete_item(
274
+ Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
195
275
  TableName=table_name,
196
- ReturnConsumedCapacity="TOTALS",
276
+ ReturnConsumedCapacity="TOTAL",
197
277
  )
198
278
  # TODO: there should be attributes included in response but they are missing
199
279
  # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
@@ -202,18 +282,16 @@ class DynamoDBManager:
202
282
 
203
283
  #####################################################################
204
284
  def describe_table(
205
- self,
206
- table_name,
285
+ self,
286
+ table_name,
207
287
  ):
208
288
  """
209
289
  Get a description of the table. Used to verify that records were added/removed.
210
290
  """
211
- response = self.__dynamodb_client.describe_table(TableName=table_name)
291
+ response = self.dynamodb_client.describe_table(TableName=table_name)
212
292
  print(response)
213
293
  return response
214
294
 
215
-
216
-
217
295
  #####################################################################
218
296
  # TODO: from test_raw_to_zarr get enum and use here
219
297
  # def __update_processing_status(
@@ -226,7 +304,7 @@ class DynamoDBManager:
226
304
  # print(f"Updating processing status to {pipeline_status}.")
227
305
  # if error_message:
228
306
  # print(f"Error message: {error_message}")
229
- # self.__dynamo.update_item(
307
+ # self.dynamo.update_item(
230
308
  # table_name=self.__table_name,
231
309
  # key={
232
310
  # 'FILE_NAME': {'S': file_name}, # Partition Key
@@ -251,7 +329,7 @@ class DynamoDBManager:
251
329
  # }
252
330
  # )
253
331
  # else:
254
- # self.__dynamo.update_item(
332
+ # self.dynamo.update_item(
255
333
  # table_name=self.__table_name,
256
334
  # key={
257
335
  # 'FILE_NAME': {'S': file_name}, # Partition Key
@@ -273,4 +351,5 @@ class DynamoDBManager:
273
351
  # )
274
352
  # print("Done updating processing status.")
275
353
 
354
+
276
355
  #########################################################################