water-column-sonar-processing: 25.3.1-py3-none-any.whl → 25.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
- water_column_sonar_processing/aws/s3_manager.py +95 -90
- water_column_sonar_processing/aws/s3fs_manager.py +5 -3
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/__init__.py +2 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
- water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -21
- water_column_sonar_processing/cruise/resample_regrid.py +57 -47
- water_column_sonar_processing/dataset/__init__.py +3 -0
- water_column_sonar_processing/dataset/dataset_manager.py +205 -0
- water_column_sonar_processing/dataset/feature_manager.py +32 -0
- water_column_sonar_processing/geometry/geometry_manager.py +11 -12
- water_column_sonar_processing/geometry/line_simplification.py +26 -1
- water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
- water_column_sonar_processing/index/index_manager.py +18 -17
- water_column_sonar_processing/model/zarr_manager.py +504 -256
- water_column_sonar_processing/processing/__init__.py +3 -2
- water_column_sonar_processing/processing/batch_downloader.py +11 -11
- water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -2
- water_column_sonar_processing/utility/constants.py +26 -7
- water_column_sonar_processing/utility/timestamp.py +1 -0
- water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
- water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
- {water_column_sonar_processing-25.3.1.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-25.3.1.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
- water_column_sonar_processing-25.3.1.dist-info/METADATA +0 -170
- water_column_sonar_processing-25.3.1.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.3.1.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/dynamodb_manager.py

@@ -127,7 +127,7 @@ class DynamoDBManager:
             # assert status_code == 200, "Problem, unable to update dynamodb table."
             # assert response['ConsumedCapacity']['TableName'] == table_name
         except Exception as err:
-
+            raise RuntimeError(f"Problem was encountered while updating item, {err}")

@@ -135,7 +135,7 @@ class DynamoDBManager:
         self,
         # ship_name,
         cruise_name,
-        # sensor_name,
+        # sensor_name,  # TODO: need to add this back for EK80
         table_name,
     ) -> pd.DataFrame:
         """

@@ -230,7 +230,7 @@ class DynamoDBManager:
         # if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
         #     return pd.DataFrame()  # If no results, return empty dataframe
         #
-        #
+        # dataset = response["Items"]
         #
         # while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
         #     response = self.dynamodb_client.scan(

@@ -252,10 +252,10 @@ class DynamoDBManager:
         # ConsistentRead=True,
         # ExclusiveStartKey=response["LastEvaluatedKey"],
         # )
-        #
+        # dataset.extend(response["Items"])
         #
         # deserializer = self.type_deserializer
-        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in
+        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
         #
         # return df.sort_values(by="START_TIME", ignore_index=True)

@@ -273,7 +273,7 @@ class DynamoDBManager:
         response = self.dynamodb_client.delete_item(
             Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
             TableName=table_name,
-            ReturnConsumedCapacity="
+            ReturnConsumedCapacity="TOTAL",
         )
         # TODO: there should be attributes included in response but they are missing
         # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
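The delete_item change above fills in a previously truncated ReturnConsumedCapacity argument. As a standalone illustration of what that boto3 call returns (the table name and key values here are hypothetical, not from the package):

    import boto3

    dynamodb_client = boto3.client("dynamodb", region_name="us-east-1")
    # Hypothetical table/key values, for illustration only.
    response = dynamodb_client.delete_item(
        Key={"CRUISE_NAME": {"S": "HB0806"}, "FILE_NAME": {"S": "example_file"}},
        TableName="example-cruise-table",
        ReturnConsumedCapacity="TOTAL",
    )
    # With "TOTAL", the response carries a ConsumedCapacity block, e.g.
    # {"TableName": "example-cruise-table", "CapacityUnits": 1.0}
    print(response.get("ConsumedCapacity"))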
water_column_sonar_processing/aws/s3_manager.py

@@ -2,6 +2,7 @@ import json
 import os
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
 from typing import Optional

 import boto3

@@ -80,14 +81,8 @@ class S3Manager:
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )

-    # def get_client(self):  # TODO: do i need this?
-    #     return self.s3_session.client(
-    #         service_name="s3",
-    #         config=self.s3_client_config,
-    #         region_name=self.s3_region,
-    #     )
-
     #####################################################################
+    # tested
     def create_bucket(
         self,
         bucket_name: str,

@@ -116,12 +111,13 @@ class S3Manager:
         # print(response)

     #####################################################################
+    # tested
     def list_buckets(self):
-        # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()

     #####################################################################
+    # tested
     def upload_nodd_file(
         self,
         file_name: str,

@@ -137,6 +133,7 @@ class S3Manager:
         return key

     #####################################################################
+    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,

@@ -160,58 +157,66 @@ class S3Manager:
                 if result:
                     all_uploads.extend([result])
             except Exception as err:
-
+                raise RuntimeError(f"Problem, {err}")
+
         print("Done uploading files using threading pool.")
         return all_uploads

     #####################################################################
-    #
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
+    #####################################################################
+    # tested
     def upload_file(
         self,
         filename: str,
         bucket_name: str,
         key: str,
     ):
-        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
         self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)

     #####################################################################
-
-        self,
-        local_directory,
-        remote_directory,
-        output_bucket_name,
-    ):
-        # Right now this is just for uploading a model store to s3
-        print("Uploading files to output bucket.")
-        store_name = os.path.basename(local_directory)
-        all_files = []
-        for subdir, dirs, files in os.walk(local_directory):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(
-                    remote_directory,
-                    store_name,
-                    subdir.split(store_name)[-1].strip("/"),
-                )
-                all_files.append([local_path, s3_key])
-
-        all_uploads = self.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading files to output bucket.")
-        return all_uploads
-
-    #####################################################################
+    # tested
     def check_if_object_exists(self, bucket_name, key_name) -> bool:
         s3_manager2 = S3Manager()
         s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
         s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
         try:
-            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
             s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
         except botocore.exceptions.ClientError as e:
             if e.response["Error"]["Code"] == "404":
                 # The object does not exist.
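The new upload_zarr_store_to_s3 derives each object key by slicing the walked directory path at the store name. A quick self-contained check of that key construction, reusing the example path from the code comment (the local scratch directory is made up):

    import os

    object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"
    cruise_name = "HB0806"
    subdir = f"/tmp/scratch/{cruise_name}.zarr/Sv/0"  # hypothetical os.walk subdir
    file = ".zattrs"

    s3_key = os.path.join(
        object_prefix,
        os.path.join(subdir[subdir.find(f"{cruise_name}.zarr"):], file),
    )
    print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/Sv/0/.zattrs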
@@ -222,10 +227,9 @@ class S3Manager:
             else:
                 # Something else has gone wrong.
                 raise
-        return True

     #####################################################################
-    #
+    # tested
     def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
         # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
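check_if_object_exists now returns True from inside the try block instead of falling through after the except. The head_object existence idiom it relies on, reduced to a self-contained sketch:

    import boto3
    import botocore

    def object_exists(bucket_name: str, key_name: str) -> bool:
        # HEAD is the cheapest existence test; a missing key surfaces as a
        # ClientError carrying HTTP 404 rather than as a None return value.
        s3_client = boto3.client("s3")
        try:
            s3_client.head_object(Bucket=bucket_name, Key=key_name)
            return True
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == "404":
                return False
            raise  # permission or throttling errors should not read as "missing"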
@@ -239,32 +243,20 @@ class S3Manager:
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys

-    # def list_nodd_objects(  # These are used by the geometry for uploading data
-    #     self,
-    #     prefix,
-    # ):
-    #     # Returns a list of key strings for each object in bucket defined by prefix
-    #     keys = []
-    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #     for page in paginator.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #         if "Contents" in page.keys():
-    #             keys.extend([k["Key"] for k in page["Contents"]])
-    #     return keys
-
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
-        if not path.endswith("/"):
-            path = path + "/"
-        # s3_client = self.s3_client
-        resp = self.list_objects(
-            bucket_name=bucket_name, prefix=path
-        )  # TODO: this is returning root folder and doesn't include children or hidden folders
-        # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
-        return "Contents" in resp
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp

     #####################################################################
-    #
+    # private
     def __paginate_child_objects(
         self,
         bucket_name: str,

@@ -279,6 +271,8 @@ class S3Manager:
             objects.extend(page["Contents"])
         return objects

+    #####################################################################
+    # tested
     def get_child_objects(
         self,
         bucket_name: str,

@@ -310,13 +304,14 @@ class S3Manager:
         return raw_files

     #####################################################################
-    #
-    def get_object(
+    # tested
+    def get_object(  # noaa-wcsd-pds or noaa-wcsd-zarr-pds
         self,
         bucket_name,
         key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
         print(f"Getting object {key_name} from {bucket_name}")
         try:
             response = self.s3_client.get_object(

@@ -325,27 +320,31 @@ class S3Manager:
             )
             # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
             # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
         except ClientError as err:
             print(f"Problem was encountered while getting s3 file: {err}")
             raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response

     #####################################################################
-    #
-    def download_file(
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def download_file(
         self,
         bucket_name,
         key,
-        file_name,  # where the file will be saved
+        file_name,  # path to where the file will be saved
     ):
-        self.s3_client.download_file(
-            Bucket=bucket_name, Key=key, Filename=file_name
-        )
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")

     #####################################################################
-    #
+    # tested
     def delete_nodd_objects(  # nodd-bucket
         self,
         bucket_name,
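download_file gains a try/except but still raises on every failure; the inline TODO wants a missing bottom file to be non-fatal. One way that could look, as a sketch rather than anything the package ships (the function name and return contract are invented here):

    import botocore

    def download_if_present(s3_client, bucket_name: str, key: str, file_name: str) -> bool:
        # Swallow only "object not found" so an absent bottom file does not
        # abort the rest of the cruise download; re-raise everything else.
        try:
            s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
            return True
        except botocore.exceptions.ClientError as err:
            if err.response["Error"]["Code"] in ("404", "NoSuchKey"):
                return False
            raise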
@@ -358,16 +357,20 @@ class S3Manager:
                     objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
                     Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print("Deleted files.")
         except Exception as err:
-
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")

     #####################################################################
-    #
-    def delete_nodd_object(
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
         self,
         bucket_name,
         key_name,
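delete_nodd_objects now sleeps half a second before each 1000-key batch to sidestep the S3 SlowDown throttling quoted in the comment. The chunked helper itself is not shown in this diff; a minimal implementation consistent with the call chunked(ll=objects_to_delete, n=1000) would be:

    from time import sleep

    def chunked(ll: list, n: int):
        # Yield successive n-sized slices; DeleteObjects accepts at most 1000 keys.
        for i in range(0, len(ll), n):
            yield ll[i : i + n]

    # Usage shape mirroring the diff (client and bucket are placeholders):
    # for batch in chunked(ll=objects_to_delete, n=1000):
    #     sleep(0.5)  # crude rate limiting against SlowDown responses
    #     s3_client.delete_objects(Bucket=bucket_name, Delete={"Objects": batch})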
@@ -379,22 +382,27 @@ class S3Manager:
             )
             print("Deleted file.")
         except Exception as err:
-
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")

     #####################################################################
+    # tested
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-        self.s3_client.put_object(
-            Bucket=bucket_name, Key=key, Body=body
-        )
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")

     #####################################################################
+    # tested
     def read_s3_json(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         file_name_stem,
-        output_bucket_name,
+        output_bucket_name,  # TODO: change to just bucket_name
     ) -> str:
         try:
             resource = self.s3_resource_noaa_wcsd_zarr_pds

@@ -405,11 +413,8 @@ class S3Manager:
             file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
-        except Exception as err:
-
-            raise
-
-    #####################################################################
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")


     #########################################################################
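read_s3_json now wraps failures in a RuntimeError instead of a bare re-raise. Its happy path, reduced to a standalone boto3 resource sketch (the real method assembles the key from ship_name, cruise_name, sensor_name, and file_name_stem; a literal key stands in here):

    import json
    import boto3

    def read_json_from_s3(bucket_name: str, key: str) -> dict:
        # .get() returns a dict whose "Body" is a streaming response.
        content_object = boto3.resource("s3").Object(bucket_name, key).get()
        file_content = content_object["Body"].read().decode("utf-8")
        return json.loads(file_content)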
water_column_sonar_processing/aws/s3fs_manager.py

@@ -4,6 +4,7 @@ from typing import Optional
 import s3fs

 # TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG


 class S3FSManager:

@@ -13,15 +14,16 @@ class S3FSManager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-
-
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            # asynchronous=False,
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
+            # asynchronous=True,
         )
+        # self.s3fs.ls("")

         # s3_fs = s3fs.S3FileSystem(  # TODO: use s3fs_manager?
         #     anon=True,
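For orientation, S3FSManager wraps s3fs so zarr code can address buckets through a filesystem interface; the constructor now also records the input/output bucket names from the environment. A hedged usage sketch (the store path and credentials are placeholders):

    import os
    import s3fs

    fs = s3fs.S3FileSystem(
        key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
        secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
    )
    # An S3Map built on the filesystem can then back a zarr store, e.g.:
    # store = s3fs.S3Map(root="noaa-wcsd-zarr-pds/level_2/.../HB0806.zarr", s3=fs)
    # zarr.open(store, mode="r")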
water_column_sonar_processing/aws/sqs_manager.py

@@ -35,7 +35,7 @@ class SQSManager:
     #######################################################
     def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)

water_column_sonar_processing/cruise/__init__.py

@@ -1,4 +1,5 @@
 from .create_empty_zarr_store import CreateEmptyZarrStore
+from .create_empty_zarr_store_level_3 import CreateEmptyZarrStoreLevel3
 from .resample_regrid import ResampleRegrid

-__all__ = ["CreateEmptyZarrStore", "ResampleRegrid"]
+__all__ = ["CreateEmptyZarrStore", "CreateEmptyZarrStoreLevel3", "ResampleRegrid"]
water_column_sonar_processing/cruise/create_empty_zarr_store.py

@@ -13,7 +13,7 @@ numcodecs.blosc.set_nthreads(1)

 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)


 # TODO: change name to "CreateLocalEmptyZarrStore"

@@ -27,35 +27,35 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

     #######################################################
-    # TODO:
-    def upload_zarr_store_to_s3(
-        self,
-        output_bucket_name: str,
-        local_directory: str,
-        object_prefix: str,
-        cruise_name: str,
-    ) -> None:
-        print("uploading model store to s3")
-        s3_manager = S3Manager()
-        #
-        print("Starting upload with thread pool executor.")
-        # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # TODO: find a better method for splitting strings here:
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-                all_files.append([local_path, s3_key])
-        #
-        # print(all_files)
-        s3_manager.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading with thread pool executor.")
-        # TODO: move to common place
+    # TODO: moved this to the s3_manager
+    # def upload_zarr_store_to_s3(
+    #     self,
+    #     output_bucket_name: str,
+    #     local_directory: str,
+    #     object_prefix: str,
+    #     cruise_name: str,
+    # ) -> None:
+    #     print("uploading model store to s3")
+    #     s3_manager = S3Manager()
+    #     #
+    #     print("Starting upload with thread pool executor.")
+    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+    #     all_files = []
+    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+    #         for file in files:
+    #             local_path = os.path.join(subdir, file)
+    #             # TODO: find a better method for splitting strings here:
+    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+    #             all_files.append([local_path, s3_key])
+    #     #
+    #     # print(all_files)
+    #     s3_manager.upload_files_with_thread_pool_executor(
+    #         output_bucket_name=output_bucket_name,
+    #         all_files=all_files,
+    #     )
+    #     print("Done uploading with thread pool executor.")
+    #     # TODO: move to common place

     #######################################################
     def create_cruise_level_zarr_store(
@@ -65,7 +65,11 @@ class CreateEmptyZarrStore:
         cruise_name: str,
         sensor_name: str,
         table_name: str,
+        # override_cruise_min_epsilon=None,
     ) -> None:
+        """
+        Initialize zarr store. The water_level needs to be integrated.
+        """
         tempdir = tempfile.TemporaryDirectory()
         try:
             # HB0806 - 123, HB0903 - 220

@@ -93,20 +97,19 @@ class CreateEmptyZarrStore:
             )

             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = np.min(
-                (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
+            # cruise_min_echo_range = np.min(
+            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+            # )

-            # [4] calculate the
+            # [4] calculate the np.max(max_echo_range + water_level)
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
             )

+            # TODO: set this to either 1 or 0.5 meters
             cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))

-            print(
-                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
-            )
+            print(f"cruise_max_echo_range: {cruise_max_echo_range}")

             # [5] get number of channels
             cruise_frequencies = [
@@ -126,6 +129,7 @@ class CreateEmptyZarrStore:
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
+            #
             if len(child_objects) > 0:
                 s3_manager.delete_nodd_objects(
                     bucket_name=output_bucket_name,

@@ -134,9 +138,9 @@ class CreateEmptyZarrStore:
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(
-                zarr_manager.get_depth_values(
-                    min_echo_range=cruise_min_echo_range,
+            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
+                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
+                    # min_echo_range=cruise_min_echo_range,
                     max_echo_range=cruise_max_echo_range,
                     cruise_min_epsilon=cruise_min_epsilon,
                 )
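The new inline comment pins down the intended vertical grid: steps of 0.19 m reaching 1001.744 m give 5272 depth samples, about 10.3 chunks at a tile size of 512. get_depth_values' body is not part of this diff, but a hypothetical reconstruction of that arithmetic checks out:

    import numpy as np

    cruise_min_epsilon = 0.19         # finest vertical resolution in the cruise
    cruise_max_echo_range = 1001.744  # np.max(max_echo_range + water_level)

    # Assumed shape: a regular grid from one epsilon down to the max range.
    n = int(np.floor(cruise_max_echo_range / cruise_min_epsilon))
    depth_values = np.arange(1, n + 1) * cruise_min_epsilon

    print(len(depth_values))        # 5272 samples
    print(len(depth_values) / 512)  # ~10.3 tiles @ 512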
@@ -150,13 +154,13 @@ class CreateEmptyZarrStore:
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                min_echo_range=cruise_min_echo_range,
+                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
                 cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
-            self.upload_zarr_store_to_s3(
+            s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
                 local_directory=tempdir.name,  # TODO: need to use .name or problem
                 object_prefix=zarr_prefix,

@@ -182,7 +186,9 @@ class CreateEmptyZarrStore:
             print("Done creating cruise level zarr store.")
             #################################################################
         except Exception as err:
-
+            raise RuntimeError(
+                f"Problem trying to create new cruise model store, {err}"
+            )
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()