water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
- water_column_sonar_processing/aws/s3_manager.py +95 -90
- water_column_sonar_processing/aws/s3fs_manager.py +5 -3
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/__init__.py +2 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
- water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -21
- water_column_sonar_processing/cruise/resample_regrid.py +57 -47
- water_column_sonar_processing/dataset/__init__.py +3 -0
- water_column_sonar_processing/dataset/dataset_manager.py +205 -0
- water_column_sonar_processing/dataset/feature_manager.py +32 -0
- water_column_sonar_processing/geometry/geometry_manager.py +11 -12
- water_column_sonar_processing/geometry/line_simplification.py +26 -1
- water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
- water_column_sonar_processing/index/index_manager.py +18 -17
- water_column_sonar_processing/model/zarr_manager.py +504 -256
- water_column_sonar_processing/processing/__init__.py +3 -2
- water_column_sonar_processing/processing/batch_downloader.py +11 -11
- water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -2
- water_column_sonar_processing/utility/constants.py +26 -7
- water_column_sonar_processing/utility/timestamp.py +1 -0
- water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
- water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
- water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
- water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
|
@@ -28,7 +28,7 @@ class IndexManager:
|
|
|
28
28
|
#################################################################
|
|
29
29
|
def list_ships(
|
|
30
30
|
self,
|
|
31
|
-
prefix="data/raw/",
|
|
31
|
+
prefix="dataset/raw/",
|
|
32
32
|
):
|
|
33
33
|
page_iterator = self.s3_manager.paginator.paginate(
|
|
34
34
|
Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/"
|
|
@@ -44,7 +44,7 @@ class IndexManager:
|
|
|
44
44
|
#################################################################
|
|
45
45
|
def list_cruises(
|
|
46
46
|
self,
|
|
47
|
-
ship_prefixes, # e.g. 'data/raw/Alaska_Knight/'
|
|
47
|
+
ship_prefixes, # e.g. 'dataset/raw/Alaska_Knight/'
|
|
48
48
|
):
|
|
49
49
|
cruises = []
|
|
50
50
|
for ship_prefix in ship_prefixes:
|
|
@@ -81,7 +81,7 @@ class IndexManager:
|
|
|
81
81
|
sensor_name,
|
|
82
82
|
):
|
|
83
83
|
# Gets all raw files for a cruise under the given prefix
|
|
84
|
-
prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/" # Note no forward slash at beginning
|
|
84
|
+
prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/" # Note no forward slash at beginning
|
|
85
85
|
page_iterator = self.s3_manager.paginator.paginate(
|
|
86
86
|
Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/"
|
|
87
87
|
)
|
|
@@ -99,7 +99,8 @@ class IndexManager:
|
|
|
99
99
|
):
|
|
100
100
|
# Same as above but only needs to get the first raw file
|
|
101
101
|
# because we are only interested in the first datagram of one file
|
|
102
|
-
|
|
102
|
+
# TODO: "dataset?"
|
|
103
|
+
prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/" # Note no forward slash at beginning
|
|
103
104
|
# page_iterator = self.s3_manager.paginator.paginate(
|
|
104
105
|
# Bucket=self.input_bucket_name,
|
|
105
106
|
# Prefix=prefix,
|
|
@@ -130,7 +131,7 @@ class IndexManager:
|
|
|
130
131
|
sensor_name,
|
|
131
132
|
):
|
|
132
133
|
# THIS isn't used, just playing with JMES paths spec
|
|
133
|
-
prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"
|
|
134
|
+
prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/"
|
|
134
135
|
### filter with JMESPath expressions ###
|
|
135
136
|
page_iterator = self.s3_manager.paginator.paginate(
|
|
136
137
|
Bucket=self.input_bucket_name,
|
|
@@ -193,9 +194,9 @@ class IndexManager:
|
|
|
193
194
|
self, df: pd.DataFrame
|
|
194
195
|
) -> pd.DataFrame: # TODO: is this used?
|
|
195
196
|
# Returns all objects with 'EK60' in prefix of file path
|
|
196
|
-
# Note that this can include 'EK80' data that are false-positives
|
|
197
|
+
# Note that this can include 'EK80' dataset that are false-positives
|
|
197
198
|
# in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
|
|
198
|
-
print("getting subset of ek60 data by prefix")
|
|
199
|
+
print("getting subset of ek60 dataset by prefix")
|
|
199
200
|
objects = []
|
|
200
201
|
for row in df.itertuples():
|
|
201
202
|
row_split = row[1].split(os.sep)
|
|
@@ -211,8 +212,8 @@ class IndexManager:
|
|
|
211
212
|
re.search("[D](\\d{8})", filename) is not None
|
|
212
213
|
and re.search("[T](\\d{6})", filename) is not None
|
|
213
214
|
):
|
|
214
|
-
# Parse date if possible e.g.: 'data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
|
|
215
|
-
# and 'data/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
|
|
215
|
+
# Parse date if possible e.g.: 'dataset/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
|
|
216
|
+
# and 'dataset/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
|
|
216
217
|
date_substring = re.search("[D](\\d{8})", filename).group(1)
|
|
217
218
|
time_substring = re.search("[T](\\d{6})", filename).group(1)
|
|
218
219
|
date_string = datetime.strptime(
|
|
@@ -238,7 +239,7 @@ class IndexManager:
|
|
|
238
239
|
def scan_datagram(self, select_key: str) -> list:
|
|
239
240
|
# Reads the first 8 bytes of S3 file. Used to determine if ek60 or ek80
|
|
240
241
|
# Note: uses boto3 session instead of boto3 client: https://github.com/boto/boto3/issues/801
|
|
241
|
-
# select_key = 'data/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
|
|
242
|
+
# select_key = 'dataset/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
|
|
242
243
|
s3_resource = self.s3_manager.s3_resource
|
|
243
244
|
obj = s3_resource.Object(
|
|
244
245
|
bucket_name=self.input_bucket_name, key=select_key
|
|
@@ -247,7 +248,7 @@ class IndexManager:
|
|
|
247
248
|
obj.get(Range="bytes=3-7")["Body"].read().decode().strip("\x00")
|
|
248
249
|
)
|
|
249
250
|
# return [{'KEY': select_key, 'DATAGRAM': first_datagram}]
|
|
250
|
-
### EK60 data are denoted by 'CON0' ###
|
|
251
|
+
### EK60 dataset are denoted by 'CON0' ###
|
|
251
252
|
return first_datagram
|
|
252
253
|
|
|
253
254
|
#################################################################
|
|
@@ -291,11 +292,11 @@ class IndexManager:
|
|
|
291
292
|
def get_calibration_information(
|
|
292
293
|
self,
|
|
293
294
|
) -> pd.DataFrame:
|
|
294
|
-
# Calibration data generated by data manager currently located here:
|
|
295
|
+
# Calibration dataset generated by dataset manager currently located here:
|
|
295
296
|
# https://noaa-wcsd-pds-index.s3.amazonaws.com/calibrated_crusies.csv
|
|
296
297
|
# Note: Data are either:
|
|
297
|
-
# [1] Calibrated w/ calibration data
|
|
298
|
-
# [2] Calibrated w/o calibration data
|
|
298
|
+
# [1] Calibrated w/ calibration dataset
|
|
299
|
+
# [2] Calibrated w/o calibration dataset
|
|
299
300
|
# [3] uncalibrated
|
|
300
301
|
response = self.s3_manager.get_object(
|
|
301
302
|
bucket_name=self.calibration_bucket, key_name=self.calibration_key
|
|
@@ -350,8 +351,8 @@ class IndexManager:
|
|
|
350
351
|
ship_name = "Henry_B._Bigelow"
|
|
351
352
|
cruise_name = "HB0707"
|
|
352
353
|
# cruise_name = "HB0805"
|
|
353
|
-
prefix = f"data/raw/{ship_name}/{cruise_name}/"
|
|
354
|
-
# prefix = f"data/raw/{ship_name}/"
|
|
354
|
+
prefix = f"dataset/raw/{ship_name}/{cruise_name}/"
|
|
355
|
+
# prefix = f"dataset/raw/{ship_name}/"
|
|
355
356
|
page_iterator = self.s3_manager.paginator.paginate(
|
|
356
357
|
Bucket=self.input_bucket_name,
|
|
357
358
|
Prefix=prefix,
|
|
@@ -367,7 +368,7 @@ class IndexManager:
|
|
|
367
368
|
node_for_adding=basename, ETag=obj_etag, Size=obj_size, Key=obj_key
|
|
368
369
|
) # TODO: add parent hash
|
|
369
370
|
split_path = os.path.normpath(obj_key).split(os.path.sep)
|
|
370
|
-
# split_path: ['data', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
|
|
371
|
+
# split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
|
|
371
372
|
for previous, current in zip(split_path, split_path[1:]):
|
|
372
373
|
if not G.has_edge(previous, current):
|
|
373
374
|
G.add_edge(previous, current)
|