water-column-sonar-processing 25.3.1-py3-none-any.whl → 25.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
  2. water_column_sonar_processing/aws/s3_manager.py +95 -90
  3. water_column_sonar_processing/aws/s3fs_manager.py +5 -3
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/__init__.py +2 -1
  6. water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
  7. water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -21
  9. water_column_sonar_processing/cruise/resample_regrid.py +57 -47
  10. water_column_sonar_processing/dataset/__init__.py +3 -0
  11. water_column_sonar_processing/dataset/dataset_manager.py +205 -0
  12. water_column_sonar_processing/dataset/feature_manager.py +32 -0
  13. water_column_sonar_processing/geometry/geometry_manager.py +11 -12
  14. water_column_sonar_processing/geometry/line_simplification.py +26 -1
  15. water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
  16. water_column_sonar_processing/index/index_manager.py +18 -17
  17. water_column_sonar_processing/model/zarr_manager.py +504 -256
  18. water_column_sonar_processing/processing/__init__.py +3 -2
  19. water_column_sonar_processing/processing/batch_downloader.py +11 -11
  20. water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
  21. water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
  22. water_column_sonar_processing/utility/__init__.py +9 -2
  23. water_column_sonar_processing/utility/cleaner.py +1 -2
  24. water_column_sonar_processing/utility/constants.py +26 -7
  25. water_column_sonar_processing/utility/timestamp.py +1 -0
  26. water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
  27. water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
  28. {water_column_sonar_processing-25.3.1.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
  29. water_column_sonar_processing-25.3.1.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
  30. water_column_sonar_processing-25.3.1.dist-info/METADATA +0 -170
  31. water_column_sonar_processing-25.3.1.dist-info/RECORD +0 -34
  32. {water_column_sonar_processing-25.3.1.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
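
The hunks below are from water_column_sonar_processing/index/index_manager.py (item 16 above). The recurring change is the S3 prefix migration from data/raw/ to dataset/raw/, with matching updates to comments and example keys.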
@@ -28,7 +28,7 @@ class IndexManager:
     #################################################################
     def list_ships(
         self,
-        prefix="data/raw/",
+        prefix="dataset/raw/",
     ):
         page_iterator = self.s3_manager.paginator.paginate(
             Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/"
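
For context, a minimal sketch of the listing pattern used here, assuming a plain boto3 list_objects_v2 paginator; the bucket name is a placeholder, not from the package:

import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
    Bucket="example-bucket", Prefix="dataset/raw/", Delimiter="/"
)
for page in page_iterator:
    # With a Delimiter, "folders" come back as CommonPrefixes, not Contents
    for common_prefix in page.get("CommonPrefixes", []):
        print(common_prefix["Prefix"])  # e.g. "dataset/raw/Alaska_Knight/"
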
@@ -44,7 +44,7 @@ class IndexManager:
     #################################################################
     def list_cruises(
         self,
-        ship_prefixes,  # e.g. 'data/raw/Alaska_Knight/'
+        ship_prefixes,  # e.g. 'dataset/raw/Alaska_Knight/'
     ):
         cruises = []
         for ship_prefix in ship_prefixes:
@@ -81,7 +81,7 @@ class IndexManager:
         sensor_name,
     ):
         # Gets all raw files for a cruise under the given prefix
-        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
+        prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
         page_iterator = self.s3_manager.paginator.paginate(
             Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/"
         )
@@ -99,7 +99,8 @@ class IndexManager:
     ):
         # Same as above but only needs to get the first raw file
         # because we are only interested in the first datagram of one file
-        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
+        # TODO: "dataset?"
+        prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
         # page_iterator = self.s3_manager.paginator.paginate(
         #     Bucket=self.input_bucket_name,
         #     Prefix=prefix,
@@ -130,7 +131,7 @@ class IndexManager:
         sensor_name,
     ):
         # THIS isn't used, just playing with JMES paths spec
-        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"
+        prefix = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/"
         ### filter with JMESPath expressions ###
         page_iterator = self.s3_manager.paginator.paginate(
             Bucket=self.input_bucket_name,
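
The JMESPath experiment above pairs naturally with boto3's PageIterator.search(), which evaluates a JMESPath expression across all pages. A hedged sketch with a placeholder bucket and an illustrative expression:

import boto3

s3 = boto3.client("s3")
paginator = s3.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
    Bucket="example-bucket",
    Prefix="dataset/raw/Henry_B._Bigelow/HB0707/EK60/",
)
# search() flattens the filtered results from every page into one iterator
for obj in page_iterator.search("Contents[?contains(Key, '.raw')][]"):
    print(obj["Key"], obj["Size"])
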
@@ -193,9 +194,9 @@ class IndexManager:
         self, df: pd.DataFrame
     ) -> pd.DataFrame:  # TODO: is this used?
         # Returns all objects with 'EK60' in prefix of file path
-        # Note that this can include 'EK80' data that are false-positives
+        # Note that this can include 'EK80' dataset that are false-positives
         # in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
-        print("getting subset of ek60 data by prefix")
+        print("getting subset of ek60 dataset by prefix")
         objects = []
         for row in df.itertuples():
             row_split = row[1].split(os.sep)
@@ -211,8 +212,8 @@ class IndexManager:
             re.search("[D](\\d{8})", filename) is not None
             and re.search("[T](\\d{6})", filename) is not None
         ):
-            # Parse date if possible e.g.: 'data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
-            # and 'data/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
+            # Parse date if possible e.g.: 'dataset/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
+            # and 'dataset/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
             date_substring = re.search("[D](\\d{8})", filename).group(1)
             time_substring = re.search("[T](\\d{6})", filename).group(1)
             date_string = datetime.strptime(
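
The regexes above pull an 8-digit date and a 6-digit time out of the filename. A small standalone sketch; the strptime format string is an assumption, since the hunk truncates that call:

import re
from datetime import datetime

filename = "HBB-D20100723-T025105.raw"  # example taken from the comment above
date_substring = re.search("[D](\\d{8})", filename).group(1)  # "20100723"
time_substring = re.search("[T](\\d{6})", filename).group(1)  # "025105"
# Assumed format string; the original call is cut off in the hunk
timestamp = datetime.strptime(date_substring + time_substring, "%Y%m%d%H%M%S")
print(timestamp.isoformat())  # 2010-07-23T02:51:05
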
@@ -238,7 +239,7 @@ class IndexManager:
     def scan_datagram(self, select_key: str) -> list:
         # Reads the first 8 bytes of S3 file. Used to determine if ek60 or ek80
         # Note: uses boto3 session instead of boto3 client: https://github.com/boto/boto3/issues/801
-        # select_key = 'data/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
+        # select_key = 'dataset/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
         s3_resource = self.s3_manager.s3_resource
         obj = s3_resource.Object(
             bucket_name=self.input_bucket_name, key=select_key
@@ -247,7 +248,7 @@ class IndexManager:
             obj.get(Range="bytes=3-7")["Body"].read().decode().strip("\x00")
         )
         # return [{'KEY': select_key, 'DATAGRAM': first_datagram}]
-        ### EK60 data are denoted by 'CON0' ###
+        ### EK60 dataset are denoted by 'CON0' ###
         return first_datagram

     #################################################################
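
scan_datagram works because a Simrad .raw file opens with a 4-byte length field followed by a 4-character datagram type: 'CON0' for EK60, while EK80 files begin with an XML0 datagram. A sketch using a placeholder bucket and the example key from the comment:

import boto3

s3_resource = boto3.resource("s3")
obj = s3_resource.Object(
    bucket_name="example-bucket",  # placeholder
    key="dataset/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw",
)
# Bytes 3-7 span the tail of the length field plus the 4-char datagram type
first_datagram = obj.get(Range="bytes=3-7")["Body"].read().decode().strip("\x00")
print(first_datagram)  # "CON0" for an EK60 file
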
@@ -291,11 +292,11 @@ class IndexManager:
     def get_calibration_information(
         self,
     ) -> pd.DataFrame:
-        # Calibration data generated by data manager currently located here:
+        # Calibration dataset generated by dataset manager currently located here:
        # https://noaa-wcsd-pds-index.s3.amazonaws.com/calibrated_crusies.csv
        # Note: Data are either:
-        # [1] Calibrated w/ calibration data
-        # [2] Calibrated w/o calibration data
+        # [1] Calibrated w/ calibration dataset
+        # [2] Calibrated w/o calibration dataset
         # [3] uncalibrated
         response = self.s3_manager.get_object(
             bucket_name=self.calibration_bucket, key_name=self.calibration_key
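
A sketch of what loading that CSV into a DataFrame can look like; the bucket and key are read off the URL in the comment, and the client setup is illustrative:

import boto3
import pandas as pd

s3 = boto3.client("s3")
response = s3.get_object(
    Bucket="noaa-wcsd-pds-index", Key="calibrated_crusies.csv"
)
# The StreamingBody is file-like, so pandas can read it directly
calibration_df = pd.read_csv(response["Body"])
print(calibration_df.head())
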
@@ -350,8 +351,8 @@ class IndexManager:
         ship_name = "Henry_B._Bigelow"
         cruise_name = "HB0707"
         # cruise_name = "HB0805"
-        prefix = f"data/raw/{ship_name}/{cruise_name}/"
-        # prefix = f"data/raw/{ship_name}/"
+        prefix = f"dataset/raw/{ship_name}/{cruise_name}/"
+        # prefix = f"dataset/raw/{ship_name}/"
         page_iterator = self.s3_manager.paginator.paginate(
             Bucket=self.input_bucket_name,
             Prefix=prefix,
@@ -367,7 +368,7 @@ class IndexManager:
             node_for_adding=basename, ETag=obj_etag, Size=obj_size, Key=obj_key
         )  # TODO: add parent hash
         split_path = os.path.normpath(obj_key).split(os.path.sep)
-        # split_path: ['data', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
+        # split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
         for previous, current in zip(split_path, split_path[1:]):
             if not G.has_edge(previous, current):
                 G.add_edge(previous, current)
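
The final hunk builds a graph whose nodes are path segments, with one edge per parent/child pair along each key. A self-contained sketch; the hunk doesn't show how G is constructed, so networkx.DiGraph is an assumption:

import os
import networkx as nx

G = nx.DiGraph()  # assumption: graph type not shown in the hunk
obj_key = "dataset/raw/Henry_B._Bigelow/HB0707/EK60/D20070712-T004447.raw"
split_path = os.path.normpath(obj_key).split(os.path.sep)
# split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', ...]
for previous, current in zip(split_path, split_path[1:]):
    if not G.has_edge(previous, current):
        G.add_edge(previous, current)
print(list(G.edges()))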