water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
- water_column_sonar_processing/aws/s3_manager.py +95 -90
- water_column_sonar_processing/aws/s3fs_manager.py +5 -3
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/__init__.py +2 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
- water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -21
- water_column_sonar_processing/cruise/resample_regrid.py +57 -47
- water_column_sonar_processing/dataset/__init__.py +3 -0
- water_column_sonar_processing/dataset/dataset_manager.py +205 -0
- water_column_sonar_processing/dataset/feature_manager.py +32 -0
- water_column_sonar_processing/geometry/geometry_manager.py +11 -12
- water_column_sonar_processing/geometry/line_simplification.py +26 -1
- water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
- water_column_sonar_processing/index/index_manager.py +18 -17
- water_column_sonar_processing/model/zarr_manager.py +504 -256
- water_column_sonar_processing/processing/__init__.py +3 -2
- water_column_sonar_processing/processing/batch_downloader.py +11 -11
- water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -2
- water_column_sonar_processing/utility/constants.py +26 -7
- water_column_sonar_processing/utility/timestamp.py +1 -0
- water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
- water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
- water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
- water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
- water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
- {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/__init__.py
@@ -1,5 +1,6 @@
 # from .cruise_sampler import CruiseSampler
 from .batch_downloader import BatchDownloader
-from .
+from .raw_to_netcdf import RawToNetCDF
+from .raw_to_zarr import RawToZarr, get_water_level

-__all__ = ["RawToZarr", "BatchDownloader"]
+__all__ = ["RawToZarr", "get_water_level", "RawToNetCDF", "BatchDownloader"]
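With this release the processing package exports the new NetCDF converter and the water-level helper alongside the existing classes. A minimal sketch of the widened import surface (module path and names are taken from the hunk above; everything else is illustrative):

```python
# Names exported by water_column_sonar_processing.processing as of 25.8.0
from water_column_sonar_processing.processing import (
    BatchDownloader,
    RawToNetCDF,
    RawToZarr,
    get_water_level,
)

converter = RawToNetCDF()  # takes no required constructor arguments per the new __init__
```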
water_column_sonar_processing/processing/batch_downloader.py
@@ -10,7 +10,7 @@ import xbatcher

 class BatchDownloader:
     """
-    Uses the xbatcher XbatchDownloader to download
+    Uses the xbatcher XbatchDownloader to download dataset from an xarray dataset. Connection
     is established
     """

@@ -50,13 +50,13 @@ class BatchDownloader:

     def get_toy_batch_generator(self) -> xbatcher.BatchGenerator:
         """
-        Returns a BatchGenerator with subsets of Sv
-        Note: this is synthetic
+        Returns a BatchGenerator with subsets of Sv dataset
+        Note: this is synthetic dataset, for a smaller toy example
         """
         depth = np.arange(1, 21)  # N meters
         time = pd.date_range(start="2025-01-01", end="2025-01-31", freq="D")  # N days
         frequency = [1_000, 2_000, 3_000]  # N frequencies
-        Sv = np.random.rand(len(depth), len(time), len(frequency))  # synthetic
+        Sv = np.random.rand(len(depth), len(time), len(frequency))  # synthetic dataset
         cruise = xr.Dataset(
             data_vars={"Sv": (["depth", "time", "frequency"], Sv)},
             coords={
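For reference, the toy generator boils down to wrapping a synthetic Sv cube in `xbatcher.BatchGenerator`. A standalone sketch of that pattern (the 5x7 window size below is an illustrative choice, not necessarily what the class uses):

```python
import numpy as np
import pandas as pd
import xarray as xr
import xbatcher

# Synthetic Sv cube: depth x time x frequency, mirroring get_toy_batch_generator
depth = np.arange(1, 21)
time = pd.date_range(start="2025-01-01", end="2025-01-31", freq="D")
frequency = [1_000, 2_000, 3_000]
Sv = np.random.rand(len(depth), len(time), len(frequency))

cruise = xr.Dataset(
    data_vars={"Sv": (["depth", "time", "frequency"], Sv)},
    coords={"depth": depth, "time": time, "frequency": frequency},
)

# Yield fixed-size windows over depth and time; frequency is kept whole in each batch
batch_generator = xbatcher.BatchGenerator(
    cruise.Sv,
    input_dims={"depth": 5, "time": 7},
)
for batch in batch_generator:
    print(batch.shape)  # expected (5, 7, 3)
```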
@@ -84,10 +84,10 @@ class BatchDownloader:
         return batch_generator

     def get_s3_batch_generator(self) -> xbatcher.BatchGenerator:
-        """Returns a BatchGenerator with subsets of Sv
+        """Returns a BatchGenerator with subsets of Sv dataset from s3 Zarr store"""
         cruise = self.get_s3_zarr_store()

-        # TODO: temporarily limits to a smaller slice of the
+        # TODO: temporarily limits to a smaller slice of the dataset
         cruise_select = (
             cruise.where(cruise.depth < 100.0, drop=True).sel(
                 time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53")
@@ -111,19 +111,19 @@ class BatchDownloader:
             preload_batch=False,
         )

-        # TODO: need to raise exception if all the
+        # TODO: need to raise exception if all the dataset is nan

         return batch_generator
         # https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator

     def get_s3_manual_batch_generator(self):
         """
-        Using just xarray (no xbatcher), iterate through the
-        Returns a BatchGenerator with subsets of Sv
+        Using just xarray (no xbatcher), iterate through the dataset and generate batches.
+        Returns a BatchGenerator with subsets of Sv dataset from s3 Zarr store.
         """
         cruise = self.get_s3_zarr_store()

-        # TODO: temporarily limits to a smaller slice of the
+        # TODO: temporarily limits to a smaller slice of the dataset
         cruise_select = cruise.where(cruise.depth < 100.0, drop=True).sel(
             time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53")
         )
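Both S3-backed generators first open the cruise Zarr store with xarray and then trim it with `where`/`sel` before batching. A hedged sketch of that access pattern (the bucket and store path below are placeholders, not the package's actual layout, which comes from `get_s3_zarr_store`):

```python
import s3fs
import xarray as xr

# Placeholder store location for illustration only
s3 = s3fs.S3FileSystem(anon=True)
store = s3fs.S3Map(
    root="s3://example-bucket/level_2/SHIP/CRUISE/SENSOR/CRUISE.zarr", s3=s3
)
cruise = xr.open_zarr(store)

# Same trimming the generators apply before handing the data to xbatcher
cruise_select = cruise.where(cruise.depth < 100.0, drop=True).sel(
    time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53")
)
print(cruise_select.Sv.sizes)
```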
@@ -143,7 +143,7 @@ class BatchDownloader:
             preload_batch=True,
         )

-        # TODO: need to raise exception if all the
+        # TODO: need to raise exception if all the dataset is nan

         return batch_generator
         # https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator
water_column_sonar_processing/processing/raw_to_netcdf.py (new file)
@@ -0,0 +1,319 @@
+import gc
+import os
+from datetime import datetime
+from pathlib import Path  # , PurePath
+
+import echopype as ep
+import numcodecs
+import numpy as np
+from numcodecs import Blosc
+
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.utility import Cleaner
+
+
+# This code is getting copied from echofish-aws-raw-to-zarr-lambda
+class RawToNetCDF:
+    #######################################################
+    def __init__(
+        self,
+        # output_bucket_access_key,
+        # output_bucket_secret_access_key,
+        # # overwrite_existing_zarr_store,
+    ):
+        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+        self.__overwrite = True
+        self.__num_threads = numcodecs.blosc.get_nthreads()
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.__table_name = table_name
+        # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+    ############################################################################
+    ############################################################################
+    def __netcdf_info_to_table(
+        self,
+        # output_bucket_name,
+        table_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name,
+        # zarr_path,
+        min_echo_range,
+        max_echo_range,
+        num_ping_time_dropna,
+        start_time,
+        end_time,
+        frequencies,
+        channels,
+        water_level,
+    ):
+        print("Writing Zarr information to DynamoDB table.")
+        dynamodb_manager = DynamoDBManager()
+        dynamodb_manager.update_item(
+            table_name=table_name,
+            key={
+                "FILE_NAME": {"S": file_name},  # Partition Key
+                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+            },
+            expression_attribute_names={
+                "#CH": "CHANNELS",
+                "#ET": "END_TIME",
+                # "#ED": "ERROR_DETAIL",
+                "#FR": "FREQUENCIES",
+                "#MA": "MAX_ECHO_RANGE",
+                "#MI": "MIN_ECHO_RANGE",
+                "#ND": "NUM_PING_TIME_DROPNA",
+                # "#PS": "PIPELINE_STATUS",
+                "#PT": "PIPELINE_TIME",
+                "#SE": "SENSOR_NAME",
+                "#SH": "SHIP_NAME",
+                "#ST": "START_TIME",
+                # "#ZB": "ZARR_BUCKET",
+                # "#ZP": "ZARR_PATH",
+                "#WL": "WATER_LEVEL",
+            },
+            expression_attribute_values={
+                ":ch": {"L": [{"S": i} for i in channels]},
+                ":et": {"S": end_time},
+                # ":ed": {"S": ""},
+                ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                ":ma": {"N": str(np.round(max_echo_range, 4))},
+                ":mi": {"N": str(np.round(min_echo_range, 4))},
+                ":nd": {"N": str(num_ping_time_dropna)},
+                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+                ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                ":se": {"S": sensor_name},
+                ":sh": {"S": ship_name},
+                ":st": {"S": start_time},
+                ":wl": {"N": str(np.round(water_level, 2))},
+                # ":zb": {"S": output_bucket_name},
+                # ":zp": {"S": zarr_path},
+            },
+            update_expression=(
+                "SET "
+                "#CH = :ch, "
+                "#ET = :et, "
+                # "#ED = :ed, "
+                "#FR = :fr, "
+                "#MA = :ma, "
+                "#MI = :mi, "
+                "#ND = :nd, "
+                # "#PS = :ps, "
+                "#PT = :pt, "
+                "#SE = :se, "
+                "#SH = :sh, "
+                "#ST = :st, "
+                "#WL = :wl"
+                # "#ZB = :zb, "
+                # "#ZP = :zp"
+            ),
+        )
+        print("Done writing Zarr information to DynamoDB table.")
+
+    ############################################################################
+    ############################################################################
+    ############################################################################
+    def __upload_files_to_output_bucket(
+        self,
+        output_bucket_name,
+        local_directory,
+        object_prefix,
+        endpoint_url,
+    ):
+        # Note: this will be passed credentials if using NODD
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
+        all_files = []
+        for subdir, dirs, files in os.walk(local_directory):
+            for file in files:
+                local_path = os.path.join(subdir, file)
+                s3_key = os.path.join(object_prefix, local_path)
+                all_files.append([local_path, s3_key])
+        # all_files
+        all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
+            all_files=all_files,
+        )
+        return all_uploads
+
+    def __upload_file_to_output_bucket(
+        self,
+        output_bucket_name,
+        local_directory,
+        object_prefix,
+        endpoint_url,
+    ):
+        # Note: this will be passed credentials if using NODD
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        print("Uploading files using thread pool executor.")
+        all_files = [local_directory]
+        all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
+            all_files=all_files,
+        )
+        return all_uploads
+
+    ############################################################################
+    def raw_to_netcdf(
+        self,
+        table_name,
+        input_bucket_name,
+        output_bucket_name,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        raw_file_name,
+        endpoint_url=None,
+        include_bot=True,
+    ):
+        """
+        Downloads the raw files, processes them with echopype, and uploads files
+        to the nodd bucket.
+
+        Needs to create two files, one echopype opened file, one is Sv calibrated file
+        """
+        print(f"Opening raw: {raw_file_name} and creating netcdf.")
+        try:
+            geometry_manager = GeometryManager()
+            cleaner = Cleaner()
+            cleaner.delete_local_files(
+                file_types=["*.nc", "*.json"]
+            )  # TODO: include bot and raw?
+
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            s3_file_path = (
+                f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+            )
+            bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+            s3_bottom_file_path = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+            s3_manager.download_file(
+                bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+            )
+            # TODO: add the bottom file
+            if include_bot:
+                s3_manager.download_file(
+                    bucket_name=input_bucket_name,
+                    key=s3_bottom_file_path,
+                    file_name=bottom_file_name,
+                )
+
+            gc.collect()
+            print("Opening raw file with echopype.")
+            # s3_file_path = f"s3://{bucket_name}/dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+            # s3_file_path = Path(f"s3://noaa-wcsd-pds/dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+            echodata = ep.open_raw(
+                raw_file=raw_file_name,
+                sonar_model=sensor_name,
+                include_bot=include_bot,
+            )
+
+            netcdf_name = f"{Path(raw_file_name).stem}.nc"
+            # Xarray Dataset to netcdf
+            echodata.to_netcdf(
+                save_path=netcdf_name,
+                compress=True,
+                overwrite=True,
+            )
+
+            print("Compute volume backscattering strength (Sv) from raw dataset.")
+            ds_sv = ep.calibrate.compute_Sv(echodata)
+            ds_sv = ep.consolidate.add_depth(
+                ds_sv, echodata
+            )  # TODO: consolidate with other depth values
+            # water_level = ds_sv["water_level"].values
+            gc.collect()
+            print("Done computing volume backscatter strength (Sv) from raw dataset.")
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = (
+                    echodata.vendor.detected_seafloor_depth
+                )
+            #
+            # frequencies = echodata.environment.frequency_nominal.values
+            #################################################################
+            # Get GPS coordinates, just overwrite the lat lon values
+            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                echodata=echodata,
+                output_bucket_name=output_bucket_name,
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                file_name=raw_file_name,
+                endpoint_url=endpoint_url,
+                write_geojson=False,
+            )
+            ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+            ds_sv.latitude.values = (
+                lat  # overwriting echopype gps values to include missing values
+            )
+            ds_sv.longitude.values = lon
+            # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+            # Create the netcdf
+            netcdf_name_computed_Sv = f"{Path(raw_file_name).stem}_computed_Sv.nc"
+
+            # Xarray Dataset to netcdf
+            ds_sv.to_netcdf(
+                path=netcdf_name_computed_Sv,
+                mode="w",
+            )
+            gc.collect()
+            #################################################################
+            # output_netcdf_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If netcdf already exists then delete
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+            )
+            if len(child_objects) > 0:
+                print(
+                    "NetCDF dataset already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            child_objects_computed_Sv = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+            )
+            if len(child_objects_computed_Sv) > 0:
+                print("data already exists in s3, deleting existing and continuing.")
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects_computed_Sv,
+                )
+            #################################################################
+            s3_manager.upload_file(
+                filename=netcdf_name,
+                bucket_name=output_bucket_name,
+                key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.nc",
+            )
+            s3_manager.upload_file(
+                filename=netcdf_name_computed_Sv,
+                bucket_name=output_bucket_name,
+                key=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}_computed_Sv.nc",
+            )
+        except Exception as err:
+            print(f"Exception encountered creating local netcdf with echopype: {err}")
+            raise RuntimeError(f"Problem creating local netcdf, {err}")
+        finally:
+            gc.collect()
+            cleaner.delete_local_files(
+                file_types=["*.raw", "*.bot", "*.zarr", "*.nc", "*.json"]
+            )
+            print("Done creating local zarr store.")
+
+    ############################################################################
+
+
+################################################################################
+############################################################################
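Taken together, `raw_to_netcdf` downloads a `.raw` (and optionally its `.bot`) file, converts it with echopype, and pushes two NetCDF files to the `level_1/` prefix. A usage sketch under assumed resource names (the table and bucket names are placeholders; the ship/cruise/sensor values echo the examples in the raw_to_zarr comments):

```python
from water_column_sonar_processing.processing import RawToNetCDF

raw_to_netcdf = RawToNetCDF()
raw_to_netcdf.raw_to_netcdf(
    table_name="example-pipeline-table",          # placeholder DynamoDB table
    input_bucket_name="example-raw-bucket",       # placeholder source bucket
    output_bucket_name="example-level-1-bucket",  # placeholder destination bucket
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0706",
    sensor_name="EK60",
    raw_file_name="D20070724-T042400.raw",
    # endpoint_url=None uses the default AWS endpoint; include_bot=True also fetches the .bot file
)
# Per the upload keys above, the expected outputs are:
#   level_1/Henry_B._Bigelow/HB0706/EK60/D20070724-T042400.nc
#   level_1/Henry_B._Bigelow/HB0706/EK60/D20070724-T042400_computed_Sv.nc
```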
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -1,7 +1,7 @@
 import gc
 import os
 from datetime import datetime
-from pathlib import Path
+from pathlib import Path

 import echopype as ep
 import numcodecs
@@ -13,6 +13,16 @@ from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.utility import Cleaner


+def get_water_level(ds):
+    """
+    needs to be mocked up so thats why this is broken out
+    """
+    if "water_level" in ds.keys():
+        return ds.water_level.values
+    else:
+        return 0.0
+
+
 # This code is getting copied from echofish-aws-raw-to-zarr-lambda
 class RawToZarr:
     #######################################################
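`get_water_level` is deliberately a free function so it can be exercised (or mocked) without constructing `RawToZarr`. A small check of both branches, assuming only xarray:

```python
import xarray as xr

from water_column_sonar_processing.processing import get_water_level

# Dataset that carries a water_level variable -> the stored value is returned
ds_with = xr.Dataset({"water_level": 1.5})
assert float(get_water_level(ds_with)) == 1.5

# Dataset without water_level -> falls back to 0.0
ds_without = xr.Dataset({"Sv": ("ping_time", [0.0, 1.0])})
assert get_water_level(ds_without) == 0.0
```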
@@ -35,13 +45,11 @@ class RawToZarr:
     ############################################################################
     def __zarr_info_to_table(
         self,
-        # output_bucket_name,
         table_name,
         ship_name,
         cruise_name,
-        sensor_name,
+        sensor_name,  # : Constants, TODO: convert to enum
         file_name,
-        # zarr_path,
         min_echo_range,
         max_echo_range,
         num_ping_time_dropna,
@@ -67,13 +75,10 @@ class RawToZarr:
                 "#MA": "MAX_ECHO_RANGE",
                 "#MI": "MIN_ECHO_RANGE",
                 "#ND": "NUM_PING_TIME_DROPNA",
-                # "#PS": "PIPELINE_STATUS",
                 "#PT": "PIPELINE_TIME",
                 "#SE": "SENSOR_NAME",
                 "#SH": "SHIP_NAME",
                 "#ST": "START_TIME",
-                # "#ZB": "ZARR_BUCKET",
-                # "#ZP": "ZARR_PATH",
                 "#WL": "WATER_LEVEL",
             },
             expression_attribute_values={
@@ -84,33 +89,25 @@ class RawToZarr:
                 ":ma": {"N": str(np.round(max_echo_range, 4))},
                 ":mi": {"N": str(np.round(min_echo_range, 4))},
                 ":nd": {"N": str(num_ping_time_dropna)},
-                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
-                # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
                 ":se": {"S": sensor_name},
                 ":sh": {"S": ship_name},
                 ":st": {"S": start_time},
                 ":wl": {"N": str(np.round(water_level, 2))},
-                # ":zb": {"S": output_bucket_name},
-                # ":zp": {"S": zarr_path},
             },
             update_expression=(
                 "SET "
                 "#CH = :ch, "
                 "#ET = :et, "
-                # "#ED = :ed, "
                 "#FR = :fr, "
                 "#MA = :ma, "
                 "#MI = :mi, "
                 "#ND = :nd, "
-                # "#PS = :ps, "
                 "#PT = :pt, "
                 "#SE = :se, "
                 "#SH = :sh, "
                 "#ST = :st, "
                 "#WL = :wl"
-                # "#ZB = :zb, "
-                # "#ZP = :zp"
             ),
         )
         print("Done writing Zarr information to DynamoDB table.")
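The `expression_attribute_names`/`expression_attribute_values`/`update_expression` arguments mirror the low-level DynamoDB UpdateItem API. A hedged sketch of the equivalent boto3 call, assuming `DynamoDBManager.update_item` is a thin wrapper (the diff does not show its implementation); the table name and values are placeholders:

```python
import boto3

dynamodb = boto3.client("dynamodb")
dynamodb.update_item(
    TableName="example-pipeline-table",  # placeholder
    Key={
        "FILE_NAME": {"S": "D20070724-T042400.raw"},  # partition key
        "CRUISE_NAME": {"S": "HB0706"},               # sort key
    },
    ExpressionAttributeNames={"#WL": "WATER_LEVEL"},
    ExpressionAttributeValues={":wl": {"N": "7.5"}},
    UpdateExpression="SET #WL = :wl",
)
```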
@@ -120,16 +117,20 @@ class RawToZarr:
     ############################################################################
     def __upload_files_to_output_bucket(
         self,
-        output_bucket_name,
-        local_directory,
-        object_prefix,
+        output_bucket_name: str,
+        local_directory: str,  # e.g. 'D20070724-T042400.zarr' # TODO: problem: if this is not in the current directory
+        object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
         endpoint_url,
     ):
         # Note: this will be passed credentials if using NODD
+        # TODO: this will not work if the local_directory is anywhere other than the current folder
+        # see test_s3_manager test_upload...pool_executor for solution
         s3_manager = S3Manager(endpoint_url=endpoint_url)
         print("Uploading files using thread pool executor.")
         all_files = []
-        for subdir, dirs, files in os.walk(
+        for subdir, dirs, files in os.walk(
+            local_directory
+        ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
             for file in files:
                 local_path = os.path.join(subdir, file)
                 s3_key = os.path.join(object_prefix, local_path)
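The upload helper builds `(local_path, s3_key)` pairs by walking the Zarr store directory, which is why the keys only come out right when `local_directory` is relative to the current working directory (the TODO above). A standalone sketch of the same walk-and-upload pattern using plain boto3 and a thread pool (bucket name and prefix are placeholders; the package routes this through `S3Manager.upload_files_with_thread_pool_executor` instead):

```python
import os
from concurrent.futures import ThreadPoolExecutor

import boto3

s3_client = boto3.client("s3")


def upload_store(local_directory: str, bucket: str, prefix: str) -> None:
    pairs = []
    for subdir, _dirs, files in os.walk(local_directory):
        for file in files:
            local_path = os.path.join(subdir, file)
            # Key = prefix + walked path, hence the relative-path caveat
            pairs.append((local_path, os.path.join(prefix, local_path)))
    with ThreadPoolExecutor(max_workers=8) as executor:
        futures = [
            executor.submit(s3_client.upload_file, local_path, bucket, key)
            for local_path, key in pairs
        ]
        for future in futures:
            future.result()  # surface any upload errors


# upload_store("D20070724-T042400.zarr", "example-level-1-bucket",
#              "level_1/Henry_B._Bigelow/HB0706/EK60/")
```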
@@ -141,6 +142,8 @@ class RawToZarr:
         )
         return all_uploads

+    ############################################################################
+
     ############################################################################
     def raw_to_zarr(
         self,
@@ -167,11 +170,11 @@ class RawToZarr:

         s3_manager = S3Manager(endpoint_url=endpoint_url)
         s3_file_path = (
-            f"
+            f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
         )
         bottom_file_name = f"{Path(raw_file_name).stem}.bot"
         s3_bottom_file_path = (
-            f"
+            f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
         )
         s3_manager.download_file(
             bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
@@ -187,8 +190,8 @@ class RawToZarr:
         try:
             gc.collect()
             print("Opening raw file with echopype.")
-            # s3_file_path = f"s3://{bucket_name}/
-            # s3_file_path = Path(f"s3://noaa-wcsd-pds/
+            # s3_file_path = f"s3://{bucket_name}/dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+            # s3_file_path = Path(f"s3://noaa-wcsd-pds/dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
             echodata = ep.open_raw(
                 raw_file=raw_file_name,
                 sonar_model=sensor_name,
@@ -197,14 +200,16 @@ class RawToZarr:
                 # max_chunk_size=300,
                 # storage_options={'anon': True } # 'endpoint_url': self.endpoint_url} # this was creating problems
             )
-            print("Compute volume backscattering strength (Sv) from raw
+            print("Compute volume backscattering strength (Sv) from raw dataset.")
             ds_sv = ep.calibrate.compute_Sv(echodata)
             ds_sv = ep.consolidate.add_depth(
                 ds_sv, echodata
             )  # TODO: consolidate with other depth values
-
+
+            water_level = get_water_level(ds_sv)
+
             gc.collect()
-            print("Done computing volume backscatter strength (Sv) from raw
+            print("Done computing volume backscatter strength (Sv) from raw dataset.")
             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
             # but is not written out with ds_sv
             if "detected_seafloor_depth" in list(echodata.vendor.variables):
@@ -237,7 +242,14 @@ class RawToZarr:
             # TODO revert this so that smaller diffs can be used
             # The most minimum the resolution can be is as small as 0.25 meters
             min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+            # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
+            # inline with the
+            min_echo_range = np.max(
+                [0.20, min_echo_range]
+            )  # TODO: experiment with 0.25 and 0.50
+
             max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
             # This is the number of missing values found throughout the lat/lon
             num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
             #
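The new clamp makes `min_echo_range` the coarser of 0.20 m and the smallest observed bin spacing, presumably to keep centimetre-scale spacing on deep files (the HB0710 case quoted in the comment) from dictating the grid resolution. A worked example of the arithmetic:

```python
import numpy as np

# Smallest spacing between echo_range samples, e.g. ~0.01 m on a deep HB0710 file
min_echo_range = np.round(np.nanmin(np.diff([0.00, 0.01, 0.02])), 2)  # -> 0.01
# Clamp to at least 0.20 m (the TODO in the diff suggests also trying 0.25 and 0.50)
min_echo_range = np.max([0.20, min_echo_range])
print(min_echo_range)  # 0.2
```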
@@ -269,7 +281,7 @@ class RawToZarr:
             )
             if len(child_objects) > 0:
                 print(
-                    "Zarr store
+                    "Zarr store dataset already exists in s3, deleting existing and continuing."
                 )
                 s3_manager.delete_nodd_objects(
                     bucket_name=output_bucket_name,
@@ -284,13 +296,11 @@ class RawToZarr:
             )
             #################################################################
             self.__zarr_info_to_table(
-                # output_bucket_name=output_bucket_name,
                 table_name=table_name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name=raw_file_name,
-                # zarr_path=os.path.join(output_zarr_prefix, store_name),
                 min_echo_range=min_echo_range,
                 max_echo_range=max_echo_range,
                 num_ping_time_dropna=num_ping_time_dropna,
@@ -334,7 +344,7 @@ class RawToZarr:
     # #######################################################################
     # store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
     # output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
-    # bucket_key = f"
+    # bucket_key = f"dataset/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
     # zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
     # #
     # os.chdir(TEMPDIR)  # Lambdas require use of temp directory
@@ -348,7 +358,7 @@ class RawToZarr:
     # secret_access_key=self.__output_bucket_secret_access_key
     # )
     # if len(s3_objects) > 0:
-    # print('Zarr store
+    # print('Zarr store dataset already exists in s3, deleting existing and continuing.')
     # self.__s3.delete_objects(
     # bucket_name=self.__output_bucket,
     # objects=s3_objects,
water_column_sonar_processing/utility/__init__.py
@@ -1,6 +1,13 @@
 from .cleaner import Cleaner
-from .constants import Constants, Coordinates
+from .constants import Constants, Coordinates, Instruments
 from .pipeline_status import PipelineStatus
 from .timestamp import Timestamp

-__all__ = [
+__all__ = [
+    "Cleaner",
+    "Instruments",
+    "Constants",
+    "Coordinates",
+    "PipelineStatus",
+    "Timestamp",
+]
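`Instruments` joins the utility exports. A one-line sketch of the widened import surface (names taken from the hunk above):

```python
from water_column_sonar_processing.utility import (
    Cleaner,
    Constants,
    Coordinates,
    Instruments,
    PipelineStatus,
    Timestamp,
)
```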
water_column_sonar_processing/utility/cleaner.py
@@ -5,8 +5,7 @@ import shutil

 ###########################################################
 class Cleaner:
-
-    def delete_local_files(file_types=["*.raw*", "*.model"]):  # '*.json'
+    def delete_local_files(self, file_types=["*.raw*", "*.model"]):  # '*.json'
         # TODO: add .zarr to this
         print("Deleting all local raw and model files")
         for i in file_types:
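Adding `self` fixes calling `delete_local_files` on an instance; previously the bound instance was passed in as `file_types`, so instance calls could not supply patterns. A minimal sketch of the intended call (glob patterns from the diff; note it deletes matching files in the current working directory):

```python
from water_column_sonar_processing.utility import Cleaner

cleaner = Cleaner()
# With the added `self`, the defaults apply as intended...
cleaner.delete_local_files()
# ...and explicit patterns work, as used by RawToNetCDF and RawToZarr
cleaner.delete_local_files(file_types=["*.nc", "*.json"])
```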