water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of water-column-sonar-processing might be problematic.

Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
  2. water_column_sonar_processing/aws/s3_manager.py +95 -90
  3. water_column_sonar_processing/aws/s3fs_manager.py +5 -3
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/__init__.py +2 -1
  6. water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
  7. water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -21
  9. water_column_sonar_processing/cruise/resample_regrid.py +57 -47
  10. water_column_sonar_processing/dataset/__init__.py +3 -0
  11. water_column_sonar_processing/dataset/dataset_manager.py +205 -0
  12. water_column_sonar_processing/dataset/feature_manager.py +32 -0
  13. water_column_sonar_processing/geometry/geometry_manager.py +11 -12
  14. water_column_sonar_processing/geometry/line_simplification.py +26 -1
  15. water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
  16. water_column_sonar_processing/index/index_manager.py +18 -17
  17. water_column_sonar_processing/model/zarr_manager.py +504 -256
  18. water_column_sonar_processing/processing/__init__.py +3 -2
  19. water_column_sonar_processing/processing/batch_downloader.py +11 -11
  20. water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
  21. water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
  22. water_column_sonar_processing/utility/__init__.py +9 -2
  23. water_column_sonar_processing/utility/cleaner.py +1 -2
  24. water_column_sonar_processing/utility/constants.py +26 -7
  25. water_column_sonar_processing/utility/timestamp.py +1 -0
  26. water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
  27. water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
  28. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
  29. water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
  30. water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
  31. water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
  32. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0

water_column_sonar_processing/aws/dynamodb_manager.py
@@ -127,7 +127,7 @@ class DynamoDBManager:
                 # assert status_code == 200, "Problem, unable to update dynamodb table."
                 # assert response['ConsumedCapacity']['TableName'] == table_name
         except Exception as err:
-            print(f"Problem was encountered while updating item: {err}")
+            raise RuntimeError(f"Problem was encountered while updating item, {err}")
 
     #####################################################################
     # TODO: change to "get_cruise_as_df"
@@ -135,7 +135,7 @@ class DynamoDBManager:
         self,
         # ship_name,
         cruise_name,
-        # sensor_name,
+        # sensor_name,  # TODO: need to add this back for EK80
         table_name,
     ) -> pd.DataFrame:
         """
@@ -230,7 +230,7 @@ class DynamoDBManager:
         # if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
         #     return pd.DataFrame()  # If no results, return empty dataframe
         #
-        # data = response["Items"]
+        # dataset = response["Items"]
         #
         # while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
         #     response = self.dynamodb_client.scan(
@@ -252,10 +252,10 @@ class DynamoDBManager:
         #         ConsistentRead=True,
         #         ExclusiveStartKey=response["LastEvaluatedKey"],
         #     )
-        #     data.extend(response["Items"])
+        #     dataset.extend(response["Items"])
        #
        # deserializer = self.type_deserializer
-        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in data])
+        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
        #
        # return df.sort_values(by="START_TIME", ignore_index=True)
 
@@ -273,7 +273,7 @@ class DynamoDBManager:
         response = self.dynamodb_client.delete_item(
             Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
             TableName=table_name,
-            ReturnConsumedCapacity="TOTALS",
+            ReturnConsumedCapacity="TOTAL",
         )
         # TODO: there should be attributes included in response but they are missing
         # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:

water_column_sonar_processing/aws/s3_manager.py
@@ -2,6 +2,7 @@ import json
 import os
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
 from typing import Optional
 
 import boto3
@@ -80,14 +81,8 @@ class S3Manager:
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )
 
-    # def get_client(self):  # TODO: do i need this?
-    #     return self.s3_session.client(
-    #         service_name="s3",
-    #         config=self.s3_client_config,
-    #         region_name=self.s3_region,
-    #     )
-
     #####################################################################
+    # tested
     def create_bucket(
         self,
         bucket_name: str,
@@ -116,12 +111,13 @@ class S3Manager:
         # print(response)
 
     #####################################################################
+    # tested
     def list_buckets(self):
-        # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()
 
     #####################################################################
+    # tested
     def upload_nodd_file(
         self,
         file_name: str,
@@ -137,6 +133,7 @@ class S3Manager:
         return key
 
     #####################################################################
+    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,
@@ -160,58 +157,66 @@ class S3Manager:
                 if result:
                     all_uploads.extend([result])
             except Exception as err:
-                print(err)
+                raise RuntimeError(f"Problem, {err}")
+
         print("Done uploading files using threading pool.")
         return all_uploads
 
     #####################################################################
-    # TODO: this uses resource, try to use client
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
+    #####################################################################
+    # tested
     def upload_file(
         self,
         filename: str,
         bucket_name: str,
         key: str,
     ):
-        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
         self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
 
     #####################################################################
-    def upload_zarr_files_to_bucket(  # noaa-wcsd-model-pds
-        self,
-        local_directory,
-        remote_directory,
-        output_bucket_name,
-    ):
-        # Right now this is just for uploading a model store to s3
-        print("Uploading files to output bucket.")
-        store_name = os.path.basename(local_directory)
-        all_files = []
-        for subdir, dirs, files in os.walk(local_directory):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(
-                    remote_directory,
-                    store_name,
-                    subdir.split(store_name)[-1].strip("/"),
-                )
-                all_files.append([local_path, s3_key])
-
-        all_uploads = self.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading files to output bucket.")
-        return all_uploads
-
-    #####################################################################
+    # tested
     def check_if_object_exists(self, bucket_name, key_name) -> bool:
         s3_manager2 = S3Manager()
         s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
         s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
         try:
-            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
             s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
         except botocore.exceptions.ClientError as e:
             if e.response["Error"]["Code"] == "404":
                 # The object does not exist.
@@ -222,10 +227,9 @@ class S3Manager:
             else:
                 # Something else has gone wrong.
                 raise
-        return True
 
     #####################################################################
-    # used: raw-to-zarr
+    # tested
     def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
         # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
@@ -239,32 +243,20 @@ class S3Manager:
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    # def list_nodd_objects(  # These are used by the geometry for uploading data
-    #     self,
-    #     prefix,
-    # ):
-    #     # Returns a list of key strings for each object in bucket defined by prefix
-    #     keys = []
-    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #     for page in paginator.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #         if "Contents" in page.keys():
-    #             keys.extend([k["Key"] for k in page["Contents"]])
-    #     return keys
-
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
-        if not path.endswith("/"):
-            path = path + "/"
-        # s3_client = self.s3_client
-        resp = self.list_objects(
-            bucket_name=bucket_name, prefix=path
-        )  # TODO: this is returning root folder and doesn't include children or hidden folders
-        # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
-        return "Contents" in resp
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp
 
     #####################################################################
-    # used
+    # private
     def __paginate_child_objects(
         self,
         bucket_name: str,
@@ -279,6 +271,8 @@ class S3Manager:
                 objects.extend(page["Contents"])
         return objects
 
+    #####################################################################
+    # tested
     def get_child_objects(
         self,
         bucket_name: str,
@@ -310,13 +304,14 @@ class S3Manager:
         return raw_files
 
     #####################################################################
-    def get_object(  # TODO: Move this to index.py
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def get_object(  # noaa-wcsd-pds or noaa-wcsd-zarr-pds
         self,
         bucket_name,
         key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
         print(f"Getting object {key_name} from {bucket_name}")
         try:
             response = self.s3_client.get_object(
@@ -325,27 +320,31 @@ class S3Manager:
             )
             # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
             # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
         except ClientError as err:
             print(f"Problem was encountered while getting s3 file: {err}")
             raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response
 
     #####################################################################
-    # used raw-to-model
-    def download_file(  # TODO: change to download_object
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def download_file(
         self,
         bucket_name,
         key,
-        file_name,  # where the file will be saved
+        file_name,  # path to where the file will be saved
    ):
-        self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
-        # TODO: if bottom file doesn't exist, don't fail downloader
-        print("downloaded file")
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")
 
     #####################################################################
-    # TODO: need to test this!!!
+    # tested
     def delete_nodd_objects(  # nodd-bucket
         self,
         bucket_name,
@@ -358,16 +357,20 @@ class S3Manager:
                     objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
                     Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print("Deleted files.")
         except Exception as err:
-            print(f"Problem was encountered while deleting objects: {err}")
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
 
     #####################################################################
-    # TODO: need to test this!!!
-    def delete_nodd_object(
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
         self,
         bucket_name,
         key_name,
@@ -379,22 +382,27 @@ class S3Manager:
             )
             print("Deleted file.")
         except Exception as err:
-            print(f"Problem was encountered while deleting objects: {err}")
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
 
     #####################################################################
+    # tested
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-        self.s3_client.put_object(
-            Bucket=bucket_name, Key=key, Body=body
-        )  # "Body" can be a file
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")
 
     #####################################################################
+    # tested
     def read_s3_json(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         file_name_stem,
-        output_bucket_name,
+        output_bucket_name,  # TODO: change to just bucket_name
     ) -> str:
         try:
             resource = self.s3_resource_noaa_wcsd_zarr_pds
@@ -405,11 +413,8 @@ class S3Manager:
             file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
-        except Exception as err:  # Failure
-            print(f"Exception encountered reading s3 GeoJSON: {err}")
-            raise
-
-    #####################################################################
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
 
 
 #########################################################################
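
Note on the new S3Manager.upload_zarr_store_to_s3 (moved here from CreateEmptyZarrStore, see below): it walks <local_directory>/<cruise_name>.zarr and pushes every file through upload_files_with_thread_pool_executor. A minimal usage sketch, with the import path assumed from the package layout and the bucket/prefix values taken from the code comments above as illustrative examples only:

from water_column_sonar_processing.aws import S3Manager  # import path assumed, not shown in this diff

s3_manager = S3Manager()
s3_manager.upload_zarr_store_to_s3(
    output_bucket_name="noaa-wcsd-zarr-pds",               # illustrative bucket name
    local_directory="/tmp/zarr_output",                    # hypothetical directory containing HB0806.zarr
    object_prefix="level_2/Henry_B._Bigelow/HB0806/EK60",  # illustrative prefix from the code comments
    cruise_name="HB0806",
)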

water_column_sonar_processing/aws/s3fs_manager.py
@@ -4,6 +4,7 @@ from typing import Optional
 import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG
 
 
 class S3FSManager:
@@ -13,15 +14,16 @@ class S3FSManager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            # asynchronous=False,
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
+            # asynchronous=True,
         )
+        # self.s3fs.ls("")
 
         # s3_fs = s3fs.S3FileSystem(  # TODO: use s3fs_manager?
         #     anon=True,

water_column_sonar_processing/aws/sqs_manager.py
@@ -35,7 +35,7 @@ class SQSManager:
     #######################################################
     def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the data accross the systems.
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 

water_column_sonar_processing/cruise/__init__.py
@@ -1,4 +1,5 @@
 from .create_empty_zarr_store import CreateEmptyZarrStore
+from .create_empty_zarr_store_level_3 import CreateEmptyZarrStoreLevel3
 from .resample_regrid import ResampleRegrid
 
-__all__ = ["CreateEmptyZarrStore", "ResampleRegrid"]
+__all__ = ["CreateEmptyZarrStore", "CreateEmptyZarrStoreLevel3", "ResampleRegrid"]
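
The new CreateEmptyZarrStoreLevel3 class (defined in the added create_empty_zarr_store_level_3.py, see the file list above) is now exported from the cruise subpackage alongside the existing classes; a minimal import sketch:

from water_column_sonar_processing.cruise import CreateEmptyZarrStore, CreateEmptyZarrStoreLevel3, ResampleRegrid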

water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -13,7 +13,7 @@ numcodecs.blosc.set_nthreads(1)
 
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
 
 # TODO: change name to "CreateLocalEmptyZarrStore"
@@ -27,35 +27,35 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
-    # TODO: move this to the s3_manager
-    def upload_zarr_store_to_s3(
-        self,
-        output_bucket_name: str,
-        local_directory: str,
-        object_prefix: str,
-        cruise_name: str,
-    ) -> None:
-        print("uploading model store to s3")
-        s3_manager = S3Manager()
-        #
-        print("Starting upload with thread pool executor.")
-        # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # TODO: find a better method for splitting strings here:
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-                all_files.append([local_path, s3_key])
-        #
-        # print(all_files)
-        s3_manager.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading with thread pool executor.")
-        # TODO: move to common place
+    # TODO: moved this to the s3_manager
+    # def upload_zarr_store_to_s3(
+    #     self,
+    #     output_bucket_name: str,
+    #     local_directory: str,
+    #     object_prefix: str,
+    #     cruise_name: str,
+    # ) -> None:
+    #     print("uploading model store to s3")
+    #     s3_manager = S3Manager()
+    #     #
+    #     print("Starting upload with thread pool executor.")
+    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+    #     all_files = []
+    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+    #         for file in files:
+    #             local_path = os.path.join(subdir, file)
+    #             # TODO: find a better method for splitting strings here:
+    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+    #             all_files.append([local_path, s3_key])
+    #     #
+    #     # print(all_files)
+    #     s3_manager.upload_files_with_thread_pool_executor(
+    #         output_bucket_name=output_bucket_name,
+    #         all_files=all_files,
+    #     )
+    #     print("Done uploading with thread pool executor.")
+    #     # TODO: move to common place
 
     #######################################################
     def create_cruise_level_zarr_store(
@@ -65,7 +65,11 @@ class CreateEmptyZarrStore:
         cruise_name: str,
         sensor_name: str,
         table_name: str,
+        # override_cruise_min_epsilon=None,
     ) -> None:
+        """
+        Initialize zarr store. The water_level needs to be integrated.
+        """
         tempdir = tempfile.TemporaryDirectory()
         try:
             # HB0806 - 123, HB0903 - 220
@@ -93,20 +97,19 @@ class CreateEmptyZarrStore:
             )
 
             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = np.min(
-                (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
+            # cruise_min_echo_range = np.min(
+            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+            # )
 
-            # [4] calculate the maximum of the max depth values
+            # [4] calculate the np.max(max_echo_range + water_level)
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
             )
 
+            # TODO: set this to either 1 or 0.5 meters
             cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
 
-            print(
-                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
-            )
+            print(f"cruise_max_echo_range: {cruise_max_echo_range}")
 
             # [5] get number of channels
             cruise_frequencies = [
@@ -126,6 +129,7 @@ class CreateEmptyZarrStore:
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
+            #
            if len(child_objects) > 0:
                s3_manager.delete_nodd_objects(
                    bucket_name=output_bucket_name,
@@ -134,9 +138,9 @@ class CreateEmptyZarrStore:
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(
-                zarr_manager.get_depth_values(
-                    min_echo_range=cruise_min_echo_range,
+            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
+                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
+                    # min_echo_range=cruise_min_echo_range,
                     max_echo_range=cruise_max_echo_range,
                     cruise_min_epsilon=cruise_min_epsilon,
                 )
@@ -150,13 +154,13 @@ class CreateEmptyZarrStore:
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                min_echo_range=cruise_min_echo_range,
+                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
                 cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
-            self.upload_zarr_store_to_s3(
+            s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
                 local_directory=tempdir.name,  # TODO: need to use .name or problem
                 object_prefix=zarr_prefix,
@@ -182,7 +186,9 @@ class CreateEmptyZarrStore:
             print("Done creating cruise level zarr store.")
             #################################################################
         except Exception as err:
-            print(f"Problem trying to create new cruise model store: {err}")
+            raise RuntimeError(
+                f"Problem trying to create new cruise model store, {err}"
+            )
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()
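
A recurring change in this release is that failure paths raise RuntimeError instead of printing and continuing, so callers of create_cruise_level_zarr_store (and of the S3Manager helpers it uses) now see errors propagate. A sketch of the adjustment a caller might make, assuming an already-constructed CreateEmptyZarrStore instance named create_empty_zarr_store; only the arguments visible in the hunks above are shown, and the values are illustrative or hypothetical:

try:
    create_empty_zarr_store.create_cruise_level_zarr_store(
        # output bucket and any other required arguments are not shown in this diff
        cruise_name="HB0806",                   # illustrative cruise from the code comments
        sensor_name="EK60",                     # illustrative sensor from the code comments
        table_name="water-column-sonar-table",  # hypothetical DynamoDB table name
    )
except RuntimeError as err:
    print(f"Failed to initialize cruise-level zarr store: {err}")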