water-column-sonar-processing 25.1.6__py3-none-any.whl → 25.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (26)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +27 -32
  2. water_column_sonar_processing/aws/s3_manager.py +52 -64
  3. water_column_sonar_processing/aws/s3fs_manager.py +3 -9
  4. water_column_sonar_processing/cruise/create_empty_zarr_store.py +14 -14
  5. water_column_sonar_processing/cruise/datatree_manager.py +3 -6
  6. water_column_sonar_processing/cruise/resample_regrid.py +67 -49
  7. water_column_sonar_processing/geometry/__init__.py +7 -2
  8. water_column_sonar_processing/geometry/elevation_manager.py +16 -17
  9. water_column_sonar_processing/geometry/geometry_manager.py +25 -25
  10. water_column_sonar_processing/geometry/line_simplification.py +150 -0
  11. water_column_sonar_processing/geometry/pmtile_generation.py +99 -64
  12. water_column_sonar_processing/index/index_manager.py +67 -32
  13. water_column_sonar_processing/model/zarr_manager.py +32 -21
  14. water_column_sonar_processing/process.py +15 -13
  15. water_column_sonar_processing/processing/__init__.py +2 -2
  16. water_column_sonar_processing/processing/batch_downloader.py +66 -41
  17. water_column_sonar_processing/processing/raw_to_zarr.py +121 -82
  18. water_column_sonar_processing/utility/constants.py +11 -1
  19. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  20. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/METADATA +21 -12
  21. water_column_sonar_processing-25.3.0.dist-info/RECORD +34 -0
  22. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/WHEEL +1 -1
  23. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  24. water_column_sonar_processing-25.1.6.dist-info/RECORD +0 -34
  25. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info/licenses}/LICENSE +0 -0
  26. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/top_level.txt +0 -0
--- a/water_column_sonar_processing/aws/dynamodb_manager.py
+++ b/water_column_sonar_processing/aws/dynamodb_manager.py
@@ -9,8 +9,8 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
 class DynamoDBManager:
     #####################################################################
     def __init__(
-            self,
-            # endpoint_url
+        self,
+        # endpoint_url
     ):
         # self.endpoint_url = endpoint_url
         self.dynamodb_session = boto3.Session(
@@ -62,7 +62,7 @@ class DynamoDBManager:
                 {"AttributeName": "FILE_NAME", "AttributeType": "S"},
                 {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
             ],
-            BillingMode="PAY_PER_REQUEST"
+            BillingMode="PAY_PER_REQUEST",
             # ProvisionedThroughput={
             #     'ReadCapacityUnits': 1_000,
             #     'WriteCapacityUnits': 1_000
@@ -70,7 +70,9 @@ class DynamoDBManager:
         )
         # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
         response = self.dynamodb_client.describe_table(TableName=table_name)
-        print(response) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
+        print(
+            response
+        )  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
         # sleep then response['Table']['TableStatus'] == 'ACTIVE'

     #####################################################################
@@ -111,7 +113,7 @@ class DynamoDBManager:
         expression_attribute_names,
         expression_attribute_values,
         update_expression,
-    ): # TODO: convert to boolean
+    ):  # TODO: convert to boolean
         try:
             response = self.dynamodb_client.update_item(
                 TableName=table_name,
@@ -120,7 +122,7 @@ class DynamoDBManager:
                 ExpressionAttributeValues=expression_attribute_values,
                 UpdateExpression=update_expression,
             )
-            status_code = response["ResponseMetadata"]["HTTPStatusCode"]
+            return response["ResponseMetadata"]["HTTPStatusCode"]  # TODO: should be 200
            # print(f"HTTPStatusCode: {status_code}")
            # assert status_code == 200, "Problem, unable to update dynamodb table."
            # assert response['ConsumedCapacity']['TableName'] == table_name
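Note: the wrapper above now returns the HTTP status code from `ResponseMetadata` instead of binding it to an unused local. A hedged sketch of a caller acting on that value; the method name and argument bindings are assumptions, since they sit outside this hunk:

```python
# Hypothetical caller of the update wrapper shown above; it now returns
# response["ResponseMetadata"]["HTTPStatusCode"] (expected to be 200).
status_code = dynamodb_manager.update_item(  # method name assumed
    table_name=table_name,
    key=key,  # assumed parameter, defined earlier in the real signature
    expression_attribute_names=expression_attribute_names,
    expression_attribute_values=expression_attribute_values,
    update_expression=update_expression,
)
if status_code != 200:
    raise RuntimeError(f"DynamoDB update failed with HTTP status {status_code}")
```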
@@ -131,22 +133,23 @@ class DynamoDBManager:
     # TODO: change to "get_cruise_as_df"
     def get_table_as_df(
         self,
-        ship_name,
+        # ship_name,
         cruise_name,
-        sensor_name,
+        # sensor_name,
         table_name,
     ) -> pd.DataFrame:
         """
         To be used to initialize a cruise, deletes all entries associated with that cruise
         in the database.
+        #TODO: cruise names isn't good enough, there could be two instrument for a cruise...
         """
         filter_expression = "CRUISE_NAME = :cr"
         response = self.dynamodb_client.scan(
             TableName=table_name,
             # Limit=1000,
-            Select='ALL_ATTRIBUTES', # or 'SPECIFIC_ATTRIBUTES',
+            Select="ALL_ATTRIBUTES",  # or 'SPECIFIC_ATTRIBUTES',
             # ExclusiveStartKey=where to pick up
-            #ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
+            # ReturnConsumedCapacity='INDEXES' | 'TOTAL' | 'NONE', ...not sure
             # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
             FilterExpression=filter_expression,
             # ExpressionAttributeNames={
@@ -154,36 +157,36 @@ class DynamoDBManager:
             #     '#CR': 'CRUISE_NAME',
             #     '#FN': 'FILE_NAME',
             # },
-            ExpressionAttributeValues={ # criteria
-                ':cr': {
-                    'S': cruise_name,
+            ExpressionAttributeValues={  # criteria
+                ":cr": {
+                    "S": cruise_name,
                 },
             },
-            ConsistentRead=True
+            ConsistentRead=True,
             # ExclusiveStartKey=response["LastEvaluatedKey"],
         )
         # Note: table.scan() has 1 MB limit on results so pagination is used

         if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
-            return pd.DataFrame() # If no results, return empty dataframe
+            return pd.DataFrame()  # If no results, return empty dataframe

         data = response["Items"]

-        while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
+        while response.get("LastEvaluatedKey"):  # "LastEvaluatedKey" in response:
             response = self.dynamodb_client.scan(
                 TableName=table_name,
                 ### Either 'Select' or 'ExpressionAttributeNames'/'ProjectionExpression'
-                Select='ALL_ATTRIBUTES', # or 'SPECIFIC_ATTRIBUTES',
+                Select="ALL_ATTRIBUTES",  # or 'SPECIFIC_ATTRIBUTES',
                 FilterExpression=filter_expression,
-                #ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
+                # ProjectionExpression='#SH, #CR, #FN', # what to specifically return — from expression_attribute_names
                 # ExpressionAttributeNames={ # would need to specify all cols in df
                 #     '#SH': 'SHIP_NAME',
                 #     '#CR': 'CRUISE_NAME',
                 #     '#FN': 'FILE_NAME',
                 # },
                 ExpressionAttributeValues={ # criteria
-                    ':cr': {
-                        'S': cruise_name,
+                    ":cr": {
+                        "S": cruise_name,
                     },
                 },
                 ConsistentRead=True,
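The loop above exists because a single DynamoDB `scan` call returns at most 1 MB of items; `LastEvaluatedKey` marks where the next call should resume. A minimal, self-contained sketch of the same pattern; the table name is an assumption, and the cruise value is borrowed from a comment elsewhere in this diff:

```python
import boto3

client = boto3.client("dynamodb")

# Table name is assumed; ":cr" filter mirrors the diff above.
scan_kwargs = {
    "TableName": "water-column-sonar-table",
    "Select": "ALL_ATTRIBUTES",
    "FilterExpression": "CRUISE_NAME = :cr",
    "ExpressionAttributeValues": {":cr": {"S": "HB0806"}},
    "ConsistentRead": True,
}

items = []
response = client.scan(**scan_kwargs)
items.extend(response["Items"])
while "LastEvaluatedKey" in response:  # more pages remain
    response = client.scan(ExclusiveStartKey=response["LastEvaluatedKey"], **scan_kwargs)
    items.extend(response["Items"])
```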
@@ -268,14 +271,7 @@ class DynamoDBManager:
         Finds all rows associated with a cruise and deletes them.
         """
         response = self.dynamodb_client.delete_item(
-            Key={
-                "CRUISE_NAME": {
-                    "S": cruise_name
-                },
-                "FILE_NAME": {
-                    "S": file_name
-                }
-            },
+            Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
             TableName=table_name,
             ReturnConsumedCapacity="TOTALS",
         )
@@ -286,8 +282,8 @@ class DynamoDBManager:

     #####################################################################
     def describe_table(
-            self,
-            table_name,
+        self,
+        table_name,
     ):
         """
         Get a description of the table. Used to verify that records were added/removed.
@@ -296,8 +292,6 @@ class DynamoDBManager:
         print(response)
         return response

-
-
     #####################################################################
     # TODO: from test_raw_to_zarr get enum and use here
     # def __update_processing_status(
@@ -357,4 +351,5 @@ class DynamoDBManager:
    #     )
    #     print("Done updating processing status.")

+
 #########################################################################
--- a/water_column_sonar_processing/aws/s3_manager.py
+++ b/water_column_sonar_processing/aws/s3_manager.py
@@ -1,10 +1,10 @@
 import json
 import os
-import boto3
-from typing import Optional
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Optional

+import boto3
 import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
@@ -17,10 +17,7 @@ GB = 1024**3


 #########################################################################
-def chunked(
-        ll: list,
-        n: int
-) -> Generator:
+def chunked(ll: list, n: int) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
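`chunked` is the batching helper used further down by `delete_objects`, which S3 caps at 1,000 keys per request. A quick usage sketch with invented keys:

```python
# Invented keys; S3's delete_objects accepts at most 1000 objects per call.
keys = [{"Key": f"data/file_{i:04d}.raw"} for i in range(2500)]
for batch in chunked(keys, 1000):
    print(len(batch))  # 1000, 1000, 500
```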
@@ -70,14 +67,18 @@ class S3Manager:
             region_name=self.s3_region,
             endpoint_url=self.endpoint_url,
         )
-        self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-            endpoint_url=self.endpoint_url,
+        self.s3_resource_noaa_wcsd_zarr_pds = (
+            self.s3_session_noaa_wcsd_zarr_pds.resource(
+                service_name="s3",
+                config=self.s3_client_config,
+                region_name=self.s3_region,
+                endpoint_url=self.endpoint_url,
+            )
+        )
+        self.paginator = self.s3_client.get_paginator("list_objects_v2")
+        self.paginator_noaa_wcsd_zarr_pds = (
+            self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )
-        self.paginator = self.s3_client.get_paginator('list_objects_v2')
-        self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')

         # def get_client(self): # TODO: do i need this?
         #     return self.s3_session.client(
@@ -96,21 +97,18 @@ class S3Manager:
         buckets. It allows public read of all objects.
         """
         # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
-        public_policy = {
-            "Version": "2012-10-17",
-            "Statement": [
-                {
-                    "Effect": "Allow",
-                    "Principal": "*",
-                    "Action": "s3:GetObject",
-                    "Resource": f"arn:aws:s3:::{bucket_name}/*",
-                }
-            ],
-        }
-        response1 = self.s3_client.create_bucket(
-            Bucket=bucket_name,
-            ACL='public-read'
-        )
+        # public_policy = {
+        #     "Version": "2012-10-17",
+        #     "Statement": [
+        #         {
+        #             "Effect": "Allow",
+        #             "Principal": "*",
+        #             "Action": "s3:GetObject",
+        #             "Resource": f"arn:aws:s3:::{bucket_name}/*",
+        #         }
+        #     ],
+        # }
+        response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
         print(response1)
         # response = self.s3_client.put_bucket_policy(
         #     Bucket=bucket_name, Policy=json.dumps(public_policy)
@@ -133,7 +131,9 @@ class S3Manager:
         """
         Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
         """
-        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(Filename=file_name, Key=key)
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+            Filename=file_name, Key=key
+        )
         return key

     #####################################################################
@@ -167,10 +167,10 @@ class S3Manager:
     #####################################################################
     # TODO: this uses resource, try to use client
     def upload_file(
-            self,
-            filename: str,
-            bucket_name: str,
-            key: str,
+        self,
+        filename: str,
+        bucket_name: str,
+        key: str,
     ):
         # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
         self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
@@ -205,11 +205,7 @@ class S3Manager:
         return all_uploads

     #####################################################################
-    def check_if_object_exists(
-            self,
-            bucket_name,
-            key_name
-    ) -> bool:
+    def check_if_object_exists(self, bucket_name, key_name) -> bool:
         s3_manager2 = S3Manager()
         s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
         s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
@@ -217,10 +213,10 @@ class S3Manager:
             # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
             s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
         except botocore.exceptions.ClientError as e:
-            if e.response['Error']['Code'] == "404":
+            if e.response["Error"]["Code"] == "404":
                 # The object does not exist.
                 return False
-            elif e.response['Error']['Code'] == 403:
+            elif e.response["Error"]["Code"] == 403:
                 # Unauthorized, including invalid bucket
                 return False
             else:
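Note: the reformatting here preserves a pre-existing quirk. `e.response["Error"]["Code"]` is a string in botocore, so the `== 403` comparison against an integer can never be true, and an actual 403 falls through to the `else` branch. A hedged sketch of the same check with both codes compared as strings:

```python
import boto3
import botocore


def object_exists(bucket_name: str, key_name: str) -> bool:
    # Sketch only; unlike the diff above, both error codes are strings here.
    s3_client = boto3.client("s3")
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
        return True
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] in ("404", "403"):
            # 404: no such key; 403: unauthorized, including an invalid bucket
            return False
        raise
```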
@@ -230,11 +226,7 @@ class S3Manager:

     #####################################################################
     # used: raw-to-zarr
-    def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
-            self,
-            bucket_name,
-            prefix
-    ):
+    def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
         # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
@@ -261,14 +253,10 @@ class S3Manager:

     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(
-            self,
-            bucket_name: str,
-            path: str
-    ) -> bool:
+    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
         if not path.endswith("/"):
             path = path + "/"
-        s3_client = self.s3_client
+        # s3_client = self.s3_client
         resp = self.list_objects(
             bucket_name=bucket_name, prefix=path
         )  # TODO: this is returning root folder and doesn't include children or hidden folders
@@ -350,7 +338,7 @@ class S3Manager:
         self,
         bucket_name,
         key,
-        file_name, # where the file will be saved
+        file_name,  # where the file will be saved
     ):
         self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
         # TODO: if bottom file doesn't exist, don't fail downloader
@@ -364,9 +352,7 @@ class S3Manager:
         objects: list,
     ):
         try:
-            print(
-                f"Deleting {len(objects)} objects in {bucket_name} in batches."
-            )
+            print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
             objects_to_delete = []
             for obj in objects:
                 objects_to_delete.append({"Key": obj["Key"]})
@@ -375,29 +361,31 @@ class S3Manager:
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
                     Bucket=bucket_name, Delete={"Objects": batch}
                 )
-            print(f"Deleted files.")
+            print("Deleted files.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")

     #####################################################################
     # TODO: need to test this!!!
     def delete_nodd_object(
-            self,
-            bucket_name,
-            key_name,
+        self,
+        bucket_name,
+        key_name,
     ):
         try:
-            print(
-                f"Deleting {key_name} objects in {bucket_name}."
+            print(f"Deleting {key_name} objects in {bucket_name}.")
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+                Bucket=bucket_name, Key=key_name
             )
-            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
-            print(f"Deleted file.")
+            print("Deleted file.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")

     #####################################################################
     def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
-        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body) # "Body" can be a file
+        self.s3_client.put_object(
+            Bucket=bucket_name, Key=key, Body=body
+        )  # "Body" can be a file

     #####################################################################
     def read_s3_json(
--- a/water_column_sonar_processing/aws/s3fs_manager.py
+++ b/water_column_sonar_processing/aws/s3fs_manager.py
@@ -1,5 +1,6 @@
 import os
 from typing import Optional
+
 import s3fs

 # TODO: S3FS_LOGGING_LEVEL=DEBUG
@@ -38,8 +39,7 @@ class S3FSManager:
         # create=False, not false because will be writing
         # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
         return s3fs.S3Map(
-            root=s3_zarr_store_path,
-            s3=self.s3fs
+            root=s3_zarr_store_path, s3=self.s3fs
        )  # create=False, not false because will be writing

     #####################################################################
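`s3fs.S3Map` exposes an S3 prefix as a `MutableMapping`, which is what lets zarr read and write a store directly on S3. A minimal sketch; the bucket and prefix are taken from comments elsewhere in this diff, and anonymous read access is an assumption:

```python
import s3fs
import zarr

# Path assembled from the noaa-wcsd-zarr-pds bucket and a key that
# appears in a comment in create_empty_zarr_store.py; assumptions only.
s3 = s3fs.S3FileSystem(anon=True)
store = s3fs.S3Map(
    root="s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr",
    s3=s3,
)
cruise = zarr.open(store, mode="r")  # read-only here; the manager opens for writing
```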
@@ -53,12 +53,7 @@ class S3FSManager:
    #     print(ff)

     #####################################################################
-    def upload_data(
-            self,
-            bucket_name,
-            file_path,
-            prefix
-    ):
+    def upload_data(self, bucket_name, file_path, prefix):
         # TODO: this works in theory but use boto3 to upload files
         s3_path = f"s3://{bucket_name}/{prefix}/"
         s3_file_system = self.s3fs
@@ -72,5 +67,4 @@ class S3FSManager:
         # s3_file_system =
         return self.s3fs.exists(s3_path)

-
     #####################################################################
--- a/water_column_sonar_processing/cruise/create_empty_zarr_store.py
+++ b/water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -4,8 +4,7 @@ import tempfile
 import numcodecs
 import numpy as np

-from water_column_sonar_processing.aws import DynamoDBManager
-from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.model import ZarrManager
 from water_column_sonar_processing.utility import Cleaner

@@ -42,14 +41,12 @@ class CreateEmptyZarrStore:
         print("Starting upload with thread pool executor.")
         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_files = []
-        for subdir, dirs, files in os.walk(
-            f"{local_directory}/{cruise_name}.zarr"
-        ):
+        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
             for file in files:
                 local_path = os.path.join(subdir, file)
                 # TODO: find a better method for splitting strings here:
                 # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
+                s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
                 all_files.append([local_path, s3_key])
         #
         # print(all_files)
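The `[local_path, s3_key]` pairs collected above feed the thread-pool upload named at the top of the hunk. A hedged sketch of that fan-out; the client setup, bucket name, and worker count are assumptions, not values from the package:

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

import boto3

s3_client = boto3.client("s3")


def upload_one(local_path: str, s3_key: str) -> str:
    # Bucket name assumed; the package resolves it from its own configuration.
    s3_client.upload_file(Filename=local_path, Bucket="noaa-wcsd-zarr-pds", Key=s3_key)
    return s3_key


with ThreadPoolExecutor(max_workers=32) as executor:  # worker count assumed
    futures = [executor.submit(upload_one, lp, key) for lp, key in all_files]
    for future in as_completed(futures):
        print(f"uploaded: {future.result()}")
```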
@@ -77,9 +74,7 @@ class CreateEmptyZarrStore:

         df = dynamo_db_manager.get_table_as_df(
             table_name=table_name,
-            ship_name=ship_name,
             cruise_name=cruise_name,
-            sensor_name=sensor_name,
         )

         # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
@@ -98,14 +93,17 @@ class CreateEmptyZarrStore:
         )

         # [3] calculate the max/min measurement resolutions for the whole cruise
-        cruise_min_echo_range = float(
-            np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+        cruise_min_echo_range = np.min(
+            (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
         )

         # [4] calculate the maximum of the max depth values
-        cruise_max_echo_range = float(
-            np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))
+        cruise_max_echo_range = np.max(
+            (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
         )
+
+        cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+
         print(
             f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
         )
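The substantive change in this hunk: cruise-wide extrema are now computed on echo range plus `WATER_LEVEL` (plausibly the transducer's offset below the waterline), while the unshifted minimum is kept separately as `cruise_min_epsilon`. A toy reproduction of the arithmetic with invented values:

```python
import numpy as np
import pandas as pd

# Invented values standing in for the DynamoDB-derived cruise table.
df = pd.DataFrame(
    {
        "MIN_ECHO_RANGE": [0.25, 0.25, 0.50],
        "MAX_ECHO_RANGE": [249.8, 499.7, 499.7],
        "WATER_LEVEL": [7.5, 7.5, 7.5],
    }
)

cruise_min_echo_range = np.min((df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float))
cruise_max_echo_range = np.max((df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float))
cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))

print(cruise_min_echo_range, cruise_max_echo_range, cruise_min_epsilon)  # 7.75 507.2 0.25
```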
@@ -140,12 +138,13 @@ class CreateEmptyZarrStore:
             zarr_manager.get_depth_values(
                 min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
             )
         )
         print(f"new_height: {new_height}")

         zarr_manager.create_zarr_store(
-            path=tempdir.name, # TODO: need to use .name or problem
+            path=tempdir.name,  # TODO: need to use .name or problem
             ship_name=ship_name,
             cruise_name=cruise_name,
             sensor_name=sensor_name,
@@ -153,12 +152,13 @@ class CreateEmptyZarrStore:
             width=new_width,
             min_echo_range=cruise_min_echo_range,
             max_echo_range=cruise_max_echo_range,
+            cruise_min_epsilon=cruise_min_epsilon,
             calibration_status=True,
         )
         #################################################################
         self.upload_zarr_store_to_s3(
             output_bucket_name=output_bucket_name,
-            local_directory=tempdir.name, # TODO: need to use .name or problem
+            local_directory=tempdir.name,  # TODO: need to use .name or problem
             object_prefix=zarr_prefix,
             cruise_name=cruise_name,
         )
--- a/water_column_sonar_processing/cruise/datatree_manager.py
+++ b/water_column_sonar_processing/cruise/datatree_manager.py
@@ -1,7 +1,7 @@
 ### https://xarray-datatree.readthedocs.io/en/latest/data-structures.html
-import numpy as np
-from datatree import DataTree
 import xarray as xr
+from datatree import DataTree
+

 class DatatreeManager:
     #######################################################
@@ -17,8 +17,5 @@ class DatatreeManager:
     ) -> None:
         ds1 = xr.Dataset({"foo": "orange"})
         dt = DataTree(name="root", data=ds1) # create root node
-        ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
+        # ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
         return dt
-
-
-
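For reference, the stub above builds a single-node tree with the legacy `datatree` package (note that it returns `dt` despite the `-> None` annotation). A small sketch of the same API with an added child node; the child dataset is invented for illustration:

```python
import xarray as xr
from datatree import DataTree

# Root node mirrors the stub above; the child dataset is invented.
root = DataTree(name="root", data=xr.Dataset({"foo": "orange"}))
child = DataTree(name="child", parent=root, data=xr.Dataset({"bar": 0}))
print(root)  # renders the tree: root -> child
```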