water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (22)
  1. water_column_sonar_processing/aws/s3_manager.py +2 -4
  2. water_column_sonar_processing/aws/s3fs_manager.py +1 -9
  3. water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
  4. water_column_sonar_processing/cruise/resample_regrid.py +88 -104
  5. water_column_sonar_processing/geometry/__init__.py +2 -0
  6. water_column_sonar_processing/geometry/elevation_manager.py +2 -2
  7. water_column_sonar_processing/geometry/geometry_manager.py +11 -13
  8. water_column_sonar_processing/geometry/line_simplification.py +10 -10
  9. water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
  10. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  11. water_column_sonar_processing/index/index_manager.py +43 -46
  12. water_column_sonar_processing/model/zarr_manager.py +533 -514
  13. water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
  14. water_column_sonar_processing/utility/cleaner.py +2 -1
  15. water_column_sonar_processing/utility/constants.py +29 -29
  16. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  17. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
  18. water_column_sonar_processing/process.py +0 -149
  19. water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
  20. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
  21. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
  22. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/geometry/geometry_manager.py

@@ -1,4 +1,3 @@
-import os
 from pathlib import Path
 
 import geopandas
@@ -8,6 +7,7 @@ import pandas as pd
 from water_column_sonar_processing.aws import S3Manager
 from water_column_sonar_processing.utility import Cleaner
 
+
 # // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
 # // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
 # // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
@@ -24,7 +24,7 @@ class GeometryManager:
     def __init__(
         self,
     ):
-        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
+        self.DECIMAL_PRECISION = 6  # precision for GPS coordinates
         self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to "street level"
 
     #######################################################
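Per the decimal-degrees table above, bumping DECIMAL_PRECISION from 5 to 6 tightens the nominal coordinate resolution from roughly 1.1 m to roughly 0.11 m at the equator. A minimal illustration of the rounding, with a made-up fix:

import numpy as np

longitude = -70.6789123456  # hypothetical GPS fix
print(np.round(longitude, 5))  # -70.67891  (~1.1 m resolution at the equator)
print(np.round(longitude, 6))  # -70.678912 (~0.11 m resolution at the equator)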
@@ -44,12 +44,10 @@ class GeometryManager:
 
         print("Getting GPS dataset from echopype object.")
         try:
-            latitude = np.round(
-                echodata.platform.latitude.values, self.DECIMAL_PRECISION
-            )
-            longitude = np.round(
-                echodata.platform.longitude.values, self.DECIMAL_PRECISION
-            )
+            latitude = (
+                echodata.platform.latitude.values
+            )  # TODO: DONT get values from here!
+            longitude = echodata.platform.longitude.values
 
             # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
             # 'nmea_times' are times from the nmea datalogger associated with GPS
@@ -192,8 +190,8 @@ class GeometryManager:
         # can include np.nan values?
 
     #######################################################
+    @staticmethod
     def read_s3_geo_json(
-        self,
         ship_name,
         cruise_name,
         sensor_name,
@@ -234,10 +232,10 @@
 
     ############################################################################
     # COMES from the raw-to-zarr conversion
-    def __write_geojson_to_file(self, store_name, data) -> None:
-        print("Writing GeoJSON to file.")
-        with open(os.path.join(store_name, "geo.json"), "w") as outfile:
-            outfile.write(data)
+    # def __write_geojson_to_file(self, store_name, data) -> None:
+    #     print("Writing GeoJSON to file.")
+    #     with open(os.path.join(store_name, "geo.json"), "w") as outfile:
+    #         outfile.write(data)
 
 
 ###########################################################
water_column_sonar_processing/geometry/line_simplification.py

@@ -71,11 +71,11 @@ class LineSimplification:
         pass
 
     #######################################################
+    @staticmethod
     def kalman_filter(
-        self,
         longitudes,
         latitudes,
-    ) -> (np.ndarray, np.ndarray):
+    ):
        """
        # TODO: need to use masked array to get the right number of values
        """
@@ -102,8 +102,8 @@
         return smoothed_state_means[:, [0, 2]]
 
     #######################################################
+    @staticmethod
     def get_speeds(
-        self,
         times: np.ndarray,  # don't really need time, do need to segment the dataset first
         latitudes: np.ndarray,
         longitudes: np.ndarray,
@@ -136,13 +136,13 @@
         # returns the speed in meters per second #TODO: get speed in knots
         return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan
 
-    def remove_null_island_values(
-        self,
-        epsilon=1e-5,
-    ) -> None:
-        # TODO: low priority
-        print(epsilon)
-        pass
+    # def remove_null_island_values(
+    #     self,
+    #     epsilon=1e-5,
+    # ) -> None:
+    #     # TODO: low priority
+    #     print(epsilon)
+    #     pass
 
     def break_linestring_into_multi_linestring(
         self,
water_column_sonar_processing/geometry/pmtile_generation.py

@@ -36,17 +36,20 @@ class PMTileGeneration(object):
         self.sensor_name = "EK60"
 
     #######################################################
-    def check_all_cruises(self, bucket_name, cruises):
+    @staticmethod
+    def check_all_cruises(bucket_name, cruises):
         completed = []
         for cruise_name in cruises:
             print(cruise_name)
             try:
                 zarr_store = f"{cruise_name}.zarr"
                 s3_zarr_store_path = f"{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{zarr_store}"
+                kwargs = {"consolidated": False}
                 cruise = xr.open_dataset(
                     filename_or_obj=f"s3://{s3_zarr_store_path}",
                     engine="zarr",
                     storage_options={"anon": True},
+                    **kwargs,
                 )
                 width = cruise.Sv.shape[1]
                 height = cruise.Sv.shape[0]
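The new kwargs plumbing passes consolidated=False through to xarray's Zarr backend, which skips the consolidated-metadata (.zmetadata) lookup and the warning it otherwise emits on stores written without it. A sketch of the equivalent standalone call; the bucket and cruise names below are illustrative, not taken from the diff:

import xarray as xr

# Illustrative path; check_all_cruises assembles this from bucket_name/ship/cruise/sensor.
s3_zarr_store_path = "noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.zarr"
cruise = xr.open_dataset(
    filename_or_obj=f"s3://{s3_zarr_store_path}",
    engine="zarr",
    storage_options={"anon": True},  # public-read bucket, no credentials needed
    consolidated=False,  # read per-array metadata instead of .zmetadata
)
height, width = cruise.Sv.shape[0], cruise.Sv.shape[1]  # as used for tile dimensions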
@@ -67,7 +70,8 @@ class PMTileGeneration(object):
         return completed
 
     #######################################################
-    def get_cruise_geometry(self, cruise_name, index):
+    @staticmethod
+    def get_cruise_geometry(cruise_name, index):
         print(cruise_name)
         try:
             pieces = []
@@ -117,7 +121,8 @@ class PMTileGeneration(object):
             raise RuntimeError(f"Problem parsing Zarr stores, {err}")
 
     #######################################################
-    def aggregate_geojson_into_dataframe(self, geoms):
+    @staticmethod
+    def aggregate_geojson_into_dataframe(geoms):
         gps_gdf = gpd.GeoDataFrame(
             columns=["id", "ship", "cruise", "sensor", "geometry"],
             geometry="geometry",
water_column_sonar_processing/geometry/spatiotemporal.py (new file)

@@ -0,0 +1,106 @@
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+from shapely.geometry import Point
+
+from water_column_sonar_processing.model import ZarrManager
+
+
+# Convert "meters per second" to "knots"
+# meters_per_second_to_knots = lambda mps_value: mps_value * 1.94384
+
+
+class Spatiotemporal:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.NANOSECONDS_PER_SECOND = 1e9
+        self.CUTOFF_DISTANCE_METERS = 50.0
+        self.CUTOFF_TIME_SECONDS = 10.0
+
+    #######################################################
+    @staticmethod
+    def meters_per_second_to_knots(
+        mps_value,
+    ):
+        return mps_value * 1.94384
+
+    #######################################################
+    def compute_speed_and_distance(
+        self,
+        times_ns,  #: np.ndarray[tuple[int], np.dtype[np.int64]],
+        latitudes,  #: np.ndarray,
+        longitudes,  #: np.ndarray,
+    ) -> pd.DataFrame:
+        try:
+            # fix times
+            times = np.array([np.datetime64(int(i), "ns") for i in times_ns])
+            geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+            points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+            # Conversion to a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
+            # EPSG:4087, WGS 84 / World Equidistant Cylindrical
+            # https://epsg.io/4087
+            points_df.to_crs(epsg=4087, inplace=True)
+            distance_diffs = points_df.distance(points_df.geometry.shift())
+            distance_diffs[0] = distance_diffs[1]  # missing first datapoint, backfill
+            # Issue: np.max(distance_diffs) = 3397 meters
+            time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+            time_diffs_ns[0] = time_diffs_ns[1]  # missing first datapoint, backfill
+            time_diffs_seconds = time_diffs_ns / self.NANOSECONDS_PER_SECOND
+            # Calculate the speed in knots
+            speed_meters_per_second = np.array(
+                (distance_diffs / time_diffs_ns * self.NANOSECONDS_PER_SECOND),
+                dtype=np.float32,
+            )
+            knots = self.meters_per_second_to_knots(speed_meters_per_second)
+            metrics_df = pd.DataFrame(
+                {
+                    "speed_knots": knots.astype(dtype=np.float32),
+                    "distance_meters": distance_diffs.to_numpy(dtype=np.float32),
+                    "diff_seconds": time_diffs_seconds.astype(np.float32),
+                },
+                index=times,
+            )
+            #
+            return metrics_df
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered, {err}")
+
+    #######################################################
+    def add_speed_and_distance(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        bucket_name,
+        endpoint_url=None,
+    ) -> None:
+        try:
+            zarr_manager = ZarrManager()
+            zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                output_bucket_name=bucket_name,
+                endpoint_url=endpoint_url,
+            )
+            longitudes = zarr_store["longitude"][:]
+            latitudes = zarr_store["latitude"][:]
+            times = zarr_store["time"][:]
+            #
+            metrics_df = self.compute_speed_and_distance(
+                times_ns=times,
+                latitudes=latitudes,
+                longitudes=longitudes,
+            )
+            # Write the speed and distance to the output zarr store
+            zarr_store["speed"][:] = metrics_df.speed_knots.values
+            zarr_store["distance"][:] = metrics_df.distance_meters.values
+        except Exception as err:
+            raise RuntimeError(
+                f"Exception encountered writing the speed and distance, {err}"
+            )
+
+
+###########################################################
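compute_speed_and_distance reprojects the EPSG:4326 fixes into EPSG:4087 (World Equidistant Cylindrical, meters), so consecutive-point distances come straight from planar geometry; dividing by the time deltas and multiplying by 1.94384 yields knots. A minimal usage sketch with synthetic data, assuming the new Spatiotemporal class is exported from water_column_sonar_processing.geometry (the two-line __init__.py change above suggests it is):

import numpy as np

from water_column_sonar_processing.geometry import Spatiotemporal  # assumed export

# One GPS fix per second, stepping ~11.1 m north each time (0.0001 deg latitude).
times_ns = np.arange(4, dtype=np.int64) * 1_000_000_000  # int64 nanoseconds since epoch
latitudes = 41.0 + 0.0001 * np.arange(4)
longitudes = np.full(4, -70.0)

spatiotemporal = Spatiotemporal()
metrics_df = spatiotemporal.compute_speed_and_distance(
    times_ns=times_ns,
    latitudes=latitudes,
    longitudes=longitudes,
)
# Expect distance_meters ~11.1 and speed_knots ~21.6 (11.1 m/s * 1.94384) per row.
print(metrics_df)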
water_column_sonar_processing/index/index_manager.py

@@ -2,10 +2,8 @@ import os
 import re
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
-from hashlib import sha256
 
-import networkx as nx
-import numpy as np
+# import networkx as nx
 import pandas as pd
 
 from water_column_sonar_processing.aws import S3Manager
@@ -120,6 +118,7 @@ class IndexManager:
         for res in page_iterator:
             if "Key" in res:
                 return res["Key"]
+        return None
         # else raise exception?
 
     # DSJ0604-D20060406-T050022.bot 2kB == 2152 'Size'
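The added return None makes the no-match fall-through explicit when pagination is exhausted without finding a key. For reference, a minimal sketch of the boto3 pagination pattern the method relies on; the bucket and prefix are illustrative, and the JMESPath .search() filter is one way (an assumption, not necessarily the package's) to get per-object results that carry a "Key":

import boto3

s3_client = boto3.client("s3")
paginator = s3_client.get_paginator("list_objects_v2")
page_iterator = paginator.paginate(
    Bucket="noaa-wcsd-pds",  # illustrative; IndexManager uses its input_bucket_name
    Prefix="data/raw/Henry_B._Bigelow/HB0707/",
)
# .search() flattens pages into per-object results, each a dict with a "Key".
for res in page_iterator.search("Contents[?contains(Key, '.bot')]"):
    if res is not None and "Key" in res:
        print(res["Key"])
        break
else:
    print(None)  # mirrors the new explicit `return None` fall-through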
@@ -190,9 +189,8 @@ class IndexManager:
         return files_list
 
     #################################################################
-    def get_subset_ek60_prefix(
-        self, df: pd.DataFrame
-    ) -> pd.DataFrame:  # TODO: is this used?
+    @staticmethod
+    def get_subset_ek60_prefix(df: pd.DataFrame) -> pd.DataFrame:  # TODO: is this used?
         # Returns all objects with 'EK60' in prefix of file path
         # Note that this can include 'EK80' dataset that are false-positives
         # in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
@@ -274,9 +272,8 @@ class IndexManager:
         return all_datagrams
 
     #################################################################
-    def get_ek60_objects(
-        self, df: pd.DataFrame, subset_datagrams: list
-    ) -> pd.DataFrame:
+    @staticmethod
+    def get_ek60_objects(df: pd.DataFrame, subset_datagrams: list) -> pd.DataFrame:
         # for each key write datagram value to all other files in same cruise
         for subset_datagram in subset_datagrams:
             if subset_datagram["DATAGRAM"] == "CON0":
@@ -345,40 +342,40 @@
         # print(end_time)
 
     # TODO: wip
-    def build_merkle_tree(self):
-        G = nx.DiGraph()
-        # https://noaa-wcsd-pds.s3.amazonaws.com/index.html#data/raw/Henry_B._Bigelow/HB0707/
-        ship_name = "Henry_B._Bigelow"
-        cruise_name = "HB0707"
-        # cruise_name = "HB0805"
-        prefix = f"data/raw/{ship_name}/{cruise_name}/"
-        # prefix = f"data/raw/{ship_name}/"
-        page_iterator = self.s3_manager.paginator.paginate(
-            Bucket=self.input_bucket_name,
-            Prefix=prefix,
-        )
-        for page in page_iterator:
-            for contents in page["Contents"]:
-                obj_key = contents["Key"]
-                # https://datatracker.ietf.org/doc/html/rfc7232#section-2.3
-                obj_etag = contents["ETag"].split('"')[1]  # properties
-                obj_size = contents["Size"]
-                basename = os.path.basename(obj_key)
-                G.add_node(
-                    node_for_adding=basename, ETag=obj_etag, Size=obj_size, Key=obj_key
-                )  # TODO: add parent hash
-                split_path = os.path.normpath(obj_key).split(os.path.sep)
-                # split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
-                for previous, current in zip(split_path, split_path[1:]):
-                    if not G.has_edge(previous, current):
-                        G.add_edge(previous, current)
-        # print(G)
-        etag_set = frozenset(
-            [k for j, k in list(G.nodes.data("ETag")) if k is not None]
-        )
-        new_hash = sha256(str(etag_set.__hash__()).encode("utf-8")).hexdigest()
-        total_size = [k for j, k in list(G.nodes.data("Size")) if k is not None]
-        print(np.sum(total_size))  # 22.24 Terabytes in Henry_B._Bigelow cruises
-        print(" ")
-        print(new_hash)
-        return new_hash
+    # def build_merkle_tree(self):
+    #     G = nx.DiGraph()
+    #     # https://noaa-wcsd-pds.s3.amazonaws.com/index.html#data/raw/Henry_B._Bigelow/HB0707/
+    #     ship_name = "Henry_B._Bigelow"
+    #     cruise_name = "HB0707"
+    #     # cruise_name = "HB0805"
+    #     prefix = f"data/raw/{ship_name}/{cruise_name}/"
+    #     # prefix = f"data/raw/{ship_name}/"
+    #     page_iterator = self.s3_manager.paginator.paginate(
+    #         Bucket=self.input_bucket_name,
+    #         Prefix=prefix,
+    #     )
+    #     for page in page_iterator:
+    #         for contents in page["Contents"]:
+    #             obj_key = contents["Key"]
+    #             # https://datatracker.ietf.org/doc/html/rfc7232#section-2.3
+    #             obj_etag = contents["ETag"].split('"')[1]  # properties
+    #             obj_size = contents["Size"]
+    #             basename = os.path.basename(obj_key)
+    #             G.add_node(
+    #                 node_for_adding=basename, ETag=obj_etag, Size=obj_size, Key=obj_key
+    #             )  # TODO: add parent hash
+    #             split_path = os.path.normpath(obj_key).split(os.path.sep)
+    #             # split_path: ['dataset', 'raw', 'Henry_B._Bigelow', 'HB0707', 'EK60', 'D20070712-T004447.raw']
+    #             for previous, current in zip(split_path, split_path[1:]):
+    #                 if not G.has_edge(previous, current):
+    #                     G.add_edge(previous, current)
+    #     # print(G)
+    #     etag_set = frozenset(
+    #         [k for j, k in list(G.nodes.data("ETag")) if k is not None]
+    #     )
+    #     new_hash = sha256(str(etag_set.__hash__()).encode("utf-8")).hexdigest()
+    #     total_size = [k for j, k in list(G.nodes.data("Size")) if k is not None]
+    #     print(np.sum(total_size))  # 22.24 Terabytes in Henry_B._Bigelow cruises
+    #     print(" ")
+    #     print(new_hash)
+    #     return new_hash
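One note on the retired implementation: sha256(str(etag_set.__hash__())...) derives the digest from the hash of a frozenset of strings, and Python salts string hashing per process (PYTHONHASHSEED), so the fingerprint was not reproducible across runs. A sketch of a stable alternative over the same ETag inputs; the ETag values here are made up:

from hashlib import sha256

# Illustrative ETags as returned by S3 (surrounding quotes stripped, per RFC 7232).
etags = [
    "9bb58f26192e4ba00f01e2e7b136bbd8",
    "5d41402abc4b2a76b9719d911017c592",
]
# Sorting gives order-independence without relying on hash randomization,
# so the same set of objects always yields the same fingerprint.
fingerprint = sha256("".join(sorted(etags)).encode("utf-8")).hexdigest()
print(fingerprint)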