water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -4
- water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
- water_column_sonar_processing/aws/s3_manager.py +112 -122
- water_column_sonar_processing/aws/s3fs_manager.py +13 -19
- water_column_sonar_processing/aws/sns_manager.py +10 -21
- water_column_sonar_processing/aws/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
- water_column_sonar_processing/cruise/resample_regrid.py +109 -58
- water_column_sonar_processing/geometry/__init__.py +5 -0
- water_column_sonar_processing/geometry/geometry_manager.py +79 -48
- water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
- water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +104 -80
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +113 -75
- water_column_sonar_processing/process.py +76 -69
- water_column_sonar_processing/utility/__init__.py +6 -0
- water_column_sonar_processing/utility/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +42 -35
- water_column_sonar_processing/utility/pipeline_status.py +37 -10
- water_column_sonar_processing/utility/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/resample_regrid.py

@@ -1,15 +1,15 @@
 import gc
 import os
 from pathlib import Path
+
 import numcodecs
 import numpy as np
-import xarray as xr
 import pandas as pd
+import xarray as xr

 from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from water_column_sonar_processing.model.zarr_manager import ZarrManager
 from water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-
+from water_column_sonar_processing.model.zarr_manager import ZarrManager

 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -19,30 +19,34 @@ numcodecs.blosc.set_nthreads(1)
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)

+
 class ResampleRegrid:
     #######################################################
     def __init__(
-            self
+        self,
     ):
         self.__overwrite = True
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        self.dtype = 'float32'
+        self.dtype = "float32"

     #################################################################
     def interpolate_data(
-            self,
-            input_xr,
-            ping_times,
-            all_cruise_depth_values
+        self,
+        input_xr,
+        ping_times,
+        all_cruise_depth_values,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
-            data = np.empty(
-                (len(all_cruise_depth_values),
-                 len(ping_times),
-                 len(input_xr.frequency_nominal)),
-                dtype=self.dtype)
+            data = np.empty(
+                (
+                    len(all_cruise_depth_values),
+                    len(ping_times),
+                    len(input_xr.frequency_nominal),
+                ),
+                dtype=self.dtype,
+            )

             data[:] = np.nan

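The constructor change above pins self.dtype to "float32", and interpolate_data allocates its (depth, time, frequency) output buffer with that dtype before filling it with NaN. A minimal standalone sketch of that allocation pattern (the shape here is made up for illustration):

    import numpy as np

    # Allocate the (depth, time, frequency) buffer and mark every cell missing.
    # float32 halves the memory of float64 for large cruise-level Sv grids.
    data = np.empty((4, 3, 2), dtype="float32")
    data[:] = np.nan
    print(data.dtype, bool(np.isnan(data).all()))  # float32 True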
@@ -53,37 +57,60 @@ class ResampleRegrid:
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
-                }
+                },
             )

             channels = input_xr.channel.values
-            for channel in range(
-                    len(channels)):  # TODO: leaving off here, need to subset for just indices in time axis
+            for channel in range(
+                len(channels)
+            ):  # TODO: leaving off here, need to subset for just indices in time axis
+                print(
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values
+                    )
+                )
                 #
                 max_depths = np.nanmax(
                     a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                    axis=1
+                    axis=1,
                 )
                 superset_of_max_depths = set(
-                    np.nanmax(input_xr.echo_range.sel(channel=input_xr.channel[channel]).values, 1)
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values,
+                        1,
+                    )
                 )
-                set_of_max_depths = list({x for x in superset_of_max_depths if x == x})  # removes nan's
+                set_of_max_depths = list(
+                    {x for x in superset_of_max_depths if x == x}
+                )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
-                    select_indices = [i for i in range(0, len(max_depths)) if max_depths[i] == select_max_depth]
+                    select_indices = [
+                        i
+                        for i in range(0, len(max_depths))
+                        if max_depths[i] == select_max_depth
+                    ]

                     # now create new DataArray with proper dimension and indices
                     # data_select = input_xr.Sv.sel(
                     #     channel=input_xr.channel[channel]
                     # ).values[select_indices, :].T # TODO: dont like this transpose
-                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[select_indices, :].T.values
+                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                        select_indices, :
+                    ].T.values
                     # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                     times_select = input_xr.ping_time.values[select_indices]
                     depths_select = input_xr.echo_range.sel(
                         channel=input_xr.channel[channel]
-                    ).values[select_indices[0], :]  # '0' because all others in group should be same
+                    ).values[
+                        select_indices[0], :
+                    ]  # '0' because all others in group should be same

                     da_select = xr.DataArray(
                         data=data_select,
@@ -91,27 +118,32 @@ class ResampleRegrid:
                         coords={
                             "depth": depths_select,
                             "time": times_select,
-                        }
-                    ).dropna(dim='depth')
-                    resampled = da_select.interp(depth=all_cruise_depth_values, method='nearest')
+                        },
+                    ).dropna(dim="depth")
+                    resampled = da_select.interp(
+                        depth=all_cruise_depth_values, method="nearest"
+                    )
                     # write to the resample array
                     regrid_resample.loc[
-                        dict(time=times_select, frequency=input_xr.frequency_nominal.values[channel])
+                        dict(
+                            time=times_select,
+                            frequency=input_xr.frequency_nominal.values[channel],
+                        )
                     ] = resampled
                     print(f"updated {len(times_select)} ping times")
         except Exception as err:
-            print(f'Problem finding the dynamodb table: {err}')
+            print(f"Problem finding the dynamodb table: {err}")
             raise err
         print("Done interpolating data.")
         return regrid_resample

     #################################################################
     def resample_regrid(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            table_name
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        table_name,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
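For context, the hunks above carry the core regridding step: each group of pings that shares a maximum depth is wrapped in an xr.DataArray indexed by its native echo_range depths, NaN depths are dropped, and the array is interpolated onto the cruise-wide depth grid with method="nearest". A toy sketch of that step with invented values (none of these numbers come from the package; xarray's interp needs scipy installed):

    import numpy as np
    import xarray as xr

    # One channel's Sv on its native depth grid, two ping times (illustrative).
    native_depths = np.array([0.0, 5.0, 10.0])
    ping_times = np.array(
        ["2007-07-20T02:10:25", "2007-07-20T02:10:26"], dtype="datetime64[ns]"
    )
    sv = np.array([[-70.0, -71.0], [-80.0, -81.0], [-90.0, -91.0]])  # (depth, time)

    da = xr.DataArray(
        data=sv,
        dims=("depth", "time"),
        coords={"depth": native_depths, "time": ping_times},
    )

    # Regrid onto a finer cruise-wide depth grid with nearest-neighbor lookup.
    cruise_depths = np.arange(0.0, 12.5, 2.5)  # [0.0, 2.5, 5.0, 7.5, 10.0]
    resampled = da.interp(depth=cruise_depths, method="nearest")
    print(resampled.shape)  # (5, 2)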
@@ -144,7 +176,7 @@ class ResampleRegrid:
         #########################################################
         #########################################################
         # TODO: iterate files here
-        all_file_names = cruise_df['FILE_NAME']
+        all_file_names = cruise_df["FILE_NAME"]
         for file_name in all_file_names:
             gc.collect()
             file_name_stem = Path(file_name).stem
@@ -158,7 +190,9 @@ class ResampleRegrid:
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

             # Get index from all cruise files. Note: should be based on which are included in cruise.
-            index = cruise_df.index[cruise_df['FILE_NAME'] == f"{file_name_stem}.raw"][0]
+            index = cruise_df.index[
+                cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
+            ][0]

             # get input store
             input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
@@ -171,29 +205,38 @@ class ResampleRegrid:
             # [3] Get needed indices
             # Offset from start index to insert new data. Note that missing values are excluded.
             ping_time_cumsum = np.insert(
-                np.cumsum(cruise_df['NUM_PING_TIME_DROPNA'].dropna().to_numpy(dtype=int)),
+                np.cumsum(
+                    cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                ),
                 obj=0,
-                values=0
+                values=0,
             )
             start_ping_time_index = ping_time_cumsum[index]
             end_ping_time_index = ping_time_cumsum[index + 1]

-            min_echo_range = np.nanmin(np.float32(cruise_df['MIN_ECHO_RANGE']))
-            max_echo_range = np.nanmax(np.float32(cruise_df['MAX_ECHO_RANGE']))
+            min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
+            max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))

-            print('Creating empty ndarray for Sv data.')  # Note: cruise_zarr dimensions are (depth, time, frequency)
+            print(
+                "Creating empty ndarray for Sv data."
+            )  # Note: cruise_zarr dimensions are (depth, time, frequency)
             cruise_sv_subset = np.empty(
-                shape=output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :].shape
+                shape=output_zarr_store.Sv[
+                    :, start_ping_time_index:end_ping_time_index, :
+                ].shape
             )
             cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)

             all_cruise_depth_values = zarr_manager.get_depth_values(
-                min_echo_range=min_echo_range,
-                max_echo_range=max_echo_range
+                min_echo_range=min_echo_range, max_echo_range=max_echo_range
             )

             print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-            if set(input_xr_zarr_store.Sv.dims) != {'channel', 'ping_time', 'range_sample'}:
+            if set(input_xr_zarr_store.Sv.dims) != {
+                "channel",
+                "ping_time",
+                "range_sample",
+            }:
                 raise Exception("Xarray dimensions are not as expected.")

             # get geojson
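The ping_time_cumsum block above turns per-file ping counts into write offsets into the cruise-level store: prepending a zero to the running total makes ping_time_cumsum[i] the start index of file i and ping_time_cumsum[i + 1] its end index. A small sketch with made-up counts:

    import numpy as np

    num_ping_times = np.array([100, 250, 175])  # e.g. NUM_PING_TIME_DROPNA per raw file

    # Prepend 0 to the cumulative sum: offsets become [0, 100, 350, 525].
    ping_time_cumsum = np.insert(np.cumsum(num_ping_times), obj=0, values=0)

    index = 1  # second file in the cruise
    start_ping_time_index = ping_time_cumsum[index]  # 100
    end_ping_time_index = ping_time_cumsum[index + 1]  # 350
    print(start_ping_time_index, end_ping_time_index)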
@@ -209,8 +252,13 @@ class ResampleRegrid:

             ping_times = input_xr.ping_time.values
             # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-            epoch_seconds = [(pd.Timestamp(i) - pd.Timestamp('1970-01-01')) / pd.Timedelta('1s') for i in ping_times]
-            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = epoch_seconds
+            epoch_seconds = [
+                (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                for i in ping_times
+            ]
+            output_zarr_store.time[start_ping_time_index:end_ping_time_index] = (
+                epoch_seconds
+            )

             # --- UPDATING --- #

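The epoch_seconds hunk above only rewraps the arithmetic into black's layout; the conversion itself is unchanged, and the worked example in the comment can be checked directly:

    import numpy as np
    import pandas as pd

    ping_times = np.array(["2007-07-20T02:10:25.845073920"], dtype="datetime64[ns]")

    # Seconds since the Unix epoch: subtract the epoch and divide by one second.
    epoch_seconds = [
        (pd.Timestamp(t) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
        for t in ping_times
    ]
    print(epoch_seconds)  # [1184897425.845074]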
@@ -220,30 +268,33 @@ class ResampleRegrid:
                 all_cruise_depth_values=all_cruise_depth_values,
             )

-            print(f'start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}')
+            print(
+                f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+            )

             #########################################################################
             # write Sv values to cruise-level-model-store
-            for channel in range(len(input_xr.channel.values)):  # doesn't like being written in one fell swoop :(
+            for channel in range(
+                len(input_xr.channel.values)
+            ):  # doesn't like being written in one fell swoop :(
                 output_zarr_store.Sv[
-                    :,
-                    start_ping_time_index:end_ping_time_index,
-                    channel
+                    :, start_ping_time_index:end_ping_time_index, channel
                 ] = regrid_resample[:, :, channel]

             #########################################################################
             # [5] write subset of latitude/longitude
-            output_zarr_store.latitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'latitude'
-            ].values
-            output_zarr_store.longitude[start_ping_time_index:end_ping_time_index] = geospatial.dropna()[
-                'longitude'
-            ].values
+            output_zarr_store.latitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["latitude"].values
+            output_zarr_store.longitude[
+                start_ping_time_index:end_ping_time_index
+            ] = geospatial.dropna()["longitude"].values
         except Exception as err:
-            print(f'Problem interpolating the data: {err}')
+            print(f"Problem interpolating the data: {err}")
             raise err
         print("Done interpolating data.")

     #######################################################

+
     ###########################################################
water_column_sonar_processing/geometry/geometry_manager.py

@@ -1,10 +1,11 @@
 from pathlib import Path
-
+
 import geopandas
+import numpy as np
 import pandas as pd

-from water_column_sonar_processing.utility.cleaner import Cleaner
 from water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.utility.cleaner import Cleaner

 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -22,28 +23,32 @@ from water_column_sonar_processing.aws.s3_manager import S3Manager
 class GeometryManager:
     #######################################################
     def __init__(
-            self
+        self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
         self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level

     #######################################################
     def read_echodata_gps_data(
-            self,
-            echodata,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name,
-            write_geojson=True
+        self,
+        echodata,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name,
+        write_geojson=True,
     ) -> tuple:
         file_name_stem = Path(file_name).stem
         geo_json_name = f"{file_name_stem}.json"

-        print('Getting GPS data from echopype object.')
+        print("Getting GPS data from echopype object.")
         try:
-            latitude = np.round(echodata.platform.latitude.values, self.DECIMAL_PRECISION)
-            longitude = np.round(echodata.platform.longitude.values, self.DECIMAL_PRECISION)
+            latitude = np.round(
+                echodata.platform.latitude.values, self.DECIMAL_PRECISION
+            )
+            longitude = np.round(
+                echodata.platform.longitude.values, self.DECIMAL_PRECISION
+            )

             # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
             # 'nmea_times' are times from the nmea datalogger associated with GPS
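DECIMAL_PRECISION = 5 in the constructor above means coordinates are rounded to five decimal places, roughly one meter of longitude at the equator (see the scale table quoted in the module docstring). A quick check of what np.round does at that precision (the coordinate values here are invented):

    import numpy as np

    DECIMAL_PRECISION = 5  # ~1.1 m of longitude at the equator

    latitude = np.round(np.array([42.3500149, 42.3500151]), DECIMAL_PRECISION)
    print(latitude)  # [42.35001 42.35002]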
@@ -54,10 +59,15 @@ class GeometryManager:
             time1 = echodata.environment.time1.values

             if len(nmea_times) < len(time1):
-                raise Exception('Problem: Not enough NMEA times available to extrapolate time1.')
+                raise Exception(
+                    "Problem: Not enough NMEA times available to extrapolate time1."
+                )

             # Align 'sv_times' to 'nmea_times'
-            if not (np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])):
+            if not (
+                np.all(time1[:-1] <= time1[1:])
+                and np.all(nmea_times[:-1] <= nmea_times[1:])
+            ):
                 raise Exception("Problem: NMEA times are not sorted.")

             # Finds the indices where 'v' can be inserted just to the right of 'a'
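Both sortedness checks above guard the np.searchsorted alignment that follows: each Sv ping time is matched to an NMEA GPS fix by binary search, which is only valid on sorted inputs. A sketch of one common pairing pattern, consistent with the "inserted just to the right" comment and the indices < 0 guard in the next hunk (the times here are invented, and the exact search arguments in the package may differ):

    import numpy as np

    nmea_times = np.array(
        ["2007-07-20T02:10:00", "2007-07-20T02:10:10", "2007-07-20T02:10:20"],
        dtype="datetime64[ns]",
    )
    sv_times = np.array(
        ["2007-07-20T02:10:05", "2007-07-20T02:10:15"], dtype="datetime64[ns]"
    )

    # Binary search needs sorted inputs; the method raises if either array is not.
    assert np.all(nmea_times[:-1] <= nmea_times[1:])

    # side="right" minus 1 pairs each ping with the latest fix at or before it;
    # a ping earlier than every fix yields -1, which the caller masks to NaN.
    indices = np.searchsorted(nmea_times, sv_times, side="right") - 1
    print(indices)  # [0 1]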
@@ -67,65 +77,83 @@ class GeometryManager:
             lon = longitude[indices]
             lon[indices < 0] = np.nan

-            if not (np.all(lat[~np.isnan(lat)] >= -90.0) and np.all(lat[~np.isnan(lat)] <= 90.0) and np.all(lon[~np.isnan(lon)] >= -180.0) and np.all(lon[~np.isnan(lon)] <= 180.0)):
+            if not (
+                np.all(lat[~np.isnan(lat)] >= -90.0)
+                and np.all(lat[~np.isnan(lat)] <= 90.0)
+                and np.all(lon[~np.isnan(lon)] >= -180.0)
+                and np.all(lon[~np.isnan(lon)] <= 180.0)
+            ):
                 raise Exception("Problem: GPS Data falls outside allowed bounds.")

             # check for visits to null island
             null_island_indices = list(
-                set.intersection(set(np.where(np.abs(lat) < 1e-3)[0]), set(np.where(np.abs(lon) < 1e-3)[0]))
+                set.intersection(
+                    set(np.where(np.abs(lat) < 1e-3)[0]),
+                    set(np.where(np.abs(lon) < 1e-3)[0]),
+                )
             )
             lat[null_island_indices] = np.nan
             lon[null_island_indices] = np.nan

             # create requirement for minimum linestring size
-            MIN_ALLOWED_SIZE = 4  # don't want to process files with less than 4 data points
-            if len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE:
+            MIN_ALLOWED_SIZE = (
+                4  # don't want to process files with less than 4 data points
+            )
+            if (
+                len(lat[~np.isnan(lat)]) < MIN_ALLOWED_SIZE
+                or len(lon[~np.isnan(lon)]) < MIN_ALLOWED_SIZE
+            ):
                 raise Exception(
                     f"There was not enough data in lat or lon to create geojson, {len(lat[~np.isnan(lat)])} found, less than {MIN_ALLOWED_SIZE}."
                 )

             # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
-            gps_df = pd.DataFrame({
-                'latitude': lat,
-                'longitude': lon,
-                'time': time1
-            }).set_index(['time']).fillna(0)
+            gps_df = (
+                pd.DataFrame({"latitude": lat, "longitude": lon, "time": time1})
+                .set_index(["time"])
+                .fillna(0)
+            )

             # Note: We set np.nan to 0,0 so downstream missing values can be omitted
             gps_gdf = geopandas.GeoDataFrame(
                 gps_df,
                 geometry=geopandas.points_from_xy(
-                    gps_df['longitude'],
-                    gps_df['latitude']
+                    gps_df["longitude"], gps_df["latitude"]
                 ),
-                crs="epsg:4326"
+                crs="epsg:4326",
             )
             # Note: We set np.nan to 0,0 so downstream missing values can be omitted

             geo_json_line = gps_gdf.to_json()
             if write_geojson:
-                print('Creating local copy of geojson file.')
+                print("Creating local copy of geojson file.")
                 with open(geo_json_name, "w") as write_file:
                     write_file.write(geo_json_line)

-            geo_json_prefix = f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+            geo_json_prefix = (
+                f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
+            )

-            print('Checking s3 and deleting any existing GeoJSON file.')
+            print("Checking s3 and deleting any existing GeoJSON file.")
             s3_manager = S3Manager()
-            s3_objects = s3_manager.list_nodd_objects(prefix=f"{geo_json_prefix}/{geo_json_name}")
+            s3_objects = s3_manager.list_nodd_objects(
+                prefix=f"{geo_json_prefix}/{geo_json_name}"
+            )
             if len(s3_objects) > 0:
-                print('GeoJSON already exists in s3, deleting existing and continuing.')
+                print(
+                    "GeoJSON already exists in s3, deleting existing and continuing."
+                )
                 s3_manager.delete_nodd_objects(objects=s3_objects)

-            print('Upload GeoJSON to s3.')
+            print("Upload GeoJSON to s3.")
             s3_manager.upload_nodd_file(
                 file_name=geo_json_name,  # file_name
-                key=f"{geo_json_prefix}/{geo_json_name}"  # key
+                key=f"{geo_json_prefix}/{geo_json_name}",  # key
             )

             # TODO: delete geo_json file
             cleaner = Cleaner()
-            cleaner.delete_local_files(file_types=['*.json'])
+            cleaner.delete_local_files(file_types=["*.json"])

             #################################################################
             # TODO: simplify with shapely
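The null-island block above is unchanged apart from layout: any fix where both |lat| and |lon| fall below 1e-3 degrees is treated as a bogus (0, 0) reading and masked to NaN before the GeoDataFrame is built. The same logic in isolation, on an invented three-point track:

    import numpy as np

    lat = np.array([42.35001, 0.0, 42.35003])
    lon = np.array([-70.05001, 0.0, -70.05003])

    # Indices where both coordinates sit within 1e-3 degrees of null island.
    null_island_indices = list(
        set.intersection(
            set(np.where(np.abs(lat) < 1e-3)[0]),
            set(np.where(np.abs(lon) < 1e-3)[0]),
        )
    )
    lat[null_island_indices] = np.nan
    lon[null_island_indices] = np.nan
    print(lat)  # [42.35001      nan 42.35003]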
@@ -144,7 +172,9 @@ class GeometryManager:
             #################################################################
             # GeoJSON FeatureCollection with IDs as "time"
         except Exception as err:
-            print(f'Exception encountered extracting gps coordinates creating geojson: {err}')
+            print(
+                f"Exception encountered extracting gps coordinates creating geojson: {err}"
+            )
             raise
         # Note: returned lat/lon values can include np.nan because they need to be aligned with
         # the Sv data! GeoJSON needs simplification but has been filtered.
@@ -154,12 +184,12 @@ class GeometryManager:

     #######################################################
     def read_s3_geo_json(
-            self,
-            ship_name,
-            cruise_name,
-            sensor_name,
-            file_name_stem,
-            input_xr_zarr_store
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        file_name_stem,
+        input_xr_zarr_store,
     ):
         try:
             s3_manager = S3Manager()
@@ -170,25 +200,26 @@ class GeometryManager:
                 file_name_stem=file_name_stem,
             )
             ###
-            geospatial = geopandas.GeoDataFrame.from_features(
-                geo_json['features']
-            )
+            geospatial = geopandas.GeoDataFrame.from_features(
+                geo_json["features"]
+            ).set_index(pd.json_normalize(geo_json["features"])["id"].values)
             null_island_indices = list(
                 set.intersection(
                     set(np.where(np.abs(geospatial.latitude.values) < 1e-3)[0]),
-                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0])
+                    set(np.where(np.abs(geospatial.longitude.values) < 1e-3)[0]),
                 )
             )
             geospatial.iloc[null_island_indices] = np.nan
             ###
-            geospatial_index = geospatial.dropna().index.values.astype('datetime64[ns]')
+            geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
             aa = input_xr_zarr_store.ping_time.values.tolist()
             vv = geospatial_index.tolist()
             indices = np.searchsorted(a=aa, v=vv)

             return indices, geospatial
         except Exception as err:  # Failure
-            print(f'Exception encountered reading s3 GeoJSON: {err}')
+            print(f"Exception encountered reading s3 GeoJSON: {err}")
             raise

+
     ###########################################################
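The substantive change in the hunk above is the new .set_index(...) call: in 0.0.6 the GeoDataFrame built from the GeoJSON features is re-indexed by each feature's "id", so the index can be cast to datetime64[ns] and aligned against ping_time with searchsorted. A self-contained sketch with a one-feature FeatureCollection (the feature itself is invented, shaped like what read_echodata_gps_data writes):

    import geopandas
    import pandas as pd

    geo_json = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "id": "2007-07-20T02:10:25.845073920",
                "geometry": {"type": "Point", "coordinates": [-70.05, 42.35]},
                "properties": {"latitude": 42.35, "longitude": -70.05},
            }
        ],
    }

    # New in 0.0.6: index the frame by feature id (a timestamp string).
    geospatial = geopandas.GeoDataFrame.from_features(
        geo_json["features"]
    ).set_index(pd.json_normalize(geo_json["features"])["id"].values)

    geospatial_index = geospatial.dropna().index.values.astype("datetime64[ns]")
    print(geospatial_index)  # ['2007-07-20T02:10:25.845073920']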
water_column_sonar_processing/geometry/geometry_simplification.py

@@ -31,51 +31,52 @@


 """
-class GeometrySimplification:
+
+
+class GeometrySimplification:
     # TODO: in the future move to standalone library
     #######################################################
     def __init__(
-            self
+        self,
     ):
         pass

     #######################################################
     def speed_check(
-            self,
-            speed_knots=50
+        self,
+        speed_knots=50,
     ) -> None:
         print(speed_knots)
         pass

     def remove_null_island_values(
-            self,
-            epsilon=1e-5
+        self,
+        epsilon=1e-5,
     ) -> None:
         print(epsilon)
         pass

     def stream_geometry(
-            self
+        self,
     ) -> None:
         pass

     def break_linestring_into_multi_linestring(
-            self
+        self,
     ) -> None:
         # For any line-strings across the antimeridian, break into multilinestring
         pass

     def simplify(
-            self
+        self,
     ) -> None:
         pass

-    def kalman_filter(
-            self
-    ):
+    def kalman_filter(self):
         # for cruises with bad signal, filter so that
         pass

     #######################################################

+
     ###########################################################