water-column-sonar-processing 25.1.6__py3-none-any.whl → 25.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +27 -32
  2. water_column_sonar_processing/aws/s3_manager.py +52 -64
  3. water_column_sonar_processing/aws/s3fs_manager.py +3 -9
  4. water_column_sonar_processing/cruise/create_empty_zarr_store.py +14 -14
  5. water_column_sonar_processing/cruise/datatree_manager.py +3 -6
  6. water_column_sonar_processing/cruise/resample_regrid.py +67 -49
  7. water_column_sonar_processing/geometry/__init__.py +7 -2
  8. water_column_sonar_processing/geometry/elevation_manager.py +16 -17
  9. water_column_sonar_processing/geometry/geometry_manager.py +25 -25
  10. water_column_sonar_processing/geometry/line_simplification.py +150 -0
  11. water_column_sonar_processing/geometry/pmtile_generation.py +99 -64
  12. water_column_sonar_processing/index/index_manager.py +67 -32
  13. water_column_sonar_processing/model/zarr_manager.py +32 -21
  14. water_column_sonar_processing/process.py +15 -13
  15. water_column_sonar_processing/processing/__init__.py +2 -2
  16. water_column_sonar_processing/processing/batch_downloader.py +66 -41
  17. water_column_sonar_processing/processing/raw_to_zarr.py +121 -82
  18. water_column_sonar_processing/utility/constants.py +11 -1
  19. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  20. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/METADATA +21 -12
  21. water_column_sonar_processing-25.3.0.dist-info/RECORD +34 -0
  22. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/WHEEL +1 -1
  23. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  24. water_column_sonar_processing-25.1.6.dist-info/RECORD +0 -34
  25. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info/licenses}/LICENSE +0 -0
  26. {water_column_sonar_processing-25.1.6.dist-info → water_column_sonar_processing-25.3.0.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/cruise/resample_regrid.py
@@ -35,6 +35,7 @@ class ResampleRegrid:
         input_xr,
         ping_times,
         all_cruise_depth_values,
+        water_level,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
@@ -53,7 +54,7 @@ class ResampleRegrid:
                 data=data,
                 dims=("depth", "time", "frequency"),
                 coords={
-                    "depth": all_cruise_depth_values,
+                    "depth": all_cruise_depth_values,  # TODO: these should be on interval from 7.7 meters to 507 meters
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
@@ -62,34 +63,19 @@ class ResampleRegrid:
             channels = input_xr.channel.values
             for channel in range(
                 len(channels)
-            ):  # TODO: leaving off here, need to subset for just indices in time axis
+            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
                 gc.collect()
-                print(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values
-                    )
-                )
-                #
                 max_depths = np.nanmax(
-                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values
+                    + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    np.nanmax(
-                        input_xr.echo_range.sel(
-                            channel=input_xr.channel[channel]
-                        ).values,
-                        1,
-                    )
-                )
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
                 )  # removes nan's
                 # iterate through partitions of data with similar depths and resample
                 for select_max_depth in set_of_max_depths:
-                    gc.collect()
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
                         i
@@ -132,6 +118,7 @@ class ResampleRegrid:
                         )
                     ] = resampled
                     print(f"updated {len(times_select)} ping times")
+                    gc.collect()
         except Exception as err:
             print(f"Problem finding the dynamodb table: {err}")
             raise err
@@ -146,9 +133,9 @@ class ResampleRegrid:
         sensor_name,
         table_name,
         # TODO: file_name?,
-        bucket_name, # TODO: this is the same bucket
+        bucket_name,  # TODO: this is the same bucket
         override_select_files=None,
-        endpoint_url=None
+        endpoint_url=None,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
@@ -172,9 +159,9 @@ class ResampleRegrid:
         # get dynamo stuff
         dynamo_db_manager = DynamoDBManager()
         cruise_df = dynamo_db_manager.get_table_as_df(
-            ship_name=ship_name,
+            # ship_name=ship_name,
             cruise_name=cruise_name,
-            sensor_name=sensor_name,
+            # sensor_name=sensor_name,
             table_name=table_name,
         )
 
@@ -191,19 +178,21 @@ class ResampleRegrid:
         file_name_stem = Path(file_name).stem
         print(f"Processing file: {file_name_stem}.")
 
-        if f"{file_name_stem}.raw" not in list(cruise_df['FILE_NAME']):
-            raise Exception(f"Raw file file_stem not found in dynamodb.")
+        if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+            raise Exception("Raw file file_stem not found in dynamodb.")
 
         # status = PipelineStatus['LEVEL_1_PROCESSING']
         # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
         # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
         # Get index from all cruise files. Note: should be based on which are included in cruise.
-        index = int(cruise_df.index[
-            cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
-        ][0])
+        index = int(
+            cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+                0
+            ]
+        )
 
-        # get input store
+        # Get input store
         input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
             ship_name=ship_name,
             cruise_name=cruise_name,
@@ -212,6 +201,10 @@ class ResampleRegrid:
             input_bucket_name=bucket_name,
             endpoint_url=endpoint_url,
         )
+
+        # This is the vertical offset of the measurement.
+        # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+        water_level = input_xr_zarr_store.water_level.values
         #########################################################################
         # [3] Get needed indices
         # Offset from start index to insert new data. Note that missing values are excluded.
@@ -225,14 +218,26 @@ class ResampleRegrid:
         start_ping_time_index = ping_time_cumsum[index]
         end_ping_time_index = ping_time_cumsum[index + 1]
 
-        min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
-        max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))
+        min_echo_range = np.min(
+            (cruise_df["MIN_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+            .dropna()
+            .astype(float)
+        )
+        max_echo_range = np.max(
+            (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+            .dropna()
+            .astype(float)
+        )
+        cruise_min_epsilon = np.min(
+            cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+        )
 
         # Note: cruise dims (depth, time, frequency)
         all_cruise_depth_values = zarr_manager.get_depth_values(
             min_echo_range=min_echo_range,
-            max_echo_range=max_echo_range
-        )
+            max_echo_range=max_echo_range,
+            cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
+        )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
 
         print(" ".join(list(input_xr_zarr_store.Sv.dims)))
         if set(input_xr_zarr_store.Sv.dims) != {
@@ -265,34 +270,45 @@ class ResampleRegrid:
                 )
 
                 # --- UPDATING --- #
-                regrid_resample = self.interpolate_data(
-                    input_xr=input_xr,
-                    ping_times=ping_times,
-                    all_cruise_depth_values=all_cruise_depth_values,
+                regrid_resample = (
+                    self.interpolate_data(  # TODO: need to add water_level here
+                        input_xr=input_xr,
+                        ping_times=ping_times,
+                        all_cruise_depth_values=all_cruise_depth_values,
+                        water_level=water_level,
+                    )
                 )
 
-                print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
+                print(
+                    f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+                )
                 #########################################################################
                 # write Sv values to cruise-level-model-store
 
                 for fff in range(regrid_resample.shape[-1]):
-                    output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, fff] = regrid_resample[:, :, fff]
+                    output_zarr_store.Sv[
+                        :, start_ping_time_index:end_ping_time_index, fff
+                    ] = regrid_resample[:, :, fff]
                 #########################################################################
                 # TODO: add the "detected_seafloor_depth/" to the
                 # L2 cruise dataarrays
                 # TODO: make bottom optional
                 # TODO: Only checking the first channel for now. Need to average across all channels
                 # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-                if 'detected_seafloor_depth' in input_xr.variables:
-                    print('Found detected_seafloor_depth, adding data to output store.')
+                if "detected_seafloor_depth" in input_xr.variables:
+                    print("Found detected_seafloor_depth, adding data to output store.")
                     detected_seafloor_depth = input_xr.detected_seafloor_depth.values
-                    detected_seafloor_depth[detected_seafloor_depth == 0.] = np.nan
+                    detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
                     # TODO: problem here: Processing file: D20070711-T210709.
-                    detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0) # RuntimeWarning: Mean of empty slice detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)
-                    detected_seafloor_depths[detected_seafloor_depths == 0.] = np.nan
+
+                    detected_seafloor_depths = np.nanmean(
+                        a=detected_seafloor_depth, axis=0
+                    )
+                    # RuntimeWarning: Mean of empty slice detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)
+                    detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
                     print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
                     print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
-                    #available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                    # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
                     output_zarr_store.bottom[
                         start_ping_time_index:end_ping_time_index
                     ] = detected_seafloor_depths
@@ -301,17 +317,19 @@ class ResampleRegrid:
                 # [5] write subset of latitude/longitude
                 output_zarr_store.latitude[
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()["latitude"].values # TODO: get from ds_sv directly, dont need geojson anymore
+                ] = geospatial.dropna()[
+                    "latitude"
+                ].values  # TODO: get from ds_sv directly, dont need geojson anymore
                 output_zarr_store.longitude[
                     start_ping_time_index:end_ping_time_index
                 ] = geospatial.dropna()["longitude"].values
                 #########################################################################
                 #########################################################################
         except Exception as err:
-            print(f"Problem interpolating the data: {err}")
+            print(f"Problem with resample_regrid: {err}")
             raise err
         finally:
-            print("Done interpolating data.")
+            print("Exiting resample_regrid.")
             # TODO: read across times and verify data was written?
 
     #######################################################
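
The resample_regrid.py changes above all serve one adjustment: echo_range is measured from the transducer rather than from the sea surface, so the new water_level offset is added to echo_range (and to the MIN/MAX_ECHO_RANGE bounds pulled from DynamoDB) before samples are regridded onto the cruise-level depth axis. A minimal sketch of that idea, assuming xarray with scipy available; the variable names and the 7.5 meter offset are illustrative, not taken from the package:

import numpy as np
import xarray as xr

echo_range = np.linspace(0.0, 500.0, 1001)  # meters below the transducer
water_level = 7.5  # vertical transducer offset in meters (example value)
sv = np.random.default_rng(0).normal(size=echo_range.size)  # stand-in Sv samples

# Index the samples by true depth: depth = echo_range + water_level.
measured = xr.DataArray(
    sv,
    dims=("depth",),
    coords={"depth": echo_range + water_level},
)

# Regrid onto a shared cruise-level depth axis that starts at the offset.
cruise_depths = np.linspace(7.5, 507.5, 1001)
regridded = measured.interp(depth=cruise_depths)  # linear interpolation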

water_column_sonar_processing/geometry/__init__.py
@@ -1,6 +1,11 @@
 from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
-from .geometry_simplification import GeometrySimplification
+from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
 
-__all__ = ["ElevationManager", "GeometryManager", "GeometrySimplification", "PMTileGeneration"]
+__all__ = [
+    "ElevationManager",
+    "GeometryManager",
+    "LineSimplification",
+    "PMTileGeneration",
+]

water_column_sonar_processing/geometry/elevation_manager.py
@@ -26,16 +26,15 @@ https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/Ima
     "status": "OK"
 }
 """
+
 import json
 import time
+from collections.abc import Generator
 
 import requests
-from collections.abc import Generator
 
-def chunked(
-    ll: list,
-    n: int
-) -> Generator:
+
+def chunked(ll: list, n: int) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
@@ -51,10 +50,10 @@ class ElevationManager:
 
     #######################################################
     def get_arcgis_elevation(
-            self,
-            lngs: list,
-            lats: list,
-            chunk_size: int=500, # I think this is the api limit
+        self,
+        lngs: list,
+        lats: list,
+        chunk_size: int = 500,  # I think this is the api limit
     ) -> int:
         # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
         # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
@@ -62,7 +61,7 @@ class ElevationManager:
         if len(lngs) != len(lats):
             raise ValueError("lngs and lats must have same length")
 
-        geometryType = "esriGeometryMultipoint" # TODO: allow single point?
+        geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
 
         depths = []
 
@@ -71,14 +70,14 @@ class ElevationManager:
             time.sleep(0.1)
             # order: (lng, lat)
             geometry = f'{{"points":{str(chunk)}}}'
-            url=f'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json'
+            url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
             result = requests.get(url, timeout=self.TIMOUT_SECONDS)
-            res = json.loads(result.content.decode('utf8'))
-            if 'results' in res:
-                for element in res['results']:
-                    depths.append(float(element['value']))
-            elif 'value' in res:
-                depths.append(float(res['value']))
+            res = json.loads(result.content.decode("utf8"))
+            if "results" in res:
+                for element in res["results"]:
+                    depths.append(float(element["value"]))
+            elif "value" in res:
+                depths.append(float(res["value"]))
 
         return depths
 
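Taken together, chunked() and get_arcgis_elevation() batch point lookups against the NGDC DEM_global_mosaic identify endpoint, at most chunk_size points per request. A hypothetical usage sketch; it assumes ElevationManager() takes no constructor arguments, which this diff does not show:

from water_column_sonar_processing.geometry import ElevationManager

elevation_manager = ElevationManager()  # constructor signature is an assumption
lngs = [-70.45, -70.40, -70.35]  # longitudes first; the API expects (lng, lat)
lats = [40.05, 40.10, 40.15]
# One DEM value comes back per input point; 500 is the API limit assumed in the code above.
depths = elevation_manager.get_arcgis_elevation(lngs=lngs, lats=lats, chunk_size=500)
print(depths)  # negative values are depths below sea level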

water_column_sonar_processing/geometry/geometry_manager.py
@@ -8,17 +8,15 @@ import pandas as pd
 from water_column_sonar_processing.aws import S3Manager
 from water_column_sonar_processing.utility import Cleaner
 
-"""
-// [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
-// 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
-// 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
-// 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
-// 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
-// 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
-// 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
-// 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
-// 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
-"""
+# // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+# // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+# // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+# // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+# // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+# // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+# // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+# // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+# // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
 
 
 class GeometryManager:
  class GeometryManager:
@@ -62,9 +60,9 @@ class GeometryManager:
62
60
  time1 = echodata.environment.time1.values
63
61
 
64
62
  if len(nmea_times) < len(time1):
65
- raise Exception( # TODO: explore this logic further...
63
+ raise Exception(
66
64
  "Problem: Not enough NMEA times available to extrapolate time1."
67
- )
65
+ ) # TODO: explore this logic further...
68
66
 
69
67
  # Align 'sv_times' to 'nmea_times'
70
68
  if not (
@@ -131,7 +129,9 @@ class GeometryManager:
         if write_geojson:
             print("Creating local copy of geojson file.")
             with open(geo_json_name, "w") as write_file:
-                write_file.write(geo_json_line) # NOTE: this file can include zeros for lat lon
+                write_file.write(
+                    geo_json_line
+                )  # NOTE: this file can include zeros for lat lon
 
             geo_json_prefix = (
                 f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
@@ -141,11 +141,16 @@ class GeometryManager:
             s3_manager = S3Manager(endpoint_url=endpoint_url)
             geojson_object_exists = s3_manager.check_if_object_exists(
                 bucket_name=output_bucket_name,
-                key_name=f"{geo_json_prefix}/{geo_json_name}"
+                key_name=f"{geo_json_prefix}/{geo_json_name}",
             )
             if geojson_object_exists:
-                print("GeoJSON already exists in s3, deleting existing and continuing.")
-                s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
+                print(
+                    "GeoJSON already exists in s3, deleting existing and continuing."
+                )
+                s3_manager.delete_nodd_object(
+                    bucket_name=output_bucket_name,
+                    key_name=f"{geo_json_prefix}/{geo_json_name}",
+                )
 
             print("Upload GeoJSON to s3.")
             s3_manager.upload_nodd_file(
@@ -205,7 +210,6 @@ class GeometryManager:
                 sensor_name=sensor_name,
                 file_name_stem=file_name_stem,
                 output_bucket_name=output_bucket_name,
-
             )
             ###
             geospatial = geopandas.GeoDataFrame.from_features(
  geospatial = geopandas.GeoDataFrame.from_features(
@@ -231,13 +235,9 @@ class GeometryManager:
231
235
 
232
236
  ############################################################################
233
237
  # COMES from the raw-to-zarr conversion
234
- def __write_geojson_to_file(
235
- self,
236
- store_name,
237
- data
238
- ) -> None:
239
- print('Writing GeoJSON to file.')
240
- with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
238
+ def __write_geojson_to_file(self, store_name, data) -> None:
239
+ print("Writing GeoJSON to file.")
240
+ with open(os.path.join(store_name, "geo.json"), "w") as outfile:
241
241
  outfile.write(data)
242
242
 
243
243
 

water_column_sonar_processing/geometry/line_simplification.py
@@ -0,0 +1,150 @@
+# import json
+import geopandas as gpd
+import numpy as np
+from pykalman import KalmanFilter
+from shapely.geometry import Point
+
+# import matplotlib.pyplot as plt
+
+
+# lambda for timestamp in form "yyyy-MM-ddTHH:mm:ssZ"
+# dt = lambda: datetime.now().isoformat(timespec="seconds") + "Z"
+
+# TODO: get line for example HB1906 ...save linestring to array for testing
+
+MAX_SPEED_KNOTS = 50
+
+
+# Lambert's formula ==> better accuracy than haversine
+# Lambert's formula (the formula used by the calculators above) is the method used to calculate the shortest distance along the surface of an ellipsoid. When used to approximate the Earth and calculate the distance on the Earth surface, it has an accuracy on the order of 10 meters over thousands of kilometers, which is more precise than the haversine formula.
+
+
+def mph_to_knots(mph_value):
+    # 1 mile per hour === 0.868976 Knots
+    return mph_value * 0.868976
+
+
+# https://shapely.readthedocs.io/en/stable/reference/shapely.MultiLineString.html#shapely.MultiLineString
+class LineSimplification:
+    """
+    // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
+    // 0 1.0 1° 00′ 0″ country or large region 111.32 km 102.47 km 78.71 km 43.496 km
+    // 1 0.1 0° 06′ 0″ large city or district 11.132 km 10.247 km 7.871 km 4.3496 km
+    // 2 0.01 0° 00′ 36″ town or village 1.1132 km 1.0247 km 787.1 m 434.96 m
+    // 3 0.001 0° 00′ 3.6″ neighborhood, street 111.32 m 102.47 m 78.71 m 43.496 m
+    // 4 0.0001 0° 00′ 0.36″ individual street, land parcel 11.132 m 10.247 m 7.871 m 4.3496 m
+    // 5 0.00001 0° 00′ 0.036″ individual trees, door entrance 1.1132 m 1.0247 m 787.1 mm 434.96 mm
+    // 6 0.000001 0° 00′ 0.0036″ individual humans 111.32 mm 102.47 mm 78.71 mm 43.496 mm
+    // 7 0.0000001 0° 00′ 0.00036″ practical limit of commercial surveying 11.132 mm 10.247 mm 7.871 mm 4.3496 mm
+    private static final int SRID = 8307;
+    private static final double simplificationTolerance = 0.0001;
+    private static final long splitGeometryMs = 900000L;
+    private static final int batchSize = 10000;
+    private static final int geoJsonPrecision = 5;
+    final int geoJsonPrecision = 5;
+    final double simplificationTolerance = 0.0001;
+    final int simplifierBatchSize = 3000;
+    final long maxCount = 0;
+    private static final double maxAllowedSpeedKnts = 60D;
+    """
+
+    # TODO: in the future move to standalone library
+    #######################################################
+    def __init__(
+        self,
+    ):
+        pass
+
+    #######################################################
+    def kalman_filter(
+        self,
+        longitudes,
+        latitudes,
+    ) -> (np.ndarray, np.ndarray):
+        """
+        # TODO: need to use masked array to get the right number of values
+        """
+        ### https://github.com/pykalman/pykalman
+        # https://stackoverflow.com/questions/43377626/how-to-use-kalman-filter-in-python-for-location-data
+        measurements = np.asarray([list(elem) for elem in zip(longitudes, latitudes)])
+        initial_state_mean = [measurements[0, 0], 0, measurements[0, 1], 0]
+        transition_matrix = [[1, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]]
+        observation_matrix = [[1, 0, 0, 0], [0, 0, 1, 0]]
+
+        kf = KalmanFilter(
+            transition_matrices=transition_matrix,
+            observation_matrices=observation_matrix,
+            initial_state_mean=initial_state_mean,
+        )
+        kf = kf.em(measurements, n_iter=2)  # TODO: 5
+        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
+
+        # plt.plot(longitudes, latitudes, label="original")
+        # plt.plot(smoothed_state_means[:, 0], smoothed_state_means[:, 2], label="smoothed")
+        # plt.legend()
+        # plt.show()
+
+        return smoothed_state_means[:, [0, 2]]
+
+    #######################################################
+    def get_speeds(
+        self,
+        times: np.ndarray,  # don't really need time, do need to segment the data first
+        latitudes: np.ndarray,
+        longitudes: np.ndarray,
+    ) -> np.ndarray:
+        print(MAX_SPEED_KNOTS)  # TODO: too high
+        print(times[0], latitudes[0], longitudes[0])
+        # TODO: distance/time ==> need to take position2 - position1 to get speed
+
+        # get distance difference
+        geom = [Point(xy) for xy in zip(longitudes, latitudes)]
+        points_df = gpd.GeoDataFrame({"geometry": geom}, crs="EPSG:4326")
+        # Conversion to UTM, a rectilinear projection coordinate system where distance can be calculated with pythagorean theorem
+        # an alternative could be to use EPSG 32663
+        points_df.to_crs(
+            epsg=3310, inplace=True
+        )  # https://gis.stackexchange.com/questions/293310/finding-distance-between-two-points-with-geoseries-distance
+        distance_diffs = points_df.distance(points_df.shift())
+        # distance_diffs_sorted = distance_diffs.sort_values(
+        #     ascending=False
+        # )  # TODO: get avg cutoff time
+        #
+        time_diffs_ns = np.append(0, (times[1:] - times[:-1]).astype(int))
+        # time_diffs_ns_sorted = np.sort(time_diffs_ns)
+        # largest time diffs HB0707 [ 17. 17.93749786 21.0781271 54.82812723 85.09374797, 113.56249805 204.87500006 216. 440.68749798 544.81249818]
+        # largest diffs HB1906 [3.01015808e+00 3.01016013e+00 3.01017805e+00 3.01018701e+00, 3.01018701e+00 3.01018906e+00 3.01019802e+00 3.01021005e+00, 3.01021005e+00 3.01021414e+00 3.01022208e+00 3.01022899e+00, 3.01024998e+00 3.01025920e+00 3.01026202e+00 3.01028096e+00, 3.01119411e+00 3.01120896e+00 3.01120998e+00 3.01120998e+00, 3.01122099e+00 3.01122790e+00 3.01122790e+00 3.01124506e+00, 3.01125197e+00 3.01128090e+00 3.01142707e+00 3.01219814e+00, 3.01221120e+00 3.01223014e+00 3.01225498e+00 3.01225882e+00, 3.01226010e+00 3.01312998e+00 3.01316096e+00 3.01321190e+00, 3.01321293e+00 3.01322880e+00 3.01322906e+00 3.01323110e+00, 3.01323213e+00 3.01323290e+00 3.01326208e+00 3.01328512e+00, 3.01418112e+00 3.01420109e+00 3.01421107e+00 3.01421184e+00, 3.01421414e+00 3.01424819e+00 3.01512883e+00 3.01516006e+00, 3.01524198e+00 3.01619917e+00 3.01623194e+00 3.01623296e+00, 3.01917594e+00 3.01921408e+00 3.01921587e+00 3.02022195e+00, 3.02025216e+00 3.02121702e+00 3.02325811e+00 3.02410291e+00, 3.02421914e+00 3.02426701e+00 3.02523776e+00 3.02718694e+00, 3.02927590e+00 3.03621606e+00 3.03826304e+00 3.34047514e+00, 3.36345114e+00 3.39148595e+00 4.36819302e+00 4.50157901e+00, 4.50315699e+00 4.50330598e+00 4.50333491e+00 4.50428416e+00, 4.50430490e+00 4.50430694e+00 4.50526387e+00 4.50530790e+00, 4.50530995e+00 4.50532301e+00 4.50533478e+00 4.50629402e+00, 4.50730701e+00 4.50825882e+00 4.50939008e+00 6.50179098e+00, 2.25025029e+01 1.39939425e+02 1.54452331e+02 1.60632653e+03, 1.74574667e+05 4.33569587e+05 4.35150475e+05 8.00044883e+05]
+        nanoseconds_per_second = 1e9
+        speed_meters_per_second = (
+            distance_diffs / time_diffs_ns * nanoseconds_per_second
+        )
+        # returns the speed in meters per second #TODO: get speed in knots
+        return speed_meters_per_second.to_numpy(dtype="float32")  # includes nan
+
+    def remove_null_island_values(
+        self,
+        epsilon=1e-5,
+    ) -> None:
+        # TODO: low priority
+        print(epsilon)
+        pass
+
+    def break_linestring_into_multi_linestring(
+        self,
+    ) -> None:
+        # TODO: medium priority
+        # For any line-strings across the antimeridian, break into multilinestring
+        # average cadence is measurements every 1 second
+        # break when over 1 minute
+        pass
+
+    def simplify(
+        self,
+    ) -> None:
+        # TODO: medium-high priority
+        pass
+
+    #######################################################
+
+
+###########################################################
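
The core of get_speeds() is: project the fixes from EPSG:4326 into a planar CRS so that .distance() returns meters, then divide consecutive distance deltas by consecutive time deltas. A self-contained sketch of the same arithmetic on a toy three-point track; the knots conversion at the end is an addition here (the method above still returns m/s, per its TODO), and it reuses the diff's EPSG:3310 choice, whose meter accuracy degrades far from California:

import geopandas as gpd
import numpy as np
from shapely.geometry import Point

# Toy track: three fixes, one second apart, moving west along 41.5 N.
times = np.array(
    ["2019-06-01T00:00:00", "2019-06-01T00:00:01", "2019-06-01T00:00:02"],
    dtype="datetime64[ns]",
)
lons = np.array([-70.0000, -70.0001, -70.0002])
lats = np.array([41.5, 41.5, 41.5])

points = gpd.GeoDataFrame(
    {"geometry": [Point(xy) for xy in zip(lons, lats)]}, crs="EPSG:4326"
).to_crs(epsg=3310)  # planar CRS: .distance() now yields meters

meters = points.distance(points.shift())  # NaN for the first fix
seconds = np.append(np.nan, np.diff(times).astype("int64")) / 1e9
speed_mps = (meters / seconds).to_numpy(dtype="float32")
speed_knots = speed_mps * 1.943844  # 1 m/s = 1.943844 knots
print(speed_knots)  # roughly 16 knots at this toy spacing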