water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of water-column-sonar-processing has been flagged as possibly problematic by the registry scanner.
- water_column_sonar_processing/aws/s3_manager.py +2 -4
- water_column_sonar_processing/aws/s3fs_manager.py +1 -9
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
- water_column_sonar_processing/cruise/resample_regrid.py +88 -104
- water_column_sonar_processing/geometry/__init__.py +2 -0
- water_column_sonar_processing/geometry/elevation_manager.py +2 -2
- water_column_sonar_processing/geometry/geometry_manager.py +11 -13
- water_column_sonar_processing/geometry/line_simplification.py +10 -10
- water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +43 -46
- water_column_sonar_processing/model/zarr_manager.py +533 -514
- water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
- water_column_sonar_processing/utility/cleaner.py +2 -1
- water_column_sonar_processing/utility/constants.py +29 -29
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
- water_column_sonar_processing/process.py +0 -149
- water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py

@@ -31,8 +31,6 @@ class S3Manager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -56,6 +54,7 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
@@ -76,6 +75,7 @@ class S3Manager:
                 endpoint_url=self.endpoint_url,
             )
         )
+        #
         self.paginator = self.s3_client.get_paginator("list_objects_v2")
         self.paginator_noaa_wcsd_zarr_pds = (
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
@@ -117,7 +117,6 @@ class S3Manager:
         return client.list_buckets()

     #####################################################################
-    # tested
     def upload_nodd_file(
         self,
         file_name: str,
@@ -133,7 +132,6 @@ class S3Manager:
         return key

     #####################################################################
-    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,
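The functional change in s3_manager.py is that endpoint_url is now threaded into the boto3 client itself, which lets the same manager target an S3-compatible test endpoint instead of AWS. A minimal sketch of that pattern, with the endpoint value as a hypothetical stand-in (e.g. a moto or MinIO server started by a test fixture):

import boto3

# Hypothetical endpoint; endpoint_url=None falls back to the real AWS endpoint.
moto_endpoint = "http://127.0.0.1:5000"

s3_client = boto3.client(
    service_name="s3",
    region_name="us-east-1",
    endpoint_url=moto_endpoint,
)
print(s3_client.list_buckets()["Buckets"])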
water_column_sonar_processing/aws/s3fs_manager.py

@@ -3,6 +3,7 @@ from typing import Optional

 import s3fs

+
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 # S3FS_LOGGING_LEVEL=DEBUG

@@ -21,17 +22,8 @@ class S3FSManager:
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True,
         )
-        # self.s3fs.ls("")

-        # s3_fs = s3fs.S3FileSystem( # TODO: use s3fs_manager?
-        #     anon=True,
-        #     client_kwargs={
-        #         "endpoint_url": moto_server,
-        #         "region_name": "us-east-1",
-        #     },
-        # )
     #####################################################################
     def s3_map(
         self,
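The surviving TODO about S3FS_LOGGING_LEVEL refers to s3fs's environment-variable logging switch; a small sketch, with the bucket name taken from the public noaa-wcsd-zarr-pds bucket this package writes to (note the env var must be set before s3fs is imported):

import os

os.environ["S3FS_LOGGING_LEVEL"] = "DEBUG"  # read by s3fs at import time

import s3fs

fs = s3fs.S3FileSystem(anon=True)
print(fs.ls("noaa-wcsd-zarr-pds")[:5])  # anonymous listing of the public bucket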
water_column_sonar_processing/cruise/create_empty_zarr_store.py

@@ -6,6 +6,7 @@ import numpy as np
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.model import ZarrManager
 from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Constants


 # TODO: change name to "CreateLocalEmptyZarrStore"
@@ -19,52 +20,21 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

     #######################################################
-
-    # def upload_zarr_store_to_s3(
-    #     self,
-    #     output_bucket_name: str,
-    #     local_directory: str,
-    #     object_prefix: str,
-    #     cruise_name: str,
-    # ) -> None:
-    #     print("uploading model store to s3")
-    #     s3_manager = S3Manager()
-    #     #
-    #     print("Starting upload with thread pool executor.")
-    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-    #     all_files = []
-    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-    #         for file in files:
-    #             local_path = os.path.join(subdir, file)
-    #             # TODO: find a better method for splitting strings here:
-    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-    #             all_files.append([local_path, s3_key])
-    #     #
-    #     # print(all_files)
-    #     s3_manager.upload_files_with_thread_pool_executor(
-    #         output_bucket_name=output_bucket_name,
-    #         all_files=all_files,
-    #     )
-    #     print("Done uploading with thread pool executor.")
-    #     # TODO: move to common place
-
-    #######################################################
+    @staticmethod
     def create_cruise_level_zarr_store(
-        self,
         output_bucket_name: str,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         table_name: str,
-        # override_cruise_min_epsilon=None,
     ) -> None:
         """
-        Initialize zarr store
+        Initialize zarr store for the entire cruise which aggregates all the raw data.
+        All cruises will be resampled at 20 cm depth.
+        # tempdir="/tmp", # TODO: create better tmp directory for testing
         """
         tempdir = tempfile.TemporaryDirectory()
         try:
-            # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
             s3_manager = S3Manager()

@@ -76,7 +46,7 @@
             # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

-            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!! ...no more geojson needed

             print(f"DataFrame shape: {df.shape}")
             cruise_channels = list(
@@ -88,18 +58,11 @@
                 df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
             )

-            # [
-            #
-            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            # )
-
-            # [4] calculate the np.max(max_echo_range + water_level)
+            # [4] max measurement resolution for the whole cruise
+            # Each max-echo-range is paired with water-level and then find the max of that
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
-
-            # TODO: set this to either 1 or 0.5 meters
-            cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )  # max_echo_range now includes water_level

             print(f"cruise_max_echo_range: {cruise_max_echo_range}")

@@ -107,21 +70,18 @@
             cruise_frequencies = [
                 float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
             ]
-            print(cruise_frequencies)

             new_width = int(consolidated_zarr_width)
-            print(f"new_width: {new_width}")
-            #################################################################
-            store_name = f"{cruise_name}.zarr"
-            print(store_name)
             ################################################################
-            # Delete existing
-            zarr_prefix = os.path.join(
+            # Delete any existing stores
+            zarr_prefix = os.path.join(
+                str(Constants.LEVEL_2.value), ship_name, cruise_name, sensor_name
+            )
             child_objects = s3_manager.get_child_objects(
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
-
+
             if len(child_objects) > 0:
                 s3_manager.delete_nodd_objects(
                     bucket_name=output_bucket_name,
@@ -130,50 +90,28 @@
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
-                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
-                    # min_echo_range=cruise_min_echo_range,
-                    max_echo_range=cruise_max_echo_range,
-                    cruise_min_epsilon=cruise_min_epsilon,
-                )
-            )
-            print(f"new_height: {new_height}")
-
             zarr_manager.create_zarr_store(
-                path=tempdir.name,
+                path=tempdir.name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
-                cruise_min_epsilon=cruise_min_epsilon,
+                # cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
+            # TODO: would be more elegant to create directly into s3 bucket
             s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
-                local_directory=tempdir.name,
+                local_directory=tempdir.name,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
-            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
             #################################################################
-            #
-            # count = self.__get_file_count(store_name=store_name)
-            # #
-            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-            #     bucket_name=self.__output_bucket,
-            #     sub_prefix=os.path.join(zarr_prefix, store_name),
-            # )
-            # if len(raw_zarr_files) != count:
-            #     print(f'Problem writing {store_name} with proper count {count}.')
-            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-            # else:
-            #     print("File counts match.")
+            # TODO: verify count of the files uploaded
             #################################################################
-            # Success
             # TODO: update enum in dynamodb
             print("Done creating cruise level zarr store.")
             #################################################################
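The new docstring pins every cruise to a 20 cm depth grid and drops the per-cruise cruise_min_epsilon argument. Assuming get_depth_values() builds a uniform grid at that resolution (an assumption; the implementation lives in zarr_manager.py, whose diff is not expanded here), the store height follows directly from cruise_max_echo_range:

import numpy as np

# Sketch only: uniform 20 cm bins from the first bin down to
# max_echo_range (which now already includes water_level).
def depth_values_sketch(max_echo_range: float, resolution: float = 0.2) -> np.ndarray:
    return np.arange(resolution, max_echo_range + resolution, resolution)

depths = depth_values_sketch(max_echo_range=1001.744)
print(len(depths))  # ~5009 bins for a ~1000 m cruise, vs the 5272 noted at the old 0.19 m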
water_column_sonar_processing/cruise/resample_regrid.py

@@ -3,11 +3,9 @@ import warnings
 from pathlib import Path

 import numpy as np
-import pandas as pd
 import xarray as xr

 from water_column_sonar_processing.aws import DynamoDBManager
-from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.model import ZarrManager

 warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -19,28 +17,29 @@ class ResampleRegrid:
         self,
     ):
         self.__overwrite = True
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.dtype = "float32"

     #################################################################
     def interpolate_data(
         self,
-        input_xr,
-        ping_times,
-        all_cruise_depth_values,  # includes water_level offset
-        water_level
+        input_xr: xr.Dataset,
+        ping_times: np.ndarray,
+        all_cruise_depth_values: np.ndarray,  # includes water_level offset
+        water_level: float = 0.0,
     ) -> np.ndarray:
         """
-
+        Input dataset is passed in along with times and depth values to regrid to.
         """
         print("Interpolating dataset.")
         try:
+            # add offset for the water level to the whole input xarray
+            input_xr.depth.values = input_xr.depth.values + water_level
+
             data = np.empty(
-                (
+                (  # Depth / Time / Frequency
                     len(all_cruise_depth_values),
                     len(ping_times),
-                    len(input_xr.frequency_nominal),
+                    len(input_xr.frequency_nominal.values),
                 ),
                 dtype=self.dtype,
             )
@@ -49,36 +48,27 @@

             regrid_resample = xr.DataArray(  # where data will be written to
                 data=data,
-                dims=("depth", "time", "frequency"),
                 coords={
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
+                dims=("depth", "time", "frequency"),
+                name="Sv",
             )

-            # shift the input data by water_level
-            input_xr.echo_range.values = (
-                input_xr.echo_range.values + water_level
-            )  # water_level # TODO: change
-
             channels = input_xr.channel.values
-            for channel in range(
-                len(channels)
-            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
+            for channel in range(len(channels)):
                 gc.collect()
                 max_depths = np.nanmax(
-                    a=input_xr.
+                    a=input_xr.depth.sel(channel=input_xr.channel[channel]).values,
                     # + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    max_depths
-                )  # HB1501, D20150503-T102035.raw, TypeError: unhashable type: 'numpy.ndarray'
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
-                )  #
-                # iterate through partitions of dataset with similar depths and resample
+                )  # To speed things up resample in groups denoted by max_depth -- so samples might no longer be adjacent
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
@@ -87,46 +77,50 @@
                         if max_depths[i] == select_max_depth
                     ]

-                    # now create new DataArray with proper dimension and indices
-                    # data_select = input_xr.Sv.sel(
-                    #     channel=input_xr.channel[channel]
-                    # ).values[select_indices, :].T  # TODO: dont like this transpose
                     data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
                         select_indices, :
                     ].T.values
-                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                     times_select = input_xr.ping_time.values[select_indices]
-
-
-
-
-
-
+                    # input_xr.depth[0][0] -> [0., 499.9] before
+                    # input_xr.depth.values = input_xr.depth.values + water_level # issue here!! overwritting all the data
+                    # input_xr.depth[0][0] -> [7.5, 507.40] after
+                    depths_all = input_xr.depth.sel(
+                        channel=input_xr.channel[channel],
+                        ping_time=input_xr.ping_time[select_indices[0]],
+                    ).values
+                    depths_select = depths_all[~np.isnan(depths_all)]
+                    #
                     da_select = xr.DataArray(
-                        data=data_select,
+                        data=data_select[: len(depths_select), :],
                         dims=("depth", "time"),
                         coords={
                             "depth": depths_select,
                             "time": times_select,
                         },
-                    ).dropna(dim="depth")
-                    resampled = da_select.interp(
-                        depth=all_cruise_depth_values, method="nearest"
                     )
-                    #
-
+                    # 'resampled' is now the interpolated superset of new dimensions
+                    resampled = da_select.interp(  # need to define the data with water level (domain)
+                        depth=all_cruise_depth_values,  # and need to interpolate over the (range)
+                        method="nearest",
+                        assume_sorted=True,
+                    )  # good through here, @27 is -3.11 which is 5.4 m depth
+
+                    ### write to outptut ###
+                    regrid_resample.loc[  # ~150 MB for 5001x7706x4
                         dict(
                             time=times_select,
                             frequency=input_xr.frequency_nominal.values[channel],
                         )
                     ] = resampled
-                    print(f"updated {len(times_select)} ping times")
+                    # print(f"updated {len(times_select)} ping times")
                     gc.collect()
+            return regrid_resample.values.copy()
         except Exception as err:
             raise RuntimeError(f"Problem finding the dynamodb table, {err}")
-
-
+        finally:
+            gc.collect()
+            print("Done interpolating dataset.")

     #################################################################
     def resample_regrid(
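interpolate_data now groups pings by their maximum depth and regrids each group onto the cruise-wide depth axis with xarray's nearest-neighbour interpolation. A self-contained toy version of that core step (names and values are illustrative only; interp with method="nearest" needs scipy installed):

import numpy as np
import xarray as xr

# Toy Sv slice: 3 depth samples x 2 ping times.
da_select = xr.DataArray(
    data=np.array([[-70.0, -71.0], [-60.0, -61.0], [-50.0, -51.0]]),
    dims=("depth", "time"),
    coords={"depth": [0.5, 1.0, 1.5], "time": [0, 1]},
)

# Cruise-wide target grid; each target depth snaps to the nearest source depth.
all_cruise_depth_values = np.arange(0.25, 2.0, 0.25)
resampled = da_select.interp(
    depth=all_cruise_depth_values, method="nearest", assume_sorted=True
)
print(resampled.shape)  # (7, 2); target depths outside [0.5, 1.5] become NaN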
@@ -137,7 +131,6 @@
         table_name,
         bucket_name,
         override_select_files=None,
-        # override_cruise_min_epsilon=None,
         endpoint_url=None,
     ) -> None:
         """
@@ -149,7 +142,6 @@
         print("Resample Regrid, Interpolating dataset.")
         try:
             zarr_manager = ZarrManager()
-            geo_manager = GeometryManager()

             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                 ship_name=ship_name,
@@ -159,12 +151,9 @@
                 endpoint_url=endpoint_url,
             )

-            # get dynamo stuff
             dynamo_db_manager = DynamoDBManager()
             cruise_df = dynamo_db_manager.get_table_as_df(
-                # ship_name=ship_name,
                 cruise_name=cruise_name,
-                # sensor_name=sensor_name,
                 table_name=table_name,
             )

@@ -182,6 +171,7 @@
             print(f"Processing file: {file_name_stem}.")

             if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+                print("Raw file file_stem not found in dynamodb.")
                 raise Exception("Raw file file_stem not found in dynamodb.")

             # status = PipelineStatus['LEVEL_1_PROCESSING']
@@ -195,20 +185,21 @@
                 ]
             )

-            # Get input store
+            # Get input store
             input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name_stem=file_name_stem,
-
+                bucket_name=bucket_name,
                 endpoint_url=endpoint_url,
             )

+            #########################################################################
             # This is the vertical offset of the sensor related to the ocean surface
             # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
             if "water_level" in input_xr_zarr_store.keys():
-                water_level = input_xr_zarr_store.water_level.values
+                water_level = float(input_xr_zarr_store.water_level.values)
             else:
                 water_level = 0.0
             #########################################################################
@@ -224,60 +215,52 @@
                 start_ping_time_index = ping_time_cumsum[index]
                 end_ping_time_index = ping_time_cumsum[index + 1]

-                max_echo_range = np.max(
+                max_echo_range = np.max(  # Should water level go in here?
                     (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
                     .dropna()
-                    .astype(
-                )
-                cruise_min_epsilon = np.min(
-                    cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                    .astype(np.float32)
                 )
+                # cruise_min_epsilon = np.min(
+                #     cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                # )  # TODO: currently overwriting to 0.25 m

-
-
-                #
-
-                    cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
-                )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
+                all_cruise_depth_values = zarr_manager.get_depth_values(
+                    max_echo_range=max_echo_range,
+                    # cruise_min_epsilon=cruise_min_epsilon,
+                )

-
-
+                if set(
+                    input_xr_zarr_store.Sv.dims
+                ) != {  # Cruise dimensions are: (depth, time, frequency)
                     "channel",
                     "ping_time",
                     "range_sample",
                 }:
                     raise Exception("Xarray dimensions are not as expected.")

-                indices, geospatial = geo_manager.read_s3_geo_json(
-
-
-
-
-
-
-                )
+                # indices, geospatial = geo_manager.read_s3_geo_json(  # TODO: remove this!!!!
+                #     ship_name=ship_name,
+                #     cruise_name=cruise_name,
+                #     sensor_name=sensor_name,
+                #     file_name_stem=file_name_stem,
+                #     input_xr_zarr_store=input_xr_zarr_store,
+                #     endpoint_url=endpoint_url,
+                #     output_bucket_name=bucket_name,
+                # )

-                input_xr = input_xr_zarr_store.isel(
-                    ping_time=indices
-                )  # Problem with HB200802-D20080310-T174959.zarr/
+                input_xr = input_xr_zarr_store  # .isel(ping_time=indices)

                 ping_times = input_xr.ping_time.values
-                # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-                epoch_seconds = [
-                    (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
-                    for i in ping_times
-                ]
                 output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
-
+                    input_xr.ping_time.data
                 )

-                # --- UPDATING --- #
+                # --- UPDATING --- #  # TODO: problem, this returns dimensionless array
                 regrid_resample = self.interpolate_data(
                     input_xr=input_xr,
                     ping_times=ping_times,
                     all_cruise_depth_values=all_cruise_depth_values,  # should accommodate the water_level already
-                    water_level=water_level,
+                    water_level=water_level,
                 )

                 print(
@@ -288,50 +271,51 @@

                 for fff in range(regrid_resample.shape[-1]):
                     output_zarr_store["Sv"][
-                        :,
+                        : regrid_resample[:, :, fff].shape[0],
+                        start_ping_time_index:end_ping_time_index,
+                        fff,
                     ] = regrid_resample[:, :, fff]
                 #########################################################################
-                # TODO: add the "detected_seafloor_depth/" to the
-                # L2 cruise dataarrays
-                # TODO: make bottom optional
-                # TODO: Only checking the first channel for now. Need to average across all channels
                 # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-                if "detected_seafloor_depth" in input_xr.variables:
-                    print(
-                        "Found detected_seafloor_depth, adding dataset to output store."
-                    )
+                if "detected_seafloor_depth" in list(input_xr.variables):
+                    print("Adding detected_seafloor_depth to output")
                     detected_seafloor_depth = input_xr.detected_seafloor_depth.values
                     detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
-                    # TODO: problem here: Processing file: D20070711-T210709.

-                    #
+                    # As requested, use the lowest frequencies to determine bottom
                    detected_seafloor_depths = detected_seafloor_depth[0, :]

                     detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
                     print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
                     print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
-                    # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
                     output_zarr_store["bottom"][
                         start_ping_time_index:end_ping_time_index
                     ] = detected_seafloor_depths
                 #
                 #########################################################################
                 # [5] write subset of latitude/longitude
+                # output_zarr_store["latitude"][
+                #     start_ping_time_index:end_ping_time_index
+                # ] = geospatial.dropna()[
+                #     "latitude"
+                # ].values  # TODO: get from ds_sv directly, dont need geojson anymore
+                # output_zarr_store["longitude"][
+                #     start_ping_time_index:end_ping_time_index
+                # ] = geospatial.dropna()["longitude"].values
+                #########################################################################
                 output_zarr_store["latitude"][
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()[
-                    "latitude"
-                ].values  # TODO: get from ds_sv directly, dont need geojson anymore
+                ] = input_xr_zarr_store.latitude.dropna(dim="ping_time").values
                 output_zarr_store["longitude"][
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()["longitude"].values
-                #########################################################################
+                ] = input_xr_zarr_store.longitude.dropna(dim="ping_time").values
                 #########################################################################
         except Exception as err:
             raise RuntimeError(f"Problem with resample_regrid, {err}")
         finally:
             print("Exiting resample_regrid.")
             # TODO: read across times and verify dataset was written?
+            gc.collect()

     #######################################################

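Each raw file's pings are written into the cruise store at offsets taken from a running total of per-file ping counts, so the files tile the shared time axis without overlap. A toy version of that bookkeeping (array contents are made up):

import numpy as np

num_ping_times = np.array([120, 80, 200])  # hypothetical per-file ping counts

# Prepend 0 so file i owns the half-open slice [cumsum[i], cumsum[i + 1]).
ping_time_cumsum = np.concatenate(([0], np.cumsum(num_ping_times)))
for index in range(len(num_ping_times)):
    start_ping_time_index = ping_time_cumsum[index]
    end_ping_time_index = ping_time_cumsum[index + 1]
    print(f"file {index} -> time[{start_ping_time_index}:{end_ping_time_index}]")
# file 0 -> time[0:120], file 1 -> time[120:200], file 2 -> time[200:400]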
water_column_sonar_processing/geometry/__init__.py

@@ -2,10 +2,12 @@ from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
 from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
+from .spatiotemporal import Spatiotemporal

 __all__ = [
     "ElevationManager",
     "GeometryManager",
     "LineSimplification",
     "PMTileGeneration",
+    "Spatiotemporal",
 ]
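The re-export makes the new spatiotemporal module (added wholesale in this release, +106 lines per the file list above) importable from the package's geometry namespace; its methods are not visible in this diff, so only the import is shown:

# The import path follows from the __init__.py change above; the no-arg
# constructor is an assumption based on the package's other manager classes.
from water_column_sonar_processing.geometry import Spatiotemporal

spatiotemporal = Spatiotemporal()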
water_column_sonar_processing/geometry/elevation_manager.py

@@ -46,7 +46,7 @@ class ElevationManager:
         self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-        self.
+        self.TIMEOUT_SECONDS = 10

     #######################################################
     def get_arcgis_elevation(
@@ -71,7 +71,7 @@ class ElevationManager:
         # order: (lng, lat)
         geometry = f'{{"points":{str(chunk)}}}'
         url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
-        result = requests.get(url, timeout=self.
+        result = requests.get(url, timeout=self.TIMEOUT_SECONDS)
         res = json.loads(result.content.decode("utf8"))
         if "results" in res:
             for element in res["results"]:
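The identify endpoint hard-coded above can be exercised on its own; a minimal sketch mirroring the request/parse flow, where geometryType=esriGeometryMultipoint (matching the {"points": ...} JSON) and the "value" key are assumptions, since neither appears in the expanded diff:

import json

import requests

geometry = '{"points":[[-70.0,42.0]]}'  # one (lng, lat) pair, arbitrary location
url = (
    "https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic"
    f"/ImageServer/identify?geometry={geometry}&geometryType=esriGeometryMultipoint"
    "&returnGeometry=false&returnCatalogItems=false&f=json"
)
result = requests.get(url, timeout=10)
res = json.loads(result.content.decode("utf8"))
if "results" in res:
    for element in res["results"]:
        print(element.get("value"))  # assumed key; negative values are below sea level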
|