water-column-sonar-processing 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. water_column_sonar_processing/__init__.py +2 -5
  2. water_column_sonar_processing/aws/__init__.py +2 -2
  3. water_column_sonar_processing/aws/dynamodb_manager.py +149 -43
  4. water_column_sonar_processing/aws/s3_manager.py +71 -37
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +6 -4
  6. water_column_sonar_processing/cruise/resample_regrid.py +3 -3
  7. water_column_sonar_processing/geometry/geometry_manager.py +21 -6
  8. water_column_sonar_processing/geometry/pmtile_generation.py +200 -13
  9. water_column_sonar_processing/index/index_manager.py +25 -13
  10. water_column_sonar_processing/model/zarr_manager.py +27 -25
  11. water_column_sonar_processing/process.py +4 -4
  12. water_column_sonar_processing/processing/__init__.py +4 -0
  13. water_column_sonar_processing/processing/cruise_sampler.py +342 -0
  14. water_column_sonar_processing/processing/raw_to_zarr.py +349 -0
  15. water_column_sonar_processing/utility/__init__.py +2 -2
  16. water_column_sonar_processing/utility/cleaner.py +1 -0
  17. water_column_sonar_processing/utility/constants.py +6 -2
  18. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/METADATA +20 -10
  19. water_column_sonar_processing-0.0.9.dist-info/RECORD +32 -0
  20. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/WHEEL +1 -1
  21. water_column_sonar_processing-0.0.7.dist-info/RECORD +0 -29
  22. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/LICENSE +0 -0
  23. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/geometry/pmtile_generation.py

@@ -1,14 +1,22 @@
+ import glob
  import os
  from pathlib import Path
-
- # from shapely import wkt
- # import json
- # from shapely.geometry import shape, GeometryCollection
  import fiona
- import geopandas
+ import s3fs
+ import numpy as np
  import pandas as pd
+ import xarray as xr
+ import geopandas
+ import geopandas as gpd
+ import pyogrio
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from shapely.geometry import LineString

+ MAX_POOL_CONNECTIONS = 64
+ MAX_CONCURRENCY = 64
+ MAX_WORKERS = 64
+ GB = 1024**3
+

  class PMTileGeneration(object):
      #######################################################
@@ -18,34 +26,35 @@ class PMTileGeneration(object):
          print("123")

      #######################################################
+     # This uses a local collection of file-level geojson files to create the data
      def generate_geojson_feature_collection(self):
          # This was used to read from noaa-wcsd-model-pds bucket geojson files and then to
          # generate the geopandas dataframe which could be exported to another comprehensive
          # geojson file. That
          result = list(Path("/Users/r2d2/Documents/echofish/geojson").rglob("*.json"))
          # result = result[:100]
-         iii = 0
+         jjj = 0
          pieces = []
-         for iii in range(len(result)):
-             file_name = os.path.normpath(result[iii]).split(os.sep)[-1]
+         for jjj in range(len(result)):
+             file_name = os.path.normpath(result[jjj]).split(os.sep)[-1]
              file_stem = os.path.splitext(os.path.basename(file_name))[0]
-             geom = geopandas.read_file(result[iii]).iloc[0]["geometry"]
+             geom = gpd.read_file(result[jjj]).iloc[0]["geometry"]
              # TDOO: Filter (0,0) coordinates
              if len(geom.coords.xy[0]) < 2:
                  continue
              geom = LineString(list(zip(geom.coords.xy[1], geom.coords.xy[0])))
              pieces.append(
                  {
-                     "ship_name": os.path.normpath(result[iii]).split(os.sep)[-4],
-                     "cruise_name": os.path.normpath(result[iii]).split(os.sep)[-3],
+                     "ship_name": os.path.normpath(result[jjj]).split(os.sep)[-4],
+                     "cruise_name": os.path.normpath(result[jjj]).split(os.sep)[-3],
                      "file_stem": file_stem,
-                     "file_path": result[iii],
+                     "file_path": result[jjj],
                      "geom": geom,
                  }
              )
          df = pd.DataFrame(pieces)
          print(df)
-         gps_gdf = geopandas.GeoDataFrame(
+         gps_gdf = gpd.GeoDataFrame(
              data=df[
                  ["ship_name", "cruise_name", "file_stem"]
              ], # try again with file_stem
@@ -70,6 +79,184 @@ class PMTileGeneration(object):
          """

      #######################################################
+     # TODO: temporary using this to get info
+     def get_info_from_zarr_store(
+         self,
+         ship_name,
+         cruise_names,
+     ):
+         total_size = 0
+         s3_fs = s3fs.S3FileSystem(anon=True)
+         for cruise_name in cruise_names:
+             path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/{ship_name}/{cruise_name}/EK60/{cruise_name}.zarr"
+             zarr_store = s3fs.S3Map(root=path_to_zarr_store, s3=s3_fs)
+             xr_store = xr.open_zarr(store=zarr_store, consolidated=None)
+             print(f'Cruise: {cruise_name}, shape: {xr_store.time.shape[0]}')
+             total_size = total_size + xr_store.time.shape[0]
+
+     def get_geospatial_info_from_zarr_store(
+         self,
+         ship_name,
+         cruise_name,
+     ):
+         """
+         Open Zarr store, create geometry, write to geojson, return name
+         """
+         s3_fs = s3fs.S3FileSystem(anon=True)
+         gps_gdf = geopandas.GeoDataFrame(
+             columns=["id", "ship", "cruise", "sensor", "geometry"],
+             geometry="geometry",
+             crs="EPSG:4326"
+         )
+         path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/{ship_name}/{cruise_name}/EK60/{cruise_name}.zarr"
+         # file_name = os.path.normpath(path_to_zarr_store).split(os.sep)[-1]
+         # file_stem = os.path.splitext(os.path.basename(file_name))[0]
+         zarr_store = s3fs.S3Map(root=path_to_zarr_store, s3=s3_fs)
+         # ---Open Zarr Store--- #
+         # TODO: try-except to allow failures
+         print('opening store')
+         # xr_store = xr.open_zarr(store=zarr_store, consolidated=False)
+         xr_store = xr.open_zarr(store=zarr_store, consolidated=None)
+         print(xr_store.Sv.shape)
+         # ---Read Zarr Store Time/Latitude/Longitude--- #
+         latitude = xr_store.latitude.values
+         longitude = xr_store.longitude.values
+         if np.isnan(latitude).any() or np.isnan(longitude).any():
+             print(f'there was missing lat-lon data for {cruise_name}')
+             return None
+         # ---Add To GeoPandas Dataframe--- #
+         # TODO: experiment with tolerance "0.001"
+         geom = LineString(list(zip(longitude, latitude))).simplify(tolerance=0.001, preserve_topology=True)
+         gps_gdf.loc[0] = (0, "Henry_B._Bigelow", cruise_name, "EK60", geom) # (ship, cruise, sensor, geometry)
+         gps_gdf.set_index('id', inplace=True)
+         gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON") #, engine="pyogrio")
+         return cruise_name
+
+     #######################################################
+     def open_zarr_stores_with_thread_pool_executor(
+         self,
+         cruises: list,
+     ):
+         # 'cruises' is a list of cruises to process
+         completed_cruises = []
+         try:
+             with ThreadPoolExecutor(max_workers=32) as executor:
+                 futures = [
+                     executor.submit(
+                         self.get_geospatial_info_from_zarr_store,
+                         "Henry_B._Bigelow", # ship_name
+                         cruise, # cruise_name
+                     )
+                     for cruise in cruises
+                 ]
+                 for future in as_completed(futures):
+                     result = future.result()
+                     if result:
+                         completed_cruises.extend([result])
+         except Exception as err:
+             print(err)
+         print("Done opening zarr stores using thread pool.")
+         return completed_cruises # Took ~12 minutes
+
+     #######################################################
+     # https://docs.protomaps.com/pmtiles/create
+     def aggregate_geojson_into_dataframe(
+         self
+     ):
+         """
+         iterate through cruises, threadpoolexecute geojson creation, aggregate geojson files into df,
+         """
+         gps_gdf = geopandas.GeoDataFrame(
+             columns=["id", "ship", "cruise", "sensor", "geometry"],
+             geometry="geometry",
+             crs="EPSG:4326"
+         )
+
+         file_type = 'dataframe_*.geojson'
+         geojson_files = glob.glob(file_type)
+         for jjj in range(len(geojson_files)):
+             print(jjj)
+             geom = geopandas.read_file(geojson_files[jjj])
+             gps_gdf.loc[jjj] = (jjj, geom.ship[0], geom.cruise[0], geom.sensor[0], geom.geometry[0])
+             #gps_gdf.loc[0] = (0, "Henry_B._Bigelow", cruise_name, "EK60", geom) # (ship, cruise, sensor, geometry)
+         print(gps_gdf)
+         gps_gdf.set_index('id', inplace=True)
+         gps_gdf.to_file(f"data.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
+         return list(gps_gdf.cruise)
+
+         # gps_gdf.loc[iii] = (iii, "Henry_B._Bigelow", cruise_name, "EK60", geom) # (ship, cruise, sensor, geometry)
+         #print('writing to file')
+         #print(gps_gdf)
+         # gps_gdf.set_index('id', inplace=True)
+         # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
+         # https://gdal.org/en/latest/drivers/vector/jsonfg.html
+         # gps_gdf.to_file(
+         #     f"data.geojson",
+         #     driver="GeoJSON",
+         #     engine="pyogrio",
+         #     layer_options={"ID_FIELD": "id"}
+         # )
+         # gps_gdf.to_file(f"dataframe_{cruise_name}.geojson", driver="GeoJSON", engine="pyogrio", id_generate=True)

+         # print(fiona.supported_drivers) # {'DXF': 'rw', 'CSV': 'raw', 'OpenFileGDB': 'raw', 'ESRIJSON': 'r', 'ESRI Shapefile': 'raw', 'FlatGeobuf': 'raw', 'GeoJSON': 'raw', 'GeoJSONSeq': 'raw', 'GPKG': 'raw', 'GML': 'rw', 'OGR_GMT': 'rw', 'GPX': 'rw', 'MapInfo File': 'raw', 'DGN': 'raw', 'S57': 'r', 'SQLite': 'raw', 'TopoJSON': 'r'}
+         #gps_gdf.to_file('dataframe.shp', crs="EPSG:4326", engine="fiona")
+         # Convert geojson feature collection to pmtiles
+         #gps_gdf.to_file("dataframe.geojson", driver="GeoJSON", crs="EPSG:4326", engine="fiona")
+         #print("done")
+         # ---Export Shapefile--- #

+
+
+         #gps_gdf.set_geometry(col='geometry', inplace=True)
+         #gps_gdf.__geo_interface__
+         #gps_gdf.set_index('id', inplace=True)
+         #gps_gdf.to_file(f"dataframe3.geojson", driver="GeoJSON", crs="EPSG:4326", engine="fiona", index=True)
+
+         ### this gives the right layer id values
+         #gps_gdf.to_file(f"dataframe6.geojson", driver="GeoJSON", engine="pyogrio", layer_options={"ID_GENERATE": "YES"})
+         # jq '{"type": "FeatureCollection", "features": [.[] | .features[]]}' --slurp input*.geojson > output.geojson
+         #tippecanoe -zg --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises output.geojson
+         #tippecanoe -zg --convert-stringified-ids-to-numbers --projection=EPSG:4326 -o water-column-sonar-id.pmtiles -l cruises dataframe*.geojson
+         # {
+         # "type": "FeatureCollection",
+         # "name": "dataframe5",
+         # "features": [
+         # { "type": "Feature", "id": 0, "properties": { "id": 0, "ship": "Henry_B._Bigelow", "cruise": "HB0706", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.120498657226562, 39.659671783447266 ], [ -72.120773315429688, 39.660198211669922 ] ] } },
+         # { "type": "Feature", "id": 1, "properties": { "id": 1, "ship": "Henry_B._Bigelow", "cruise": "HB0707", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -71.797836303710938, 41.003166198730469 ], [ -71.797996520996094, 41.002998352050781 ], [ -71.798583984375, 41.002994537353516 ] ] } },
+         # { "type": "Feature", "id": 2, "properties": { "id": 2, "ship": "Henry_B._Bigelow", "cruise": "HB0710", "sensor": "EK60" }, "geometry": { "type": "LineString", "coordinates": [ [ -72.489486694335938, 40.331901550292969 ], [ -72.490760803222656, 40.33099365234375 ] ] } }
+         # ]
+         # }
+         """
+         # https://docs.protomaps.com/pmtiles/create
+         #ogr2ogr -t_srs EPSG:4326 data.geojson dataframe.shp
+         # Only need to do the second one here...
+         tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises dataframe.geojson
+         tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
+         # used this to combine all the geojson files into single pmtile file (2024-12-03):
+         tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises --coalesce-densest-as-needed --extend-zooms-if-still-dropping dataframe*.geojson
+
+         TODO:
+         run each one of the cruises in a separate ospool workflow.
+         each process gets own store
+         """
  ###########################################################
+
+ # s3_manager = S3Manager()  # endpoint_url=endpoint_url)
+ # # s3fs_manager = S3FSManager()
+ # # input_bucket_name = "test_input_bucket"
+ # # s3_manager.create_bucket(bucket_name=input_bucket_name)
+ # ship_name = "Henry_B._Bigelow"
+ # cruise_name = "HB0706"
+ # sensor_name = "EK60"
+ #
+ # # ---Scan Bucket For All Zarr Stores--- #
+ # # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html#level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr/
+ # path_to_zarr_store = f"s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0706/EK60/HB0706.zarr"
+ # s3 = s3fs.S3FileSystem()
+ # zarr_store = s3fs.S3Map(path_to_zarr_store, s3=s3)
+ # ds_zarr = xr.open_zarr(zarr_store, consolidated=None)
+ # print(ds_zarr.Sv.shape)
+
+
+
+ total = [246847, 89911, 169763, 658047, 887640, 708771, 187099, 3672813, 4095002, 763268, 162727, 189454, 1925270, 3575857, 1031920, 1167590, 3737415, 4099957, 3990725, 3619996, 3573052, 2973090, 55851, 143192, 1550164, 3692819, 668400, 489735, 393260, 1311234, 242989, 4515760, 1303091, 704663, 270645, 3886437, 4204381, 1062090, 428639, 541455, 4206506, 298561, 1279329, 137416, 139836, 228947, 517949]
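The three new methods above are designed to chain together: each cruise's Zarr store on s3://noaa-wcsd-zarr-pds is opened, its simplified navigation track is written to dataframe_&lt;cruise&gt;.geojson, the per-cruise files are merged into data.geojson, and tippecanoe (per the protomaps link in the code) converts the merged file to PMTiles. A minimal driver sketch of that flow is below; the cruise list is illustrative (taken from the HB0706/HB0707/HB0710 examples in the commented-out feature collection), and the tippecanoe step runs in a shell, not in this package.

from water_column_sonar_processing.geometry.pmtile_generation import PMTileGeneration

pmtile = PMTileGeneration()
cruises = ["HB0706", "HB0707", "HB0710"]  # illustrative Henry_B._Bigelow cruises

# Step 1: write one simplified-track GeoJSON per cruise (dataframe_<cruise>.geojson),
# opening the public Zarr stores in parallel with a thread pool.
completed = pmtile.open_zarr_stores_with_thread_pool_executor(cruises=cruises)

# Step 2: merge the per-cruise files into a single feature collection (data.geojson).
pmtile.aggregate_geojson_into_dataframe()

# Step 3 (shell), as noted in the module's comments:
#   tippecanoe -zg --projection=EPSG:4326 -o data.pmtiles -l cruises \
#       --coalesce-densest-as-needed --extend-zooms-if-still-dropping data.geojson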
water_column_sonar_processing/index/index_manager.py

@@ -4,7 +4,7 @@ import pandas as pd
  from datetime import datetime
  from concurrent.futures import ThreadPoolExecutor
  from concurrent.futures import as_completed
- from water_column_sonar_processing.aws.s3_manager import S3Manager
+ from water_column_sonar_processing.aws import S3Manager


  class IndexManager:
@@ -16,12 +16,10 @@ class IndexManager:
          self.s3_manager = S3Manager()

      #################################################################
-
      def list_ships(
          self,
          prefix="data/raw/",
      ):
-         # s3_client = self.s3_manager.s3_client
          page_iterator = self.s3_manager.paginator.paginate(
              Bucket=self.input_bucket_name, Prefix=prefix, Delimiter="/"
          )
@@ -79,6 +77,7 @@ class IndexManager:
              all_files.extend([i["Key"] for i in page["Contents"]])
          return [i for i in all_files if i.endswith(".raw")]

+     #################################################################
      def get_raw_files_csv(
          self,
          ship_name,
@@ -86,7 +85,9 @@ class IndexManager:
          sensor_name,
      ):
          raw_files = self.get_raw_files(
-             ship_name=ship_name, cruise_name=cruise_name, sensor_name=sensor_name
+             ship_name=ship_name,
+             cruise_name=cruise_name,
+             sensor_name=sensor_name
          )
          files_list = [
              {
@@ -102,7 +103,10 @@ class IndexManager:
          print("done")

      #################################################################
-     def get_subset_ek60_prefix(self, df: pd.DataFrame) -> pd.DataFrame:
+     def get_subset_ek60_prefix(  # TODO: is this used?
+         self,
+         df: pd.DataFrame
+     ) -> pd.DataFrame:
          # Returns all objects with 'EK60' in prefix of file path
          # Note that this can include 'EK80' data that are false-positives
          # in dataframe with ['key', 'filename', 'ship', 'cruise', 'sensor', 'size', 'date', 'datagram']
@@ -119,13 +123,13 @@ class IndexManager:
                  2:5
              ]  # 'Okeanos_Explorer', 'EX1608', 'EK60'
              if (
-                 re.search("[D](\d{8})", filename) is not None
-                 and re.search("[T](\d{6})", filename) is not None
+                 re.search("[D](\\d{8})", filename) is not None
+                 and re.search("[T](\\d{6})", filename) is not None
              ):
                  # Parse date if possible e.g.: 'data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw'
                  # and 'data/raw/Henry_B._Bigelow/HB1802/EK60/D20180513-T150250.raw'
-                 date_substring = re.search("[D](\d{8})", filename).group(1)
-                 time_substring = re.search("[T](\d{6})", filename).group(1)
+                 date_substring = re.search("[D](\\d{8})", filename).group(1)
+                 time_substring = re.search("[T](\\d{6})", filename).group(1)
                  date_string = datetime.strptime(
                      f"{date_substring}{time_substring}", "%Y%m%d%H%M%S"
                  )
@@ -146,7 +150,10 @@ class IndexManager:
          return pd.DataFrame(objects)

      #################################################################
-     def scan_datagram(self, select_key: str) -> list:
+     def scan_datagram(
+         self,
+         select_key: str
+     ) -> list:
          # Reads the first 8 bytes of S3 file. Used to determine if ek60 or ek80
          # Note: uses boto3 session instead of boto3 client: https://github.com/boto/boto3/issues/801
          # select_key = 'data/raw/Albatross_Iv/AL0403/EK60/L0005-D20040302-T200108-EK60.raw'
@@ -162,7 +169,10 @@ class IndexManager:
          return first_datagram

      #################################################################
-     def get_subset_datagrams(self, df: pd.DataFrame) -> list:
+     def get_subset_datagrams(
+         self,
+         df: pd.DataFrame
+     ) -> list:
          print("getting subset of datagrams")
          select_keys = list(
              df[["KEY", "CRUISE"]].drop_duplicates(subset="CRUISE")["KEY"].values
@@ -181,7 +191,9 @@ class IndexManager:

      #################################################################
      def get_ek60_objects(
-         self, df: pd.DataFrame, subset_datagrams: list
+         self,
+         df: pd.DataFrame,
+         subset_datagrams: list
      ) -> pd.DataFrame:
          # for each key write datagram value to all other files in same cruise
          for subset_datagram in subset_datagrams:
@@ -195,7 +207,7 @@ class IndexManager:
          return df.loc[df["DATAGRAM"] == "CON0"]

      #################################################################
-     def get_calibration_information(  # tested
+     def get_calibration_information(
          self,
      ) -> pd.DataFrame:
          # Calibration data generated by data manager currently located here:
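Aside from import-path and formatting changes, the functional change in this file is the regex escaping: "[D](\d{8})" becomes "[D](\\d{8})", so the backslash reaches the re module explicitly instead of relying on Python's deprecated handling of invalid escape sequences in plain string literals; the pattern matched is unchanged. A standalone sketch of the same filename parsing, using the example key from the code comments:

import re
from datetime import datetime

# Example key taken from the comments in get_subset_ek60_prefix above.
key = "data/raw/Henry_B._Bigelow/HB1006/EK60/HBB-D20100723-T025105.raw"
filename = key.split("/")[-1]

if (
    re.search("[D](\\d{8})", filename) is not None
    and re.search("[T](\\d{6})", filename) is not None
):
    date_substring = re.search("[D](\\d{8})", filename).group(1)  # "20100723"
    time_substring = re.search("[T](\\d{6})", filename).group(1)  # "025105"
    print(datetime.strptime(f"{date_substring}{time_substring}", "%Y%m%d%H%M%S"))
    # 2010-07-23 02:51:05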
water_column_sonar_processing/model/zarr_manager.py

@@ -1,14 +1,14 @@
  import os
-
  import numcodecs
  import numpy as np
  import xarray as xr
  import zarr
  from numcodecs import Blosc

- from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
- from water_column_sonar_processing.utility.constants import Constants, Coordinates
- from water_column_sonar_processing.utility.timestamp import Timestamp
+ from water_column_sonar_processing.aws import S3FSManager
+ from water_column_sonar_processing.utility import Constants
+ from water_column_sonar_processing.utility import Timestamp
+ from water_column_sonar_processing.utility import Coordinates

  numcodecs.blosc.use_threads = False
  numcodecs.blosc.set_nthreads(1)
@@ -32,8 +32,8 @@ class ZarrManager:
          self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

      #######################################################
-     @staticmethod
      def get_depth_values(
+         self,
          min_echo_range: float = 1.0,  # minimum depth measured (zero non-inclusive) from whole cruise
          max_echo_range: float = 100.0,  # maximum depth measured from whole cruise
      ):
@@ -85,12 +85,11 @@ class ZarrManager:
              name=Coordinates.TIME.value,
              data=np.repeat(0.0, width),
              shape=width,
-             chunks=(
-                 Constants.TILE_SIZE.value,
-             ),  # TODO: the chunking scheme doesn't seem to be working here
+             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+             # Constants.TILE_SIZE.value,
+             # ),  # TODO: the chunking scheme doesn't seem to be working here
              dtype=np.dtype(Coordinates.TIME_DTYPE.value),
              compressor=self.__compressor,
-             # fill_value=0.,
              fill_value=np.nan,  # TODO: do i want nan's?
              overwrite=self.__overwrite,
          )
@@ -113,12 +112,12 @@ class ZarrManager:
              # TODO: verify that these values are correct
              data=depth_values,
              shape=len(depth_values),
-             chunks=Constants.TILE_SIZE.value,
+             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
              dtype=np.dtype(
                  Coordinates.DEPTH_DTYPE.value
              ),  # float16 == 2 significant digits would be ideal
              compressor=self.__compressor,
-             # fill_value=np.nan,
+             fill_value=np.nan,
              overwrite=self.__overwrite,
          )
          # TODO: change to exception
@@ -133,15 +132,16 @@ class ZarrManager:
          # --- Coordinate: Latitude --- #
          root.create_dataset(
              name=Coordinates.LATITUDE.value,
-             data=np.repeat(0.0, width),
+             # data=np.repeat(0.0, width),
              shape=width,
-             chunks=Constants.TILE_SIZE.value,
+             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
              dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
              compressor=self.__compressor,
-             fill_value=0.0,
+             fill_value=np.nan,
              overwrite=self.__overwrite,
          )

+         # Note: LATITUDE is indexed by TIME
          root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]

          root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
@@ -151,15 +151,16 @@ class ZarrManager:
          # --- Coordinate: Longitude --- #
          root.create_dataset(
              name=Coordinates.LONGITUDE.value,
-             data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+             # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
              shape=width,
-             chunks=Constants.TILE_SIZE.value,
+             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
              dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
              compressor=self.__compressor,
-             fill_value=0.0,
+             fill_value=np.nan,
              overwrite=self.__overwrite,
          )

+         # Note: LONGITUDE is indexed by TIME
          root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]

          root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
@@ -170,19 +171,20 @@ class ZarrManager:
          # --- Coordinate: Bottom --- #
          root.create_dataset(
              name=Coordinates.BOTTOM.value,
-             # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+             data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
              shape=width,
-             chunks=Constants.TILE_SIZE.value,
+             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
              dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
              compressor=self.__compressor,
-             fill_value=np.nan,
+             fill_value=0.0,
              overwrite=self.__overwrite,
          )

-         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+         # BOTTOM is indexed by TIME
+         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]

-         root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
-         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+         root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value

          #####################################################################
          # --- Coordinate: Frequency --- #
@@ -190,7 +192,7 @@ class ZarrManager:
              name=Coordinates.FREQUENCY.value,
              data=frequencies,
              shape=len(frequencies),
-             chunks=1,
+             chunks=len(frequencies),
              dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
              compressor=self.__compressor,
              fill_value=0.0,
@@ -213,7 +215,7 @@ class ZarrManager:
          root.create_dataset(
              name=Coordinates.SV.value,
              shape=(len(depth_values), width, len(frequencies)),
-             chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1),
+             chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, len(frequencies)),
              dtype=np.dtype(
                  Coordinates.SV_DTYPE.value
              ),  # TODO: try to experiment with 'float16'
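The recurring change in this file is the chunking scheme: the one-dimensional time/depth/latitude/longitude/bottom arrays now use Constants.SPATIOTEMPORAL_CHUNK_SIZE instead of Constants.TILE_SIZE, the frequency coordinate and the frequency axis of Sv are chunked so all frequencies live in a single chunk, and latitude/longitude switch to NaN fill values. A minimal sketch of the resulting layout, using the same zarr-python 2.x style create_dataset calls as the module and illustrative stand-in values for the two constants (the real values live in water_column_sonar_processing.utility.constants):

import numpy as np
import zarr

TILE_SIZE = 512                   # stand-in for Constants.TILE_SIZE.value
SPATIOTEMPORAL_CHUNK_SIZE = 2048  # stand-in for Constants.SPATIOTEMPORAL_CHUNK_SIZE.value
width = 10_000                    # number of pings (time dimension)
depth_bins = 1_000
frequencies = [18_000.0, 38_000.0, 120_000.0, 200_000.0]

root = zarr.group()
root.create_dataset(
    name="time",
    shape=width,
    chunks=SPATIOTEMPORAL_CHUNK_SIZE,  # 1-D coordinates use the larger chunk size
    dtype="float64",
    fill_value=np.nan,
)
root.create_dataset(
    name="frequency",
    data=np.array(frequencies),
    shape=len(frequencies),
    chunks=len(frequencies),           # all frequencies in one chunk
    dtype="float64",
    fill_value=0.0,
)
root.create_dataset(
    name="Sv",
    shape=(depth_bins, width, len(frequencies)),
    chunks=(TILE_SIZE, TILE_SIZE, len(frequencies)),  # tile depth/time, keep frequency whole
    dtype="float32",
    fill_value=np.nan,
)
print(root["Sv"].chunks)  # (512, 512, 4)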
water_column_sonar_processing/process.py

@@ -3,10 +3,10 @@ import os

  import numpy as np

- from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
- from water_column_sonar_processing.aws.s3_manager import S3Manager
- from water_column_sonar_processing.aws.s3fs_manager import S3FSManager
- from water_column_sonar_processing.aws.sns_manager import SNSManager
+ from water_column_sonar_processing.aws import DynamoDBManager
+ from water_column_sonar_processing.aws import S3Manager
+ from water_column_sonar_processing.aws import S3FSManager
+ from water_column_sonar_processing.aws import SNSManager


  ###########################################################
water_column_sonar_processing/processing/__init__.py

@@ -0,0 +1,4 @@
+ from .cruise_sampler import CruiseSampler
+ from .raw_to_zarr import RawToZarr
+
+ __all__ = ["CruiseSampler", "RawToZarr"]
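The new processing subpackage re-exports both classes, matching the flattened aws and utility imports used elsewhere in this release, so downstream code can pull them in with a single import (constructor signatures are not shown in this diff):

# CruiseSampler and RawToZarr come from the cruise_sampler.py and raw_to_zarr.py modules added in 0.0.9.
from water_column_sonar_processing.processing import CruiseSampler, RawToZarr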