water-column-sonar-processing 25.1.3__tar.gz → 25.1.4__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (59)
  1. water_column_sonar_processing-25.1.4/.github/workflows/test_action.yaml +46 -0
  2. water_column_sonar_processing-25.1.4/.python-version +1 -0
  3. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/PKG-INFO +24 -8
  4. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/README.md +9 -6
  5. water_column_sonar_processing-25.1.4/pyproject.toml +73 -0
  6. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/s3fs_manager.py +1 -1
  7. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/cruise/resample_regrid.py +19 -24
  8. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/model/zarr_manager.py +13 -9
  9. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/utility/constants.py +1 -1
  10. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing.egg-info/PKG-INFO +24 -8
  11. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing.egg-info/SOURCES.txt +0 -4
  12. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing.egg-info/requires.txt +15 -0
  13. water_column_sonar_processing-25.1.3/.github/workflows/test_action.yaml +0 -24
  14. water_column_sonar_processing-25.1.3/.python-version +0 -2
  15. water_column_sonar_processing-25.1.3/pyproject.toml +0 -43
  16. water_column_sonar_processing-25.1.3/pytest.ini +0 -13
  17. water_column_sonar_processing-25.1.3/requirements.txt +0 -32
  18. water_column_sonar_processing-25.1.3/requirements_dev.txt +0 -14
  19. water_column_sonar_processing-25.1.3/tests/test_process.py +0 -472
  20. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/.env-test +0 -0
  21. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/.gitignore +0 -0
  22. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/.pre-commit-config.yaml +0 -0
  23. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/LICENSE +0 -0
  24. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/open-science-data-federation/ml/autoencoder_example.py +0 -0
  25. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/open-science-data-federation/osdf_examples/foo.ipynb +0 -0
  26. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/open-science-data-federation/osdf_examples/sonar_ai.ipynb +0 -0
  27. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/setup.cfg +0 -0
  28. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/tests/conftest.py +0 -0
  29. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/tests/test_resources/index/calibrated_cruises.csv +0 -0
  30. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/tests/test_resources/raw_to_zarr/D20070724-T042400.bot +0 -0
  31. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/tests/test_resources/raw_to_zarr/D20070724-T042400.idx +0 -0
  32. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/tests/test_resources/raw_to_zarr/D20070724-T042400.raw +0 -0
  33. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/__init__.py +0 -0
  34. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/__init__.py +0 -0
  35. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/dynamodb_manager.py +0 -0
  36. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/s3_manager.py +0 -0
  37. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/sns_manager.py +0 -0
  38. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/aws/sqs_manager.py +0 -0
  39. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/cruise/__init__.py +0 -0
  40. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/cruise/create_empty_zarr_store.py +0 -0
  41. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/cruise/datatree_manager.py +0 -0
  42. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/geometry/__init__.py +0 -0
  43. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/geometry/elevation_manager.py +0 -0
  44. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/geometry/geometry_manager.py +0 -0
  45. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/geometry/geometry_simplification.py +0 -0
  46. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/geometry/pmtile_generation.py +0 -0
  47. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/index/__init__.py +0 -0
  48. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/index/index_manager.py +0 -0
  49. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/model/__init__.py +0 -0
  50. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/process.py +0 -0
  51. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/processing/__init__.py +0 -0
  52. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/processing/batch_downloader.py +0 -0
  53. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/processing/raw_to_zarr.py +0 -0
  54. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/utility/__init__.py +0 -0
  55. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/utility/cleaner.py +0 -0
  56. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/utility/pipeline_status.py +0 -0
  57. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing/utility/timestamp.py +0 -0
  58. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing.egg-info/dependency_links.txt +0 -0
  59. {water_column_sonar_processing-25.1.3 → water_column_sonar_processing-25.1.4}/water_column_sonar_processing.egg-info/top_level.txt +0 -0
@@ -0,0 +1,46 @@
+ name: Testing
+
+ on: [push]
+
+ jobs:
+ # build:
+ # runs-on: ubuntu-latest
+ # steps:
+ # - name: Check out
+ # uses: actions/checkout@v4
+ # - name: Set up Python
+ # uses: actions/setup-python@v5
+ # with:
+ # # Semantic version range syntax or exact version of a Python version
+ # python-version: '3.10'
+ # # Optional - x64 or x86 architecture, defaults to x64
+ ## architecture: 'x64'
+ # cache: 'pip'
+ # - name: Install dependencies
+ # run: |
+ # python -m pip install --upgrade pip
+ # pip install -r requirements_dev.txt
+ # - name: Run the tests
+ # run: python -m pytest
+
+ build:
+ name: python
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ version: "0.5.25"
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version-file: ".python-version"
+
+ - name: Install the project
+ run: uv sync --all-extras --dev
+
+ - name: Run tests
+ run: uv run pytest tests
@@ -0,0 +1 @@
+ 3.10.12
@@ -1,14 +1,14 @@
  Metadata-Version: 2.2
  Name: water_column_sonar_processing
- Version: 25.1.3
- Summary: A processing tool for water column sonar data.
+ Version: 25.1.4
+ Summary: Processing tool for water column sonar data.
  Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
  Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
  Project-URL: Issues, https://github.com/CI-CMG/water-column-sonar-processing/issues
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: aiobotocore==2.19.0
@@ -34,6 +34,19 @@ Requires-Dist: typing-extensions==4.10.0
  Requires-Dist: xarray==2024.10.0
  Requires-Dist: xbatcher==0.4.0
  Requires-Dist: zarr==2.18.3
+ Provides-Extra: dev
+ Requires-Dist: bandit[toml]==1.8.0; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: pre-commit; extra == "dev"
+ Requires-Dist: pyinstaller; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Requires-Dist: flake8==7.1.1; extra == "dev"
+ Requires-Dist: pooch==1.8.2; extra == "dev"
+ Requires-Dist: pytest~=8.3.3; extra == "dev"
+ Requires-Dist: tqdm; extra == "dev"
+ Requires-Dist: bandit; extra == "dev"
+ Provides-Extra: test
+ Requires-Dist: pytest-cov; extra == "test"

  # Water Column Sonar Processing
  Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information
@@ -80,14 +93,17 @@ Processing tool for converting L0 data to L1 and L2 as well as generating geospa
  3. Set interpreter

  # Installing Dependencies
-
- 1. Add dependencies with versions to requirements.txt
- 2. ```pip install --upgrade pip && pip install -r requirements_dev.txt```
+ ```
+ uv pip install --upgrade pip
+ #uv pip install -r requirements_dev.txt
+ uv pip install -r pyproject.toml --extra dev
+ ```


  # Pytest
  ```commandline
- pytest --disable-warnings
+ uv run pytest tests
+ #pytest --disable-warnings
  ```
  or
  > pytest --cache-clear --cov=src tests/ --cov-report=xml
@@ -120,7 +136,7 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
  # Tag a Release
  Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"
  ```commandline
- git tag -a v25.1.2 -m "Releasing version v25.1.2"
+ git tag -a v25.1.4 -m "Releasing version v25.1.4"
  git push origin --tags
  ```

@@ -43,14 +43,17 @@ Processing tool for converting L0 data to L1 and L2 as well as generating geospa
  3. Set interpreter

  # Installing Dependencies
-
- 1. Add dependencies with versions to requirements.txt
- 2. ```pip install --upgrade pip && pip install -r requirements_dev.txt```
+ ```
+ uv pip install --upgrade pip
+ #uv pip install -r requirements_dev.txt
+ uv pip install -r pyproject.toml --extra dev
+ ```


  # Pytest
  ```commandline
- pytest --disable-warnings
+ uv run pytest tests
+ #pytest --disable-warnings
  ```
  or
  > pytest --cache-clear --cov=src tests/ --cov-report=xml
@@ -83,7 +86,7 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
  # Tag a Release
  Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"
  ```commandline
- git tag -a v25.1.2 -m "Releasing version v25.1.2"
+ git tag -a v25.1.4 -m "Releasing version v25.1.4"
  git push origin --tags
  ```

@@ -105,4 +108,4 @@ Experimental Plotting in Xarray (hvPlot):
  https://colab.research.google.com/drive/18vrI9LAip4xRGEX6EvnuVFp35RAiVYwU#scrollTo=q9_j9p2yXsLV

  HB0707 Cruise zoomable:
- https://hb0707.s3.us-east-1.amazonaws.com/index.html
+ https://hb0707.s3.us-east-1.amazonaws.com/index.html
@@ -0,0 +1,73 @@
+ [build-system]
+ requires = [
+ "setuptools>=61.0",
+ "wheel >= 0.29.0",
+ ]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "water_column_sonar_processing"
+ version = "25.1.4"
+ authors = [
+ { name="Rudy Klucik", email="rudy.klucik@noaa.gov" },
+ ]
+ description = "Processing tool for water column sonar data."
+ readme = "README.md"
+ requires-python = ">=3.10"
+ classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+ ]
+
+ dependencies = [
+ "aiobotocore==2.19.0",
+ "boto3==1.36.3",
+ "botocore==1.36.3",
+ "echopype==0.9.0",
+ "fiona==1.10.1",
+ "geopandas==1.0.1",
+ "mock==5.1.0",
+ "moto[all]==5.0.27",
+ "moto[server]==5.0.27",
+ "numcodecs==0.13.1",
+ "numpy==1.26.4",
+ "pandas==2.2.3",
+ "pyarrow==18.1.0",
+ "python-dotenv==1.0.1",
+ "requests==2.32.3",
+ "s3fs==2024.2.0",
+ "scipy==1.14.1",
+ "setuptools",
+ "shapely==2.0.3",
+ "typing-extensions==4.10.0",
+ "xarray==2024.10.0",
+ "xbatcher==0.4.0",
+ "zarr==2.18.3",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+ "bandit[toml]==1.8.0",
+ "build",
+ "pre-commit",
+ "pyinstaller",
+ "twine",
+ "flake8==7.1.1",
+ "pooch==1.8.2",
+ "pytest~=8.3.3",
+ "tqdm",
+ "bandit"
+ ]
+ test = [
+ "pytest-cov",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/CI-CMG/water-column-sonar-processing"
+ Issues = "https://github.com/CI-CMG/water-column-sonar-processing/issues"
+
+ [tool.bandit]
+ exclude_dirs = ["tests"]
+ [tool.pre-commit-hooks.bandit]
+ exclude = ["*/tests/*"]
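For completeness, the new `[project]` and `[project.optional-dependencies]` tables above can be inspected with the standard-library TOML parser; a minimal sketch, assuming it is run from a checkout containing this `pyproject.toml` (note `tomllib` is Python 3.11+, while the project itself targets >=3.10, where the third-party `tomli` is the usual substitute):

```python
import tomllib  # Python 3.11+; use tomli on 3.10

with open("pyproject.toml", "rb") as f:
    project = tomllib.load(f)

print(project["project"]["version"])                        # "25.1.4"
print(sorted(project["project"]["optional-dependencies"]))  # ['dev', 'test']
```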
@@ -16,7 +16,7 @@ class S3FSManager:
  # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
  self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
  self.s3fs = s3fs.S3FileSystem(
- asynchronous=False,
+ # asynchronous=False,
  endpoint_url=endpoint_url,
  key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
  secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
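Commenting out `asynchronous=False` should be behavior-preserving, since fsspec-based filesystems such as `s3fs.S3FileSystem` default to synchronous (blocking) mode. A quick sketch to confirm, using anonymous access so no credentials are needed:

```python
import s3fs

# asynchronous is omitted, as in the new code above; the default is False,
# so the filesystem operates in ordinary blocking mode.
fs = s3fs.S3FileSystem(anon=True)
print(fs.asynchronous)  # False
```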
@@ -197,9 +197,9 @@ class ResampleRegrid:
  # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

  # Get index from all cruise files. Note: should be based on which are included in cruise.
- index = cruise_df.index[
+ index = int(cruise_df.index[
  cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
- ][0]
+ ][0])

  # get input store
  input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
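The added `int(...)` cast is worth noting: selecting an element from a pandas index yields a NumPy scalar (`numpy.int64`), not a plain Python `int`, which can trip up downstream serialization or APIs that type-check their arguments. A minimal sketch of the behavior, using a hypothetical two-row DataFrame in place of `cruise_df`:

```python
import pandas as pd

# Hypothetical stand-in for cruise_df; only the FILE_NAME column matters here.
cruise_df = pd.DataFrame({"FILE_NAME": ["a.raw", "b.raw"]})

raw_index = cruise_df.index[cruise_df["FILE_NAME"] == "b.raw"][0]
print(type(raw_index))       # <class 'numpy.int64'> -- a NumPy scalar
print(type(int(raw_index)))  # <class 'int'> -- plain Python int
```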
@@ -226,18 +226,20 @@ class ResampleRegrid:
  min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
  max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))

- print(
- "Creating empty ndarray for Sv data."
- ) # Note: cruise_zarr dimensions are (depth, time, frequency)
+ print("Creating empty ndarray for Sv data.") # Note: cruise dims (depth, time, frequency)
+ output_zarr_store_shape = output_zarr_store.Sv.shape
+ end_ping_time_index - start_ping_time_index
+ output_zarr_store_height = output_zarr_store_shape[0]
+ output_zarr_store_width = end_ping_time_index - start_ping_time_index
+ output_zarr_store_depth = output_zarr_store_shape[2]
  cruise_sv_subset = np.empty(
- shape=output_zarr_store.Sv[
- :, start_ping_time_index:end_ping_time_index, :
- ].shape
+ shape=(output_zarr_store_height, output_zarr_store_width, output_zarr_store_depth)
  )
  cruise_sv_subset[:, :, :] = np.nan

  all_cruise_depth_values = zarr_manager.get_depth_values(
- min_echo_range=min_echo_range, max_echo_range=max_echo_range
+ min_echo_range=min_echo_range,
+ max_echo_range=max_echo_range
  ) # (5262,) and

  print(" ".join(list(input_xr_zarr_store.Sv.dims)))
@@ -281,16 +283,6 @@ class ResampleRegrid:
  #########################################################################
  # write Sv values to cruise-level-model-store
  output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :] = regrid_resample.values
-
- #########################################################################
- # [5] write subset of latitude/longitude
- output_zarr_store.latitude[
- start_ping_time_index:end_ping_time_index
- ] = geospatial.dropna()["latitude"].values # TODO: get from ds_sv directly, dont need geojson anymore
- output_zarr_store.longitude[
- start_ping_time_index:end_ping_time_index
- ] = geospatial.dropna()["longitude"].values
-
  #########################################################################
  # TODO: add the "detected_seafloor_depth/" to the
  # L2 cruise dataarrays
@@ -311,11 +303,14 @@ class ResampleRegrid:
  start_ping_time_index:end_ping_time_index
  ] = detected_seafloor_depths
  #
- #
- #
- # TODO: write the time variable last so that I can parse that as check
- #
- #
+ #########################################################################
+ # [5] write subset of latitude/longitude
+ output_zarr_store.latitude[
+ start_ping_time_index:end_ping_time_index
+ ] = geospatial.dropna()["latitude"].values # TODO: get from ds_sv directly, dont need geojson anymore
+ output_zarr_store.longitude[
+ start_ping_time_index:end_ping_time_index
+ ] = geospatial.dropna()["longitude"].values
  #########################################################################
  #########################################################################
  except Exception as err:
@@ -2,6 +2,7 @@ import numcodecs
  import numpy as np
  import xarray as xr
  import zarr
+ import importlib.metadata
  from numcodecs import Blosc

  from water_column_sonar_processing.aws import S3FSManager
@@ -249,9 +250,9 @@ class ZarrManager:
  root.attrs["sensor_name"] = sensor_name
  #
  root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
- root.attrs["processing_software_version"] = (
- "25.1.3" # TODO: get programmatically, echopype>utils>prov.py
- )
+
+ current_project_version = importlib.metadata.version('water_column_sonar_processing')
+ root.attrs["processing_software_version"] = current_project_version
  root.attrs["processing_software_time"] = Timestamp.get_timestamp()
  #
  root.attrs["calibration_status"] = calibration_status
@@ -290,7 +291,7 @@ class ZarrManager:
  # zarr_synchronizer: Union[str, None] = None, # TODO:
  output_bucket_name: str,
  endpoint_url=None,
- ):
+ ) -> zarr.hierarchy.Group:
  # Mounts a Zarr store using pythons Zarr implementation. The mounted store
  # will have read/write privileges so that store can be updated.
  print("Opening L2 Zarr store with Zarr for writing.")
@@ -316,18 +317,21 @@ class ZarrManager:
  input_bucket_name: str,
  endpoint_url=None,
  ) -> xr.Dataset:
- print("Opening L1 Zarr store in S3 with Xarray.")
+ print("Opening L1 Zarr store in S3 with Xarray.") # TODO: Is this only used for reading from?
  try:
  zarr_path = f"s3://{input_bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
  s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
  store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
- ds = xr.open_zarr(
- store=store_s3_map, consolidated=None
- ) # synchronizer=SYNCHRONIZER
+ ds = xr.open_dataset(
+ filename_or_obj=store_s3_map,
+ engine="zarr",
+ chunks={}
+ )
  except Exception as err:
  print("Problem opening Zarr store in S3 as Xarray.")
  raise err
- print("Done opening Zarr store in S3 as Xarray.")
+ finally:
+ print("Exiting opening Zarr store in S3 as Xarray.")
  return ds

  def open_l2_zarr_store_with_xarray(
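`xr.open_dataset(..., engine="zarr", chunks={})` is the general-purpose counterpart of the `xr.open_zarr` call it replaces; passing `chunks={}` returns lazy Dask-backed arrays using each variable's on-disk chunking rather than loading the data eagerly. A hedged sketch against a hypothetical local store (an fsspec mapping, as in the code above, works the same way):

```python
import xarray as xr

# Hypothetical path; any Zarr store or fsspec mapping can be passed.
ds = xr.open_dataset("example_cruise.zarr", engine="zarr", chunks={})
print(ds.chunks)  # chunking mirrors the store's native chunk layout
```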
@@ -3,7 +3,7 @@ from enum import Enum, Flag, unique

  @unique
  class Constants(Flag):
- TILE_SIZE = 1024 # TODO: add tile size to metadata?
+ TILE_SIZE = 1024

  # Average https://noaa-wcsd-zarr-pds.s3.us-east-1.amazonaws.com/level_2/Henry_B._Bigelow/HB0902/EK60/HB0902.zarr/time/927
  # chunk size is ~1.3 kB, HB0902 cruise takes ~30 seconds to load all time/lat/lon data
@@ -1,14 +1,14 @@
  Metadata-Version: 2.2
  Name: water_column_sonar_processing
- Version: 25.1.3
- Summary: A processing tool for water column sonar data.
+ Version: 25.1.4
+ Summary: Processing tool for water column sonar data.
  Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
  Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
  Project-URL: Issues, https://github.com/CI-CMG/water-column-sonar-processing/issues
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
- Requires-Python: >=3.8
+ Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
  Requires-Dist: aiobotocore==2.19.0
@@ -34,6 +34,19 @@ Requires-Dist: typing-extensions==4.10.0
  Requires-Dist: xarray==2024.10.0
  Requires-Dist: xbatcher==0.4.0
  Requires-Dist: zarr==2.18.3
+ Provides-Extra: dev
+ Requires-Dist: bandit[toml]==1.8.0; extra == "dev"
+ Requires-Dist: build; extra == "dev"
+ Requires-Dist: pre-commit; extra == "dev"
+ Requires-Dist: pyinstaller; extra == "dev"
+ Requires-Dist: twine; extra == "dev"
+ Requires-Dist: flake8==7.1.1; extra == "dev"
+ Requires-Dist: pooch==1.8.2; extra == "dev"
+ Requires-Dist: pytest~=8.3.3; extra == "dev"
+ Requires-Dist: tqdm; extra == "dev"
+ Requires-Dist: bandit; extra == "dev"
+ Provides-Extra: test
+ Requires-Dist: pytest-cov; extra == "test"

  # Water Column Sonar Processing
  Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information
@@ -80,14 +93,17 @@ Processing tool for converting L0 data to L1 and L2 as well as generating geospa
  3. Set interpreter

  # Installing Dependencies
-
- 1. Add dependencies with versions to requirements.txt
- 2. ```pip install --upgrade pip && pip install -r requirements_dev.txt```
+ ```
+ uv pip install --upgrade pip
+ #uv pip install -r requirements_dev.txt
+ uv pip install -r pyproject.toml --extra dev
+ ```


  # Pytest
  ```commandline
- pytest --disable-warnings
+ uv run pytest tests
+ #pytest --disable-warnings
  ```
  or
  > pytest --cache-clear --cov=src tests/ --cov-report=xml
@@ -120,7 +136,7 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
  # Tag a Release
  Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"
  ```commandline
- git tag -a v25.1.2 -m "Releasing version v25.1.2"
+ git tag -a v25.1.4 -m "Releasing version v25.1.4"
  git push origin --tags
  ```

@@ -5,15 +5,11 @@
  LICENSE
  README.md
  pyproject.toml
- pytest.ini
- requirements.txt
- requirements_dev.txt
  .github/workflows/test_action.yaml
  open-science-data-federation/ml/autoencoder_example.py
  open-science-data-federation/osdf_examples/foo.ipynb
  open-science-data-federation/osdf_examples/sonar_ai.ipynb
  tests/conftest.py
- tests/test_process.py
  tests/test_resources/index/calibrated_cruises.csv
  tests/test_resources/raw_to_zarr/D20070724-T042400.bot
  tests/test_resources/raw_to_zarr/D20070724-T042400.idx
@@ -21,3 +21,18 @@ typing-extensions==4.10.0
  xarray==2024.10.0
  xbatcher==0.4.0
  zarr==2.18.3
+
+ [dev]
+ bandit[toml]==1.8.0
+ build
+ pre-commit
+ pyinstaller
+ twine
+ flake8==7.1.1
+ pooch==1.8.2
+ pytest~=8.3.3
+ tqdm
+ bandit
+
+ [test]
+ pytest-cov
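The `[dev]` and `[test]` sections above are how setuptools serializes the new optional-dependency groups into `requires.txt`; at install time they surface as the `Requires-Dist` entries with `extra == "..."` markers shown in PKG-INFO. A minimal sketch of inspecting them at runtime, assuming the package is installed:

```python
import importlib.metadata

# Each entry mirrors a Requires-Dist line; extras carry markers such as
# 'pytest~=8.3.3; extra == "dev"'.
for requirement in importlib.metadata.requires("water_column_sonar_processing") or []:
    print(requirement)
```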
@@ -1,24 +0,0 @@
- name: Python package
-
- on: [push]
-
- jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - name: Check out
- uses: actions/checkout@v4
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- # Semantic version range syntax or exact version of a Python version
- python-version: '3.10'
- # Optional - x64 or x86 architecture, defaults to x64
- # architecture: 'x64'
- cache: 'pip'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install -r requirements_dev.txt
- - name: Run the tests
- run: python -m pytest
@@ -1,2 +0,0 @@
- 3.10.12
- water-column-sonar-processing
@@ -1,43 +0,0 @@
- [build-system]
- requires = [
- "setuptools>=61.0",
- #"setuptools_scm[toml] >= 4, <6",
- "wheel >= 0.29.0",
- ]
- build-backend = "setuptools.build_meta"
-
- [project]
- name = "water_column_sonar_processing"
- version = "25.1.3"
- authors = [
- { name="Rudy Klucik", email="rudy.klucik@noaa.gov" },
- ]
- description = "A processing tool for water column sonar data."
- readme = "README.md"
- #requires-python = ">=3.10"
- requires-python = ">=3.8"
- classifiers = [
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: MIT License",
- "Operating System :: OS Independent",
- ]
- dynamic = ["dependencies"]
-
- [project.urls]
- Homepage = "https://github.com/CI-CMG/water-column-sonar-processing"
- Issues = "https://github.com/CI-CMG/water-column-sonar-processing/issues"
-
- [tool.setuptools.dynamic]
- dependencies = {file = ["requirements.txt"]}
- optional-dependencies = {dev = { file = ["requirements_dev.txt"] }}
-
- #[tool.setuptools_scm]
- #fallback_version = "unknown"
- #local_scheme = "node-and-date"
- #write_to = "_water_column_sonar_processing_version.py"
- #write_to_template = 'version = "{version}"'
-
- [tool.bandit]
- exclude_dirs = ["tests"]
- [tool.pre-commit-hooks.bandit]
- exclude = ["*/tests/*"]
@@ -1,13 +0,0 @@
- # test directory
- #[pytest]
- #testpaths = src/water_column_sonar_processing/tests
- #cache_dir = .cache
- #markers =
- # unit: marks tests as unit tests
- # integration: marks tests as integration tests
- [pytest]
- addopts = "-p no:warnings"
- #testpaths = "tests"
- #testpaths=src/water_column_sonar_processing/tests
- cache_dir=.cache
- pythonpath="."
@@ -1,32 +0,0 @@
- # https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html
- # defined for Python 3.12
- # Note: be careful with conversions for pandas >=2.0.0, timestamps will have a lot of problems
-
- aiobotocore==2.19.0
- boto3==1.36.3
- botocore==1.36.3
- echopype==0.9.0
- fiona==1.10.1
- # Alternative to geopandas: pyogrio
- geopandas==1.0.1
- mock==5.1.0
- moto[all]==5.0.27
- moto[server]==5.0.27
- numcodecs==0.13.1
- numpy==1.26.4
- pandas==2.2.3
- pyarrow==18.1.0
- python-dotenv==1.0.1
- requests==2.32.3
- #s3fs==2024.3.1
- #s3fs==2024.3.0 # does not work
- s3fs==2024.2.0 # works ...something between 2024.2 and 2024.3 creates the problem
- scipy==1.14.1
- #setuptools==75.6.0
- setuptools
- shapely==2.0.3
- typing-extensions==4.10.0
- xarray==2024.10.0
- # xbatcher[tensorflow]
- xbatcher==0.4.0
- zarr==2.18.3
@@ -1,14 +0,0 @@
- -r requirements.txt
-
- bandit[toml]==1.8.0
- build
- pre-commit
- pyinstaller
- twine
-
- flake8==7.1.1
- pooch==1.8.2
- pytest~=8.3.3
- pytest-cov==6.0.0
- tqdm
- bandit
@@ -1,472 +0,0 @@
- # import json
- # import os
- # import pytest
- # import numpy as np
- # from dotenv import find_dotenv, load_dotenv
- # from moto import mock_aws
- #
- # from water_column_sonar_processing.aws import DynamoDBManager
- # from water_column_sonar_processing.aws import S3Manager
- # from water_column_sonar_processing.aws import SNSManager
- # from water_column_sonar_processing.aws import SQSManager
- # from water_column_sonar_processing.process import Process
- #
- #
- # #######################################################
- # def setup_module():
- # print("setup")
- # env_file = find_dotenv(".env-test")
- # load_dotenv(dotenv_path=env_file, override=True)
- #
- #
- # def teardown_module():
- # print("teardown")
- #
- #
- # #######################################################
- # # TODO: Delete this?
- # @mock_aws
- # @pytest.mark.skip(reason="no way of currently testing this")
- # def test_model_happy_path():
- # test_input_bucket_name = os.environ["INPUT_BUCKET_NAME"]
- #
- # test_output_bucket_name = os.environ["OUTPUT_BUCKET_NAME"]
- #
- # test_table_name = os.environ["TABLE_NAME"]
- #
- # test_topic_arn = os.environ["TOPIC_ARN"]
- # test_topic_name = test_topic_arn.split(":")[-1]
- #
- # # [1 of 3] Create DynamoDB table
- # ddbm = DynamoDBManager()
- # ddbm.create_water_column_sonar_table(table_name=test_table_name)
- # ###################################################
- # # tests data 0 - David_Starr_Jordan - DS0604
- # # tests data 1 - Okeanos_Explorer - EX1404L2
- # # tests data 2 - Henry_B._Bigelow - HB0707
- # # tests data 3 - Miller_Freeman - MF0710
- # ###################################################
- # # tests data 0 - David_Starr_Jordan - DS0604
- # test_channels = [
- # "GPT 38 kHz 009072055a7f 2 ES38B",
- # "GPT 70 kHz 00907203400a 3 ES70-7C",
- # "GPT 120 kHz 009072034d52 1 ES120-7",
- # "GPT 200 kHz 0090720564e4 4 ES200-7C",
- # ]
- # test_frequencies = [38_000, 70_000, 120_000, 200_000]
- # # Create the first and third tests example files for the same cruise
- # ddbm.update_item(
- # table_name=test_table_name,
- # key={
- # "FILE_NAME": {"S": "DSJ0604-D20060406-T035914.raw"}, # Partition Key
- # "CRUISE_NAME": {"S": "DS0604"}, # Sort Key
- # },
- # expression_attribute_names={
- # "#CH": "CHANNELS",
- # "#ET": "END_TIME",
- # "#ED": "ERROR_DETAIL",
- # "#FR": "FREQUENCIES",
- # "#MA": "MAX_ECHO_RANGE",
- # "#MI": "MIN_ECHO_RANGE",
- # "#ND": "NUM_PING_TIME_DROPNA",
- # "#PS": "PIPELINE_STATUS", # testing this updated
- # "#PT": "PIPELINE_TIME", # testing this updated
- # "#SE": "SENSOR_NAME",
- # "#SH": "SHIP_NAME",
- # "#ST": "START_TIME",
- # "#ZB": "ZARR_BUCKET",
- # "#ZP": "ZARR_PATH",
- # },
- # expression_attribute_values={
- # ":ch": {"L": [{"S": i} for i in test_channels]},
- # ":et": {"S": "2006-04-06T03:59:15.587Z"},
- # ":ed": {"S": ""},
- # ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
- # ":ma": {"N": str(np.round(499.5721, 4))},
- # ":mi": {"N": str(np.round(0.25, 4))},
- # ":nd": {"N": str(1)},
- # ":ps": {"S": "SUCCESS_AGGREGATOR"},
- # ":pt": {"S": "2023-10-02T08:54:41Z"},
- # ":se": {"S": "EK60"},
- # ":sh": {"S": "David_Starr_Jordan"},
- # ":st": {"S": "2006-04-06T03:59:14.115Z"},
- # ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
- # ":zp": {
- # "S": "level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T035914.model"
- # },
- # },
- # update_expression=(
- # "SET "
- # "#CH = :ch, "
- # "#ET = :et, "
- # "#ED = :ed, "
- # "#FR = :fr, "
- # "#MA = :ma, "
- # "#MI = :mi, "
- # "#ND = :nd, "
- # "#PS = :ps, "
- # "#PT = :pt, "
- # "#SE = :se, "
- # "#SH = :sh, "
- # "#ST = :st, "
- # "#ZB = :zb, "
- # "#ZP = :zp"
- # ),
- # )
- # ddbm.update_item(
- # table_name=test_table_name,
- # key={
- # "FILE_NAME": {"S": "DSJ0604-D20060406-T133530.raw"}, # Partition Key
- # "CRUISE_NAME": {"S": "DS0604"}, # Sort Key
- # },
- # expression_attribute_names={
- # "#CH": "CHANNELS",
- # "#ET": "END_TIME",
- # "#ED": "ERROR_DETAIL",
- # "#FR": "FREQUENCIES",
- # "#MA": "MAX_ECHO_RANGE",
- # "#MI": "MIN_ECHO_RANGE",
- # "#ND": "NUM_PING_TIME_DROPNA",
- # "#PS": "PIPELINE_STATUS", # testing this updated
- # "#PT": "PIPELINE_TIME", # testing this updated
- # "#SE": "SENSOR_NAME",
- # "#SH": "SHIP_NAME",
- # "#ST": "START_TIME",
- # "#ZB": "ZARR_BUCKET",
- # "#ZP": "ZARR_PATH",
- # },
- # expression_attribute_values={
- # ":ch": {"L": [{"S": i} for i in test_channels]},
- # ":et": {"S": "2006-04-06T15:16:51.945Z"},
- # ":ed": {"S": ""},
- # ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
- # ":ma": {"N": str(np.round(499.7653, 4))},
- # ":mi": {"N": str(np.round(0.25, 4))},
- # ":nd": {"N": str(2467)},
- # ":ps": {"S": "SUCCESS_AGGREGATOR"},
- # ":pt": {"S": "2023-10-02T08:54:43Z"},
- # ":se": {"S": "EK60"},
- # ":sh": {"S": "David_Starr_Jordan"},
- # ":st": {"S": "2006-04-06T13:35:30.701Z"},
- # ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
- # ":zp": {
- # "S": "level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T133530.model"
- # },
- # },
- # update_expression=(
- # "SET "
- # "#CH = :ch, "
- # "#ET = :et, "
- # "#ED = :ed, "
- # "#FR = :fr, "
- # "#MA = :ma, "
- # "#MI = :mi, "
- # "#ND = :nd, "
- # "#PS = :ps, "
- # "#PT = :pt, "
- # "#SE = :se, "
- # "#SH = :sh, "
- # "#ST = :st, "
- # "#ZB = :zb, "
- # "#ZP = :zp"
- # ),
- # )
- # ###################################################
- # # tests data 1 - Okeanos_Explorer - EX1404L2
- # test_channels = ["GPT 18 kHz 009072066c0e 1-1 ES18-11"]
- # test_frequencies = [18_000]
- # ddbm.update_item(
- # table_name=test_table_name,
- # key={
- # "FILE_NAME": {"S": "EX1404L2_EK60_-D20140908-T173907.raw"}, # Partition Key
- # "CRUISE_NAME": {"S": "EX1404L2"}, # Sort Key
- # },
- # expression_attribute_names={
- # "#CH": "CHANNELS",
- # "#ET": "END_TIME",
- # "#ED": "ERROR_DETAIL",
- # "#FR": "FREQUENCIES",
- # "#MA": "MAX_ECHO_RANGE",
- # "#MI": "MIN_ECHO_RANGE",
- # "#ND": "NUM_PING_TIME_DROPNA",
- # "#PS": "PIPELINE_STATUS", # testing this updated
- # "#PT": "PIPELINE_TIME", # testing this updated
- # "#SE": "SENSOR_NAME",
- # "#SH": "SHIP_NAME",
- # "#ST": "START_TIME",
- # "#ZB": "ZARR_BUCKET",
- # "#ZP": "ZARR_PATH",
- # },
- # expression_attribute_values={
- # ":ch": {"L": [{"S": i} for i in test_channels]},
- # ":et": {"S": "2014-09-08T17:56:49.024Z"},
- # ":ed": {"S": ""},
- # ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
- # ":ma": {"N": str(np.round(2499.7573, 4))},
- # ":mi": {"N": str(np.round(0.25, 4))},
- # ":nd": {"N": str(324)},
- # ":ps": {"S": "SUCCESS_AGGREGATOR"},
- # ":pt": {"S": "2023-10-02T18:19:44Z"},
- # ":se": {"S": "EK60"},
- # ":sh": {"S": "Okeanos_Explorer"},
- # ":st": {"S": "2014-09-08T17:39:07.660Z"},
- # ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
- # ":zp": {
- # "S": "level_1/Okeanos_Explorer/EX1404L2/EK60/EX1404L2_EK60_-D20140908-T173907.model"
- # },
- # },
- # update_expression=(
- # "SET "
- # "#CH = :ch, "
- # "#ET = :et, "
- # "#ED = :ed, "
- # "#FR = :fr, "
- # "#MA = :ma, "
- # "#MI = :mi, "
- # "#ND = :nd, "
- # "#PS = :ps, "
- # "#PT = :pt, "
- # "#SE = :se, "
- # "#SH = :sh, "
- # "#ST = :st, "
- # "#ZB = :zb, "
- # "#ZP = :zp"
- # ),
- # )
- # ###################################################
- # # tests data 2 - Henry_B._Bigelow - HB0707
- # test_channels = [
- # "GPT 18 kHz 009072056b0e 2 ES18-11",
- # "GPT 38 kHz 0090720346bc 1 ES38B",
- # "GPT 120 kHz 0090720580f1 3 ES120-7C",
- # "GPT 200 kHz 009072034261 4 ES200-7C",
- # ]
- # test_frequencies = [18_000, 38_000, 120_000, 200_000]
- # ddbm.update_item(
- # table_name=test_table_name,
- # key={
- # "FILE_NAME": {"S": "D20070712-T061745.raw"}, # Partition Key
- # "CRUISE_NAME": {"S": "HB0707"}, # Sort Key
- # },
- # expression_attribute_names={
- # "#CH": "CHANNELS",
- # "#ET": "END_TIME",
- # "#ED": "ERROR_DETAIL",
- # "#FR": "FREQUENCIES",
- # "#MA": "MAX_ECHO_RANGE",
- # "#MI": "MIN_ECHO_RANGE",
- # "#ND": "NUM_PING_TIME_DROPNA",
- # "#PS": "PIPELINE_STATUS", # testing this updated
- # "#PT": "PIPELINE_TIME", # testing this updated
- # "#SE": "SENSOR_NAME",
- # "#SH": "SHIP_NAME",
- # "#ST": "START_TIME",
- # "#ZB": "ZARR_BUCKET",
- # "#ZP": "ZARR_PATH",
- # },
- # expression_attribute_values={
- # ":ch": {"L": [{"S": i} for i in test_channels]},
- # ":et": {"S": "2007-07-12T10:05:02.579Z"},
- # ":ed": {"S": ""},
- # ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
- # ":ma": {"N": str(np.round(249.792, 4))},
- # ":mi": {"N": str(np.round(0.25, 4))},
- # ":nd": {"N": str(9733)},
- # ":ps": {"S": "SUCCESS_AGGREGATOR"},
- # ":pt": {"S": "2023-10-01T20:13:58Z"},
- # ":se": {"S": "EK60"},
- # ":sh": {"S": "Henry_B._Bigelow"},
- # ":st": {"S": "2007-07-12T06:17:45.579Z"},
- # ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
- # ":zp": {
- # "S": "level_1/Henry_B._Bigelow/HB0707/EK60/D20070712-T061745.model"
- # },
- # },
- # update_expression=(
- # "SET "
- # "#CH = :ch, "
- # "#ET = :et, "
- # "#ED = :ed, "
- # "#FR = :fr, "
- # "#MA = :ma, "
- # "#MI = :mi, "
- # "#ND = :nd, "
- # "#PS = :ps, "
- # "#PT = :pt, "
- # "#SE = :se, "
- # "#SH = :sh, "
- # "#ST = :st, "
- # "#ZB = :zb, "
- # "#ZP = :zp"
- # ),
- # )
- # ###################################################
- # # tests data 3 - Miller_Freeman - MF0710
- # test_channels = [
- # "GPT 18 kHz 009072034d55 3 ES18-11",
- # "GPT 38 kHz 009072016e01 4 ES38B",
- # "GPT 120 kHz 009072016a73 1 ES120-7C",
- # "GPT 200 kHz 009072033fcc 2 ES200-7C",
- # ]
- # test_frequencies = [18_000, 38_000, 120_000, 200_000]
- # ddbm.update_item(
- # table_name=test_table_name,
- # key={
- # "FILE_NAME": {"S": "HAKE2007-D20070708-T200449.raw"}, # Partition Key
- # "CRUISE_NAME": {"S": "MF0710"}, # Sort Key
- # },
- # expression_attribute_names={
- # "#CH": "CHANNELS",
- # "#ET": "END_TIME",
- # "#ED": "ERROR_DETAIL",
- # "#FR": "FREQUENCIES",
- # "#MA": "MAX_ECHO_RANGE",
- # "#MI": "MIN_ECHO_RANGE",
- # "#ND": "NUM_PING_TIME_DROPNA",
- # "#PS": "PIPELINE_STATUS", # testing this updated
- # "#PT": "PIPELINE_TIME", # testing this updated
- # "#SE": "SENSOR_NAME",
- # "#SH": "SHIP_NAME",
- # "#ST": "START_TIME",
- # "#ZB": "ZARR_BUCKET",
- # "#ZP": "ZARR_PATH",
- # },
- # expression_attribute_values={
- # ":ch": {"L": [{"S": i} for i in test_channels]},
- # ":et": {"S": "2007-07-08T20:44:55.598Z"},
- # ":ed": {"S": ""},
- # ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
- # ":ma": {"N": str(np.round(749.7416, 4))},
- # ":mi": {"N": str(np.round(0.25, 4))},
- # ":nd": {"N": str(801)},
- # ":ps": {"S": "SUCCESS_AGGREGATOR"},
- # ":pt": {"S": "2023-10-02T08:41:50Z"},
- # ":se": {"S": "EK60"},
- # ":sh": {"S": "Miller_Freeman"},
- # ":st": {"S": "2007-07-08T20:04:49.552Z"},
- # ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
- # ":zp": {
- # "S": "level_1/Miller_Freeman/MF0710/EK60/HAKE2007-D20070708-T200449.model"
- # },
- # },
- # update_expression=(
- # "SET "
- # "#CH = :ch, "
- # "#ET = :et, "
- # "#ED = :ed, "
- # "#FR = :fr, "
- # "#MA = :ma, "
- # "#MI = :mi, "
- # "#ND = :nd, "
- # "#PS = :ps, "
- # "#PT = :pt, "
- # "#SE = :se, "
- # "#SH = :sh, "
- # "#ST = :st, "
- # "#ZB = :zb, "
- # "#ZP = :zp"
- # ),
- # )
- # ###################################################
- #
- # # [2 of 3 - Part I] Create S3 bucket
- # input_s3m = S3Manager()
- # input_s3m.create_bucket(bucket_name=test_input_bucket_name)
- # output_s3m = S3Manager() # TODO: requires different credentials
- # output_s3m.create_bucket(bucket_name=test_output_bucket_name)
- # # TODO: create two buckets with two sets of credentials required
- # all_buckets = input_s3m.list_buckets()
- # print(all_buckets)
- #
- # # [2 of 3 - Part II] Add Object to Input Bucket
- # input_s3m.put(
- # bucket_name=test_input_bucket_name, key="the_input_key", body="the_input_body"
- # )
- #
- # # [3 of 3] Set up SNS and SQS
- # snsm = SNSManager()
- # sqsm = SQSManager()
- #
- # sqs_queue_name = "test-queue"
- # create_queue_response = sqsm.create_queue(queue_name=sqs_queue_name)
- # print(create_queue_response["QueueUrl"])
- # assert create_queue_response["ResponseMetadata"]["HTTPStatusCode"] == 200
- #
- # create_topic_response = snsm.create_topic(topic_name=test_topic_name)
- # sns_topic_arn = create_topic_response["TopicArn"]
- # sqs_queue = sqsm.get_queue_by_name(queue_name=sqs_queue_name)
- # sqs_queue_arn = sqs_queue.attributes["QueueArn"]
- # snsm.subscribe(topic_arn=sns_topic_arn, endpoint=sqs_queue_arn)
- # ###troubleshooting
- # # snsm.list_topics()
- # # snsm.publish(
- # # topic_arn=sns_topic_arn,
- # # message=json.dumps("abc"),
- # # # MessageStructure='json'
- # # )
- # ###### end setup ######
- #
- # #############################################################
- # model_instance = Process()
- # # run the src
- # model_instance.execute()
- # #############################################################
- #
- # # tests all the outcomes
- # # (1) file is in bucket
- # # (2) sns messages are in queue
- # # (3) dynamodb was updated
- #
- # # [1 of 3] Check that file is in the Output Bucket
- # # TODO: change to writing file to s3 bucket using s3fs
- # s3_object = input_s3m.get(bucket_name=test_input_bucket_name, key="the_input_key")
- # body = s3_object.get()["Body"].read().decode("utf-8")
- # assert body == "the_input_body"
- #
- # # [2 of 3] Validate SNS Message was Dispatched
- # sqs_msgs = sqs_queue.receive_messages(
- # AttributeNames=["All"],
- # MessageAttributeNames=["All"],
- # VisibilityTimeout=15,
- # WaitTimeSeconds=20,
- # MaxNumberOfMessages=10,
- # )
- # assert len(sqs_msgs) == 1
- # test_success_message = {
- # "default": {
- # "shipName": "David_Starr_Jordan",
- # "cruiseName": "DS0604",
- # "sensorName": "EK60",
- # "fileName": "DSJ0604-D20060406-T113407.raw",
- # }
- # }
- # assert json.loads(sqs_msgs[0].body)["Message"] == json.dumps(test_success_message)
- #
- # # [3 of 3] Check that DynamoDB has been updated
- # # TODO: get the table as a dataframe
- # df = ddbm.get_table_as_df(
- # table_name=test_table_name,
- # ship_name="David_Starr_Jordan",
- # cruise_name="DS0604",
- # sensor_name="EK60",
- # )
- #
- # # 2 files were processed previously, creating new total of 3
- # assert df.shape[0] == 3
- #
- # # 16 columns of data are captured
- # assert df.shape[1] == 16
- #
- # # check that new file name is included
- # assert "DSJ0604-D20060406-T113407.raw" in list(df["FILE_NAME"])
- #
- # # make sure that other filenames aren't included
- # assert "HAKE2007-D20070708-T200449.raw" not in list(df["FILE_NAME"])
- #
- # # assert df[PIPELINE_STATUS'] == __?__
- #
- #
- # # def test_model_file_already_exists(self):
- # # pass
- #
- # #######################################################