water-column-sonar-processing 0.0.13.tar.gz → 24.1.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.pre-commit-config.yaml +10 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/PKG-INFO +24 -21
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/README.md +20 -18
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/pyproject.toml +9 -3
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/requirements.txt +4 -2
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/requirements_dev.txt +3 -1
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/s3fs_manager.py +1 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/cruise/create_empty_zarr_store.py +4 -5
- water_column_sonar_processing-24.1.1/water_column_sonar_processing/cruise/datatree_manager.py +24 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/cruise/resample_regrid.py +15 -20
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/geometry/__init__.py +2 -1
- water_column_sonar_processing-24.1.1/water_column_sonar_processing/geometry/elevation_manager.py +112 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/index/index_manager.py +92 -7
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/model/zarr_manager.py +14 -9
- water_column_sonar_processing-24.1.1/water_column_sonar_processing/processing/__init__.py +5 -0
- water_column_sonar_processing-24.1.1/water_column_sonar_processing/processing/batch_downloader.py +132 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/processing/raw_to_zarr.py +0 -2
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/utility/constants.py +3 -2
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing.egg-info/PKG-INFO +24 -21
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing.egg-info/SOURCES.txt +3 -2
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing.egg-info/requires.txt +2 -1
- water_column_sonar_processing-0.0.13/water_column_sonar_processing/cruise/experiment_datatree.py +0 -13
- water_column_sonar_processing-0.0.13/water_column_sonar_processing/processing/__init__.py +0 -4
- water_column_sonar_processing-0.0.13/water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.env-test +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.github/workflows/test_action.yaml +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.gitignore +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.python-version +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/open-science-data-federation/ml/autoencoder_example.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/open-science-data-federation/osdf_examples/foo.ipynb +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/open-science-data-federation/osdf_examples/sonar_ai.ipynb +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/pytest.ini +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/setup.cfg +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/conftest.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/test_process.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/test_resources/index/calibrated_cruises.csv +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/test_resources/raw_to_zarr/D20070724-T042400.bot +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/test_resources/raw_to_zarr/D20070724-T042400.idx +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/tests/test_resources/raw_to_zarr/D20070724-T042400.raw +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/dynamodb_manager.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/s3_manager.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/sns_manager.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/sqs_manager.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/cruise/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/geometry/geometry_manager.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/geometry/geometry_simplification.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/geometry/pmtile_generation.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/index/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/model/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/process.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/utility/__init__.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/utility/cleaner.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/utility/pipeline_status.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/utility/timestamp.py +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing.egg-info/dependency_links.txt +0 -0
- {water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing.egg-info/top_level.txt +0 -0
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/.pre-commit-config.yaml
RENAMED

@@ -1,4 +1,5 @@
 repos:
+### Security Scan for AWS Secrets ###
   - repo: local
     hooks:
       - id: trufflehog
@@ -34,3 +35,12 @@ repos:
 #  - id: isort
 #    name: isort (python)
 #    args: ["--profile", "black", "--filter-files"]
+
+### Static Security Scan ###
+# To run manually you can do: "bandit -c pyproject.toml -r ."
+  - repo: https://github.com/PyCQA/bandit
+    rev: '1.8.0'
+    hooks:
+      - id: bandit
+        args: ["-c", "pyproject.toml"]
+        additional_dependencies: [ "bandit[toml]" ]

{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.2
 Name: water_column_sonar_processing
-Version: 0.0.13
+Version: 24.1.1
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -8,7 +8,7 @@ Project-URL: Issues, https://github.com/CI-CMG/water-column-sonar-processing/iss
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.10
+Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: aiobotocore==2.15.2
@@ -26,26 +26,19 @@ Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
 Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
-Requires-Dist: s3fs==
+Requires-Dist: s3fs==2024.2.0
 Requires-Dist: scipy==1.14.1
 Requires-Dist: setuptools
 Requires-Dist: shapely==2.0.3
 Requires-Dist: typing-extensions==4.10.0
 Requires-Dist: xarray==2024.10.0
+Requires-Dist: xbatcher==0.4.0
 Requires-Dist: zarr==2.18.3
 
 # Water Column Sonar Processing
 Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information
 
-[badge image]
-
-[badge image]
-
-[badge image]
-
-[badge image]
-
-[badge image] [badge image]
+[badge image] [badge image] [badge image] [badge image] [badge image] [badge image]
 
 # Setting up the Python Environment
 > Python 3.10.12
@@ -103,12 +96,6 @@ or
 Following this tutorial:
 https://packaging.python.org/en/latest/tutorials/packaging-projects/
 
-# To Publish To PROD
-```commandline
-python -m build
-python -m twine upload --repository pypi dist/*
-```
-
 # Pre Commit Hook
 see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
@@ -133,13 +120,29 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 # Tag a Release
 Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"
 ```commandline
-git tag "
+git tag -a v24.01.01 -m "Releasing version v24.01.01"
 ```
 
 ```commandline
 git push origin --tags
 ```
 
+# To Publish To PROD
+```commandline
+python -m build
+python -m twine upload --repository pypi dist/*
+```
+
 # TODO:
 add https://pypi.org/project/setuptools-scm/
 for extracting the version
+
+# Security scanning
+> bandit -r water_column_sonar_processing/
+
+# Data Debugging
+Experimental Plotting in Xarray (hvPlot):
+https://colab.research.google.com/drive/18vrI9LAip4xRGEX6EvnuVFp35RAiVYwU#scrollTo=q9_j9p2yXsLV
+
+HB0707 Cruise zoomable:
+https://hb0707.s3.us-east-1.amazonaws.com/index.html

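A note on the two version strings in this diff: pyproject.toml (below) writes `version = "24.01.01"`, while PKG-INFO reports `Version: 24.1.1`. That is PEP 440 normalization (leading zeros in release segments are dropped), not a mismatch. A minimal sketch for checking the installed version:

```python
# Requires the package to be pip-installed; PEP 440 normalizes
# pyproject.toml's "24.01.01" to the "24.1.1" reported here.
from importlib.metadata import version

print(version("water_column_sonar_processing"))  # -> "24.1.1"
```
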
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/README.md
RENAMED

@@ -1,15 +1,7 @@
 # Water Column Sonar Processing
 Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information
 
-[badge image]
-
-[badge image]
-
-[badge image]
-
-[badge image]
-
-[badge image] [badge image]
+[badge image] [badge image] [badge image] [badge image] [badge image] [badge image]
 
 # Setting up the Python Environment
 > Python 3.10.12
@@ -67,12 +59,6 @@ or
 Following this tutorial:
 https://packaging.python.org/en/latest/tutorials/packaging-projects/
 
-# To Publish To PROD
-```commandline
-python -m build
-python -m twine upload --repository pypi dist/*
-```
-
 # Pre Commit Hook
 see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
@@ -97,13 +83,29 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 # Tag a Release
 Step 1 --> increment the semantic version in the zarr_manager.py "metadata" & the "pyproject.toml"
 ```commandline
-git tag "
+git tag -a v24.01.01 -m "Releasing version v24.01.01"
 ```
 
 ```commandline
 git push origin --tags
 ```
 
+# To Publish To PROD
+```commandline
+python -m build
+python -m twine upload --repository pypi dist/*
+```
+
 # TODO:
 add https://pypi.org/project/setuptools-scm/
-for extracting the version
+for extracting the version
+
+# Security scanning
+> bandit -r water_column_sonar_processing/
+
+# Data Debugging
+Experimental Plotting in Xarray (hvPlot):
+https://colab.research.google.com/drive/18vrI9LAip4xRGEX6EvnuVFp35RAiVYwU#scrollTo=q9_j9p2yXsLV
+
+HB0707 Cruise zoomable:
+https://hb0707.s3.us-east-1.amazonaws.com/index.html

{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/pyproject.toml
RENAMED

@@ -8,13 +8,14 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "water_column_sonar_processing"
-version = "0.0.13"
+version = "24.01.01"
 authors = [
   { name="Rudy Klucik", email="rudy.klucik@noaa.gov" },
 ]
 description = "A processing tool for water column sonar data."
 readme = "README.md"
-requires-python = ">=3.10"
+#requires-python = ">=3.10"
+requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: MIT License",
@@ -34,4 +35,9 @@ optional-dependencies = {dev = { file = ["requirements_dev.txt"] }}
 #fallback_version = "unknown"
 #local_scheme = "node-and-date"
 #write_to = "_water_column_sonar_processing_version.py"
-#write_to_template = 'version = "{version}"'
+#write_to_template = 'version = "{version}"'
+
+[tool.bandit]
+exclude_dirs = ["tests"]
+[tool.pre-commit-hooks.bandit]
+exclude = ["*/tests/*"]

{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/requirements.txt
RENAMED

@@ -19,12 +19,14 @@ pyarrow==18.1.0
 python-dotenv==1.0.1
 requests==2.32.3
 #s3fs==2024.3.1
-#s3fs==2024.
-s3fs==
+#s3fs==2024.3.0 # does not work
+s3fs==2024.2.0 # works ...something between 2024.2 and 2024.3 creates the problem
 scipy==1.14.1
 #setuptools==75.6.0
 setuptools
 shapely==2.0.3
 typing-extensions==4.10.0
 xarray==2024.10.0
+# xbatcher[tensorflow]
+xbatcher==0.4.0
 zarr==2.18.3

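The new `xbatcher==0.4.0` pin supports slicing xarray data into fixed-size batches (the commented `# xbatcher[tensorflow]` hints at ML training use). A minimal sketch of the `BatchGenerator` API, with illustrative dimension names and sizes that are not taken from this package:

```python
import numpy as np
import xarray as xr
import xbatcher

# Toy Sv-like dataset (dimension names are illustrative)
ds = xr.Dataset(
    {"Sv": (("depth", "ping_time"), np.random.rand(100, 1000).astype("float32"))}
)

# Yield batches of 100 depth samples x 64 pings
bgen = xbatcher.BatchGenerator(ds, input_dims={"depth": 100, "ping_time": 64})
for batch in bgen:
    print(batch["Sv"].shape)  # (100, 64)
    break
```
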
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/aws/s3fs_manager.py
RENAMED

@@ -16,6 +16,7 @@ class S3FSManager:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
+            asynchronous=False,
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),

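`asynchronous=False` is already the s3fs default; making it explicit pins the filesystem to the blocking API, where methods like `ls` and `get` are called directly with no asyncio event loop (with `asynchronous=True` the caller must await the async methods inside a running loop). A minimal sketch; the bucket name is hypothetical:

```python
import s3fs

# Blocking filesystem: callable from plain synchronous code.
# anon=True is for public-read buckets (example bucket below is hypothetical).
fs = s3fs.S3FileSystem(asynchronous=False, anon=True)
print(fs.ls("noaa-wcsd-pds"))
```
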
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/cruise/create_empty_zarr_store.py
RENAMED

@@ -1,4 +1,5 @@
 import os
+import tempfile
 
 import numcodecs
 import numpy as np
@@ -11,7 +12,6 @@ from water_column_sonar_processing.utility import Cleaner
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
-# TEMPDIR = "/tmp"
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
@@ -61,7 +61,6 @@ class CreateEmptyZarrStore:
     # TODO: move to common place
 
     #######################################################
-    # @classmethod
     def create_cruise_level_zarr_store(
         self,
         output_bucket_name: str,
@@ -69,8 +68,8 @@ class CreateEmptyZarrStore:
         cruise_name: str,
         sensor_name: str,
         table_name: str,
-        tempdir: str,
     ) -> None:
+        tempdir = tempfile.TemporaryDirectory()
         try:
             # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
@@ -146,7 +145,7 @@ class CreateEmptyZarrStore:
             print(f"new_height: {new_height}")
 
             zarr_manager.create_zarr_store(
-                path=tempdir,
+                path=tempdir.name,  # TODO: need to use .name or problem
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
@@ -159,7 +158,7 @@ class CreateEmptyZarrStore:
             #################################################################
             self.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
-                local_directory=tempdir,
+                local_directory=tempdir.name,  # TODO: need to use .name or problem
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )

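The `tempdir: str` parameter is replaced by a `tempfile.TemporaryDirectory()` created inside the method, and the TODO comments flag the easy mistake: the object itself is not a path; the path string lives on `.name`. A standard-library-only sketch of the distinction:

```python
import tempfile

tmp = tempfile.TemporaryDirectory()
print(type(tmp))  # tempfile.TemporaryDirectory -- not a path string
print(tmp.name)   # e.g. '/tmp/tmpab12cd34' -- what zarr/S3 upload code needs
tmp.cleanup()     # removes the directory and its contents

# Equivalent, with automatic cleanup:
with tempfile.TemporaryDirectory() as path:  # `path` is already the string
    print(path)
```
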
water_column_sonar_processing-24.1.1/water_column_sonar_processing/cruise/datatree_manager.py
ADDED

@@ -0,0 +1,24 @@
+### https://xarray-datatree.readthedocs.io/en/latest/data-structures.html
+import numpy as np
+from datatree import DataTree
+import xarray as xr
+
+class DatatreeManager:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.dtype = "float32"
+
+    #################################################################
+    def create_datatree(
+        self,
+        input_ds,
+    ) -> None:
+        ds1 = xr.Dataset({"foo": "orange"})
+        dt = DataTree(name="root", data=ds1)  # create root node
+        ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
+        return dt
+
+
+

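As written, datatree_manager.py is a stub: `create_datatree` ignores `input_ds`, never attaches `ds2`, and returns `dt` despite the `-> None` annotation. For orientation, a minimal sketch of the node API from the xarray-datatree docs linked at the top of the file; the child node shown here is an assumption about intended use, not code from this package:

```python
import xarray as xr
from datatree import DataTree

root_ds = xr.Dataset({"foo": "orange"})
dt = DataTree(name="root", data=root_ds)  # root node

child_ds = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
DataTree(name="child", parent=dt, data=child_ds)  # attach a child under root

print(dt)  # renders the tree: root -> child
```
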
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/cruise/resample_regrid.py
RENAMED

@@ -281,12 +281,7 @@ class ResampleRegrid:
             print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
             #########################################################################
             # write Sv values to cruise-level-model-store
-            for channel in range(
-                len(input_xr.channel.values)
-            ):  # does not like being written in one fell swoop :(
-                output_zarr_store.Sv[
-                    :, start_ping_time_index:end_ping_time_index, channel
-                ] = regrid_resample[:, :, channel]
+            output_zarr_store.Sv[:, start_ping_time_index:end_ping_time_index, :] = regrid_resample.values
 
             #########################################################################
             # [5] write subset of latitude/longitude
@@ -300,27 +295,27 @@ class ResampleRegrid:
             #########################################################################
             # TODO: add the "detected_seafloor_depth/" to the
             # L2 cruise dataarrays
-            # TODO: make bottom optional
+            # TODO: make bottom optional
             # TODO: Only checking the first channel for now. Need to average across all channels
             # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-
-
-
-
-
-
-
-
-
-
-
+            if 'detected_seafloor_depth' in input_xr.variables:
+                print('Found detected_seafloor_depth, adding data to output store.')
+                detected_seafloor_depth = input_xr.detected_seafloor_depth.values
+                detected_seafloor_depth[detected_seafloor_depth == 0.] = np.nan
+                # TODO: problem here: Processing file: D20070711-T210709.
+                detected_seafloor_depths = np.nanmean(detected_seafloor_depth, 0)  # RuntimeWarning: Mean of empty slice
+                detected_seafloor_depths[detected_seafloor_depths == 0.] = np.nan
+                print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
+                print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
+                #available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                output_zarr_store.bottom[
+                    start_ping_time_index:end_ping_time_index
+                ] = detected_seafloor_depths
             #########################################################################
             #########################################################################
         except Exception as err:
             print(f"Problem interpolating the data: {err}")
             raise err
-        # else:
-        #     pass
         finally:
             print("Done interpolating data.")
             # TODO: read across times and verify data was written?

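The first hunk collapses the per-channel write loop into a single slab assignment (the deleted comment suggests the loop was a workaround, plausibly related to the s3fs pin in requirements.txt). A toy sketch of the two patterns against an in-memory zarr array, with illustrative shapes:

```python
import numpy as np
import zarr

# depth x ping_time x channel, chunked one channel at a time
sv = zarr.zeros((10, 1000, 4), chunks=(10, 100, 1), dtype="float32")
block = np.random.rand(10, 20, 4).astype("float32")

# old pattern: one channel per write
for channel in range(block.shape[2]):
    sv[:, 40:60, channel] = block[:, :, channel]

# new pattern: one write across all channels
sv[:, 40:60, :] = block
```
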
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/geometry/__init__.py
RENAMED

@@ -1,5 +1,6 @@
+from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
 from .geometry_simplification import GeometrySimplification
 from .pmtile_generation import PMTileGeneration
 
-__all__ = ["GeometryManager", "GeometrySimplification", "PMTileGeneration"]
+__all__ = ["ElevationManager", "GeometryManager", "GeometrySimplification", "PMTileGeneration"]

water_column_sonar_processing-24.1.1/water_column_sonar_processing/geometry/elevation_manager.py
ADDED

@@ -0,0 +1,112 @@
+"""
+https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry=-31.70235%2C13.03332&geometryType=esriGeometryPoint&returnGeometry=false&returnCatalogItems=false&f=json
+
+https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/
+identify?
+geometry=-31.70235%2C13.03332
+&geometryType=esriGeometryPoint
+&returnGeometry=false
+&returnCatalogItems=false
+&f=json
+{"objectId":0,"name":"Pixel","value":"-5733","location":{"x":-31.702349999999999,"y":13.03332,"spatialReference":{"wkid":4326,"latestWkid":4326}},"properties":null,"catalogItems":null,"catalogItemVisibilities":[]}
+-5733
+
+(base) rudy:deleteME rudy$ curl https://api.opentopodata.org/v1/gebco2020?locations=13.03332,-31.70235
+{
+    "results": [
+        {
+            "dataset": "gebco2020",
+            "elevation": -5729.0,
+            "location": {
+                "lat": 13.03332,
+                "lng": -31.70235
+            }
+        }
+    ],
+    "status": "OK"
+}
+"""
+import json
+import time
+
+import requests
+from collections.abc import Generator
+
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
+    # Yields successively n-sized chunks from ll.
+    for i in range(0, len(ll), n):
+        yield ll[i : i + n]
+
+
+class ElevationManager:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
+        self.TIMOUT_SECONDS = 10
+
+    #######################################################
+    def get_arcgis_elevation(
+        self,
+        lngs: list,
+        lats: list,
+        chunk_size: int=500,  # I think this is the api limit
+    ) -> int:
+        # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
+        # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
+        ### 'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={"points":[[-31.70235,13.03332],[-32.70235,14.03332]]}&geometryType=esriGeometryMultipoint&returnGeometry=false&returnCatalogItems=false&f=json'
+        if len(lngs) != len(lats):
+            raise ValueError("lngs and lats must have same length")
+
+        geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
+
+        depths = []
+
+        list_of_points = [list(elem) for elem in list(zip(lngs, lats))]
+        for chunk in chunked(list_of_points, chunk_size):
+            time.sleep(0.1)
+            # order: (lng, lat)
+            geometry = f'{{"points":{str(chunk)}}}'
+            url=f'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json'
+            result = requests.get(url, timeout=self.TIMOUT_SECONDS)
+            res = json.loads(result.content.decode('utf8'))
+            if 'results' in res:
+                for element in res['results']:
+                    depths.append(float(element['value']))
+            elif 'value' in res:
+                depths.append(float(res['value']))
+
+        return depths
+
+    # def get_gebco_bathymetry_elevation(self) -> int:
+    #     # Documentation: https://www.opentopodata.org/datasets/gebco2020/
+    #     latitude = 13.03332
+    #     longitude = -31.70235
+    #     dataset = "gebco2020"
+    #     url = f"https://api.opentopodata.org/v1/{dataset}?locations={latitude},{longitude}"
+    #     pass
+
+    # def get_elevation(
+    #     self,
+    #     df,
+    #     lat_column,
+    #     lon_column,
+    # ) -> int:
+    #     """Query service using lat, lon. add the elevation values as a new column."""
+    #     url = r'https://epqs.nationalmap.gov/v1/json?'
+    #     elevations = []
+    #     for lat, lon in zip(df[lat_column], df[lon_column]):
+    #         # define rest query params
+    #         params = {
+    #             'output': 'json',
+    #             'x': lon,
+    #             'y': lat,
+    #             'units': 'Meters'
+    #         }
+    #         result = requests.get((url + urllib.parse.urlencode(params)))
+    #         elevations.append(result.json()['value'])
+    #     return elevations

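A hypothetical call to the new `ElevationManager`, using the coordinates from the module docstring (requires network access to the NOAA NCEI ArcGIS service). Note the `-> int` annotation is inaccurate; the method builds and returns a list of floats:

```python
from water_column_sonar_processing.geometry import ElevationManager

em = ElevationManager()
# Longitude/latitude order matches the ArcGIS multipoint geometry
depths = em.get_arcgis_elevation(lngs=[-31.70235], lats=[13.03332])
print(depths)  # e.g. [-5733.0], per the sample response in the docstring
```
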
{water_column_sonar_processing-0.0.13 → water_column_sonar_processing-24.1.1}/water_column_sonar_processing/index/index_manager.py
RENAMED

@@ -7,13 +7,20 @@ from concurrent.futures import as_completed
 from water_column_sonar_processing.aws import S3Manager
 
 
+MAX_POOL_CONNECTIONS = 64
+MAX_CONCURRENCY = 64
+MAX_WORKERS = 64
+GB = 1024**3
+
+
 class IndexManager:
+    # TODO: index into dynamodb instead of csv files
 
     def __init__(self, input_bucket_name, calibration_bucket, calibration_key):
         self.input_bucket_name = input_bucket_name
         self.calibration_bucket = calibration_bucket
         self.calibration_key = calibration_key
-        self.s3_manager = S3Manager()
+        self.s3_manager = S3Manager()  # TODO: make anonymous?
 
     #################################################################
     def list_ships(
@@ -50,6 +57,9 @@ class IndexManager:
         self,
         cruise_prefixes,
     ):
+        """
+        This returns a list of ek60 prefixed cruises.
+        """
         cruise_sensors = []  # includes all sensor types
         for cruise_prefix in cruise_prefixes:
             page_iterator = self.s3_manager.paginator.paginate(
@@ -67,9 +77,12 @@ class IndexManager:
         cruise_name,
         sensor_name,
     ):
+        # Gets all raw files for a cruise under the given prefix
         prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
         page_iterator = self.s3_manager.paginator.paginate(
-            Bucket=self.input_bucket_name,
+            Bucket=self.input_bucket_name,
+            Prefix=prefix,
+            Delimiter="/"
         )
         all_files = []
         for page in page_iterator:
@@ -77,6 +90,57 @@ class IndexManager:
             all_files.extend([i["Key"] for i in page["Contents"]])
         return [i for i in all_files if i.endswith(".raw")]
 
+    def get_first_raw_file(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+    ):
+        # Same as above but only needs to get the first raw file
+        # because we are only interested in the first datagram of one file
+        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"  # Note no forward slash at beginning
+        # page_iterator = self.s3_manager.paginator.paginate(
+        #     Bucket=self.input_bucket_name,
+        #     Prefix=prefix,
+        #     Delimiter="/",
+        #     PaginationConfig={ 'MaxItems': 5 }
+        # )  # TODO: this can create a problem if there is a non raw file returned first
+        ### filter with JMESPath expressions ###
+        page_iterator = self.s3_manager.paginator.paginate(
+            Bucket=self.input_bucket_name,
+            Prefix=prefix,
+            Delimiter="/",
+        )
+        # page_iterator = page_iterator.search("Contents[?Size < `2200`][]")
+        page_iterator = page_iterator.search(expression="Contents[?contains(Key, '.raw')] ")
+        for res in page_iterator:
+            if "Key" in res:
+                return res["Key"]
+        # else raise exception?
+
+        # DSJ0604-D20060406-T050022.bot 2kB == 2152 'Size'
+
+    def get_files_under_size(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+    ):
+        # THIS isn't used, just playing with JMES paths spec
+        prefix = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/"
+        ### filter with JMESPath expressions ###
+        page_iterator = self.s3_manager.paginator.paginate(
+            Bucket=self.input_bucket_name,
+            Prefix=prefix,
+            Delimiter="/",
+        )
+        page_iterator = page_iterator.search("Contents[?Size < `2200`][]")
+        all_files = []
+        for page in page_iterator:
+            if "Contents" in page.keys():
+                all_files.extend([i["Key"] for i in page["Contents"]])
+        return [i for i in all_files if i.endswith(".raw")]
+
     #################################################################
     def get_raw_files_csv(
         self,
@@ -102,6 +166,29 @@ class IndexManager:
         df.to_csv(f"{ship_name}_{cruise_name}.csv", index=False, header=False, sep=" ")
         print("done")
 
+    def get_raw_files_list(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+    ):
+        # gets all raw files in cruise and returns a list of dicts
+        raw_files = self.get_raw_files(
+            ship_name=ship_name,
+            cruise_name=cruise_name,
+            sensor_name=sensor_name
+        )
+        files_list = [
+            {
+                "ship_name": ship_name,
+                "cruise_name": cruise_name,
+                "sensor_name": sensor_name,
+                "file_name": os.path.basename(raw_file),
+            }
+            for raw_file in raw_files
+        ]
+        return files_list
+
     #################################################################
     def get_subset_ek60_prefix(  # TODO: is this used?
         self,
@@ -169,16 +256,14 @@ class IndexManager:
         return first_datagram
 
     #################################################################
-    def get_subset_datagrams(
+    def get_subset_datagrams(  # TODO: is this getting used
         self,
         df: pd.DataFrame
     ) -> list:
         print("getting subset of datagrams")
-        select_keys = list(
-            df[["KEY", "CRUISE"]].drop_duplicates(subset="CRUISE")["KEY"].values
-        )
+        select_keys = df[["KEY", "CRUISE"]].drop_duplicates(subset="CRUISE")["KEY"].values.tolist()
         all_datagrams = []
-        with ThreadPoolExecutor(max_workers=
+        with ThreadPoolExecutor(max_workers=MAX_POOL_CONNECTIONS) as executor:
             futures = [
                 executor.submit(self.scan_datagram, select_key)
                 for select_key in select_keys