water-column-sonar-processing 0.0.7__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic. Click here for more details.
- {water_column_sonar_processing-0.0.7/src/water_column_sonar_processing.egg-info → water_column_sonar_processing-0.0.8}/PKG-INFO +20 -10
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/README.md +10 -1
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/pyproject.toml +19 -9
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/requirements.txt +12 -11
- water_column_sonar_processing-0.0.8/src/water_column_sonar_processing/__init__.py +15 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/dynamodb_manager.py +149 -43
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/s3_manager.py +71 -37
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/cruise/create_empty_zarr_store.py +6 -4
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/cruise/resample_regrid.py +3 -3
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/geometry/geometry_manager.py +21 -6
- water_column_sonar_processing-0.0.8/src/water_column_sonar_processing/geometry/pmtile_generation.py +264 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/index/index_manager.py +25 -13
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/model/zarr_manager.py +26 -25
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/process.py +4 -4
- water_column_sonar_processing-0.0.8/src/water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing-0.0.8/src/water_column_sonar_processing/processing/cruise_sampler.py +342 -0
- water_column_sonar_processing-0.0.8/src/water_column_sonar_processing/processing/raw_to_zarr.py +349 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/utility/cleaner.py +1 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/utility/constants.py +6 -2
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8/src/water_column_sonar_processing.egg-info}/PKG-INFO +20 -10
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing.egg-info/SOURCES.txt +5 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing.egg-info/requires.txt +10 -9
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_create_empty_zarr_store.py +7 -5
- water_column_sonar_processing-0.0.8/tests/test_dynamodb_manager.py +376 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_geometry_manager.py +10 -6
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_index.py +34 -9
- water_column_sonar_processing-0.0.8/tests/test_pmtile_generation.py +268 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_process.py +6 -22
- water_column_sonar_processing-0.0.8/tests/test_raw_to_zarr.py +146 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_resample_regrid.py +2 -2
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_s3_manager.py +27 -7
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_s3fs_manager.py +4 -17
- water_column_sonar_processing-0.0.8/tests/test_s3fs_with_moto.py +78 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_sns_sqs_manager.py +2 -2
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_zarr_manager.py +30 -23
- water_column_sonar_processing-0.0.7/src/water_column_sonar_processing/__init__.py +0 -16
- water_column_sonar_processing-0.0.7/src/water_column_sonar_processing/geometry/pmtile_generation.py +0 -75
- water_column_sonar_processing-0.0.7/tests/test_dynamodb_manager.py +0 -204
- water_column_sonar_processing-0.0.7/tests/test_s3fs_with_moto.py +0 -31
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/setup.cfg +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/s3fs_manager.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/sns_manager.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/aws/sqs_manager.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/cruise/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/geometry/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/geometry/geometry_simplification.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/index/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/model/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/utility/__init__.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/utility/pipeline_status.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing/utility/timestamp.py +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing.egg-info/dependency_links.txt +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/src/water_column_sonar_processing.egg-info/top_level.txt +0 -0
- {water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/tests/test_geometry_simplification.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: water_column_sonar_processing
|
|
3
|
-
Version: 0.0.7
|
|
3
|
+
Version: 0.0.8
|
|
4
4
|
Summary: A processing tool for water column sonar data.
|
|
5
5
|
Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
|
|
6
6
|
Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
|
|
@@ -11,26 +11,27 @@ Classifier: Operating System :: OS Independent
|
|
|
11
11
|
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: aiobotocore
|
|
15
|
-
Requires-Dist:
|
|
16
|
-
Requires-Dist:
|
|
17
|
-
Requires-Dist: botocore~=1.33.13
|
|
14
|
+
Requires-Dist: aiobotocore==2.15.2
|
|
15
|
+
Requires-Dist: boto3==1.35.36
|
|
16
|
+
Requires-Dist: botocore==1.35.36
|
|
18
17
|
Requires-Dist: echopype==0.9.0
|
|
19
18
|
Requires-Dist: fiona==1.10.1
|
|
20
19
|
Requires-Dist: geopandas==1.0.1
|
|
21
|
-
Requires-Dist: mock
|
|
22
|
-
Requires-Dist: moto
|
|
20
|
+
Requires-Dist: mock==5.1.0
|
|
21
|
+
Requires-Dist: moto[all]==5.0.21
|
|
22
|
+
Requires-Dist: moto[server]==5.0.21
|
|
23
23
|
Requires-Dist: numcodecs==0.13.1
|
|
24
24
|
Requires-Dist: numpy==1.26.4
|
|
25
25
|
Requires-Dist: pandas==2.2.3
|
|
26
|
-
Requires-Dist:
|
|
26
|
+
Requires-Dist: pyarrow==18.1.0
|
|
27
27
|
Requires-Dist: python-dotenv==1.0.0
|
|
28
28
|
Requires-Dist: requests==2.32.3
|
|
29
|
-
Requires-Dist: s3fs==
|
|
29
|
+
Requires-Dist: s3fs==2023.12.1
|
|
30
30
|
Requires-Dist: scipy==1.14.1
|
|
31
|
+
Requires-Dist: setuptools==75.6.0
|
|
31
32
|
Requires-Dist: shapely==2.0.3
|
|
32
33
|
Requires-Dist: typing-extensions==4.10.0
|
|
33
|
-
Requires-Dist: xarray==
|
|
34
|
+
Requires-Dist: xarray==2024.10.0
|
|
34
35
|
Requires-Dist: zarr==2.18.3
|
|
35
36
|
|
|
36
37
|
# Water Column Sonar Processing
|
|
@@ -85,6 +86,8 @@ Processing tool for converting L0 data to L1 and L2 as well as generating geospa
|
|
|
85
86
|
```commandline
|
|
86
87
|
pytest --disable-warnings
|
|
87
88
|
```
|
|
89
|
+
or
|
|
90
|
+
> pytest --cache-clear --cov=src tests/ --cov-report=xml
|
|
88
91
|
|
|
89
92
|
# Instructions
|
|
90
93
|
Following this tutorial:
|
|
@@ -122,3 +125,10 @@ https://plugins.jetbrains.com/plugin/20574-ruff
|
|
|
122
125
|
|
|
123
126
|
# Colab Test
|
|
124
127
|
https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scrollTo=AayVyvpBdfIZ
|
|
128
|
+
|
|
129
|
+
# Test Coverage
|
|
130
|
+
20241124
|
|
131
|
+
8 failed, 32 passed, 3 skipped, 1 warning in 6.92s
|
|
132
|
+
20241125
|
|
133
|
+
5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
|
|
134
|
+
3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
|
|
@@ -50,6 +50,8 @@ Processing tool for converting L0 data to L1 and L2 as well as generating geospa
|
|
|
50
50
|
```commandline
|
|
51
51
|
pytest --disable-warnings
|
|
52
52
|
```
|
|
53
|
+
or
|
|
54
|
+
> pytest --cache-clear --cov=src tests/ --cov-report=xml
|
|
53
55
|
|
|
54
56
|
# Instructions
|
|
55
57
|
Following this tutorial:
|
|
@@ -86,4 +88,11 @@ Ruff
|
|
|
86
88
|
https://plugins.jetbrains.com/plugin/20574-ruff
|
|
87
89
|
|
|
88
90
|
# Colab Test
|
|
89
|
-
https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scrollTo=AayVyvpBdfIZ
|
|
91
|
+
https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scrollTo=AayVyvpBdfIZ
|
|
92
|
+
|
|
93
|
+
# Test Coverage
|
|
94
|
+
20241124
|
|
95
|
+
8 failed, 32 passed, 3 skipped, 1 warning in 6.92s
|
|
96
|
+
20241125
|
|
97
|
+
5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
|
|
98
|
+
3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
|
|
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "water_column_sonar_processing"
|
|
10
|
-
version = "0.0.7"
|
|
10
|
+
version = "0.0.8"
|
|
11
11
|
authors = [
|
|
12
12
|
{ name="Rudy Klucik", email="rudy.klucik@noaa.gov" },
|
|
13
13
|
]
|
|
@@ -25,19 +25,29 @@ dynamic = ["dependencies"]
|
|
|
25
25
|
Homepage = "https://github.com/CI-CMG/water-column-sonar-processing"
|
|
26
26
|
Issues = "https://github.com/CI-CMG/water-column-sonar-processing/issues"
|
|
27
27
|
|
|
28
|
-
#[pytest]
|
|
28
|
+
#[tool.pytest.ini_options]
|
|
29
|
+
#filterwarnings = [
|
|
30
|
+
# "error",
|
|
31
|
+
# "ignore::UserWarning",
|
|
32
|
+
# # note the use of single quote below to denote "raw" strings in TOML
|
|
33
|
+
# 'ignore:function ham\(\) is deprecated:DeprecationWarning',
|
|
34
|
+
#]
|
|
29
35
|
#pythonpath = "src"
|
|
30
36
|
#testpaths = "tests"
|
|
37
|
+
#[pytest]
|
|
38
|
+
#addopts = [
|
|
39
|
+
# "-p no:warnings"
|
|
40
|
+
#]
|
|
31
41
|
|
|
32
|
-
[tool.pytest.ini_options]
|
|
33
|
-
minversion = "6.0"
|
|
34
|
-
|
|
35
|
-
testpaths = [
|
|
36
|
-
"tests",
|
|
37
|
-
]
|
|
42
|
+
#[tool.pytest.ini_options]
|
|
43
|
+
#minversion = "6.0"
|
|
44
|
+
##addopts = "-ra -q"
|
|
45
|
+
#testpaths = [
|
|
46
|
+
# "tests",
|
|
47
|
+
#]
|
|
38
48
|
|
|
39
49
|
[tool.setuptools.dynamic]
|
|
40
50
|
dependencies = {file = ["requirements.txt"]}
|
|
41
|
-
optional-dependencies = {dev = { file = ["
|
|
51
|
+
optional-dependencies = {dev = { file = ["requirements_dev.txt"] }}
|
|
42
52
|
|
|
43
53
|
# https://packaging.python.org/en/latest/guides/writing-pyproject-toml/
|
{water_column_sonar_processing-0.0.7 → water_column_sonar_processing-0.0.8}/requirements.txt
RENAMED
|
@@ -2,26 +2,27 @@
|
|
|
2
2
|
# defined for Python 3.12
|
|
3
3
|
# Note: be careful with conversions for pandas >=2.0.0, timestamps will have a lot of problems
|
|
4
4
|
|
|
5
|
-
aiobotocore
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
botocore~=1.33.13
|
|
5
|
+
aiobotocore==2.15.2
|
|
6
|
+
boto3==1.35.36
|
|
7
|
+
botocore==1.35.36
|
|
9
8
|
echopype==0.9.0
|
|
10
9
|
fiona==1.10.1
|
|
10
|
+
# Alternative to geopandas: pyogrio
|
|
11
11
|
geopandas==1.0.1
|
|
12
|
-
mock
|
|
13
|
-
moto
|
|
12
|
+
mock==5.1.0
|
|
13
|
+
moto[all]==5.0.21
|
|
14
|
+
moto[server]==5.0.21
|
|
14
15
|
numcodecs==0.13.1
|
|
15
16
|
numpy==1.26.4
|
|
16
17
|
pandas==2.2.3
|
|
17
|
-
|
|
18
|
+
pyarrow==18.1.0
|
|
18
19
|
python-dotenv==1.0.0
|
|
19
20
|
requests==2.32.3
|
|
20
|
-
#s3fs==
|
|
21
|
-
s3fs==
|
|
21
|
+
#s3fs==2024.10.0 # this version creates problems
|
|
22
|
+
s3fs==2023.12.1
|
|
22
23
|
scipy==1.14.1
|
|
24
|
+
setuptools==75.6.0
|
|
23
25
|
shapely==2.0.3
|
|
24
26
|
typing-extensions==4.10.0
|
|
25
|
-
xarray==
|
|
26
|
-
#zarr==2.16.1
|
|
27
|
+
xarray==2024.10.0
|
|
27
28
|
zarr==2.18.3
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import absolute_import
|
|
2
|
+
|
|
3
|
+
from . import aws, cruise, geometry, index, model, processing, utility
|
|
4
|
+
# from .model import ZarrManager
|
|
5
|
+
# from .process import Process
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"aws",
|
|
9
|
+
"cruise",
|
|
10
|
+
"geometry",
|
|
11
|
+
"index",
|
|
12
|
+
"model",
|
|
13
|
+
"processing",
|
|
14
|
+
"utility",
|
|
15
|
+
]
|
|
@@ -8,7 +8,11 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
|
|
|
8
8
|
#########################################################################
|
|
9
9
|
class DynamoDBManager:
|
|
10
10
|
#####################################################################
|
|
11
|
-
def __init__(
|
|
11
|
+
def __init__(
|
|
12
|
+
self,
|
|
13
|
+
# endpoint_url
|
|
14
|
+
):
|
|
15
|
+
# self.endpoint_url = endpoint_url
|
|
12
16
|
self.__dynamodb_session = boto3.Session(
|
|
13
17
|
aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
|
|
14
18
|
aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
|
|
@@ -16,9 +20,11 @@ class DynamoDBManager:
|
|
|
16
20
|
)
|
|
17
21
|
self.__dynamodb_resource = self.__dynamodb_session.resource(
|
|
18
22
|
service_name="dynamodb",
|
|
23
|
+
# endpoint_url=self.endpoint_url
|
|
19
24
|
)
|
|
20
25
|
self.__dynamodb_client = self.__dynamodb_session.client(
|
|
21
26
|
service_name="dynamodb",
|
|
27
|
+
# endpoint_url=self.endpoint_url
|
|
22
28
|
)
|
|
23
29
|
self.type_serializer = TypeSerializer() # https://stackoverflow.com/a/46738251
|
|
24
30
|
self.type_deserializer = TypeDeserializer()
|
|
@@ -35,31 +41,14 @@ class DynamoDBManager:
|
|
|
35
41
|
# assert (status_code == 200), "Problem, unable to update dynamodb table."
|
|
36
42
|
|
|
37
43
|
#####################################################################
|
|
38
|
-
def create_table(
|
|
39
|
-
self,
|
|
40
|
-
table_name,
|
|
41
|
-
key_schema,
|
|
42
|
-
attribute_definitions,
|
|
43
|
-
):
|
|
44
|
-
self.__dynamodb_client.create_table(
|
|
45
|
-
AttributeDefinitions=attribute_definitions,
|
|
46
|
-
TableName=table_name,
|
|
47
|
-
KeySchema=key_schema,
|
|
48
|
-
BillingMode="PAY_PER_REQUEST", # "PROVISIONED",
|
|
49
|
-
# ProvisionedThroughput={
|
|
50
|
-
# 'ReadCapacityUnits': 1_000,
|
|
51
|
-
# 'WriteCapacityUnits': 1_000
|
|
52
|
-
# }
|
|
53
|
-
)
|
|
54
|
-
|
|
55
44
|
#####################################################################
|
|
56
45
|
def create_water_column_sonar_table(
|
|
57
46
|
self,
|
|
58
47
|
table_name,
|
|
59
48
|
):
|
|
60
|
-
self.create_table(
|
|
61
|
-
|
|
62
|
-
|
|
49
|
+
self.__dynamodb_client.create_table(
|
|
50
|
+
TableName=table_name,
|
|
51
|
+
KeySchema=[
|
|
63
52
|
{
|
|
64
53
|
"AttributeName": "FILE_NAME",
|
|
65
54
|
"KeyType": "HASH",
|
|
@@ -69,20 +58,50 @@ class DynamoDBManager:
|
|
|
69
58
|
"KeyType": "RANGE",
|
|
70
59
|
},
|
|
71
60
|
],
|
|
72
|
-
|
|
61
|
+
AttributeDefinitions=[
|
|
73
62
|
{"AttributeName": "FILE_NAME", "AttributeType": "S"},
|
|
74
63
|
{"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
|
|
75
64
|
],
|
|
65
|
+
BillingMode="PAY_PER_REQUEST"
|
|
66
|
+
# ProvisionedThroughput={
|
|
67
|
+
# 'ReadCapacityUnits': 1_000,
|
|
68
|
+
# 'WriteCapacityUnits': 1_000
|
|
69
|
+
# }
|
|
76
70
|
)
|
|
71
|
+
# TODO: after creating status is 'CREATING', wait until 'ACTIVE'
|
|
72
|
+
response = self.__dynamodb_client.describe_table(TableName=table_name)
|
|
73
|
+
print(response) # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
|
|
74
|
+
# sleep then response['Table']['TableStatus'] == 'ACTIVE'
|
|
75
|
+
|
|
76
|
+
#####################################################################
|
|
77
|
+
# don't think this is used?
|
|
78
|
+
# def get_item(
|
|
79
|
+
# self,
|
|
80
|
+
# table_name,
|
|
81
|
+
# key
|
|
82
|
+
# ):
|
|
83
|
+
# response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
|
|
84
|
+
# item = None
|
|
85
|
+
# if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
|
|
86
|
+
# if "Item" in response:
|
|
87
|
+
# item = response["Item"]
|
|
88
|
+
# return item
|
|
77
89
|
|
|
78
90
|
#####################################################################
|
|
79
|
-
def
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
91
|
+
def get_table_item(
|
|
92
|
+
self,
|
|
93
|
+
table_name,
|
|
94
|
+
key,
|
|
95
|
+
):
|
|
96
|
+
"""
|
|
97
|
+
Gets a single row from the db.
|
|
98
|
+
"""
|
|
99
|
+
table = self.__dynamodb_resource.Table(table_name)
|
|
100
|
+
response = table.get_item(Key=key)
|
|
101
|
+
# TODO:
|
|
102
|
+
# if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
|
103
|
+
# throw error
|
|
104
|
+
return response
|
|
86
105
|
|
|
87
106
|
#####################################################################
|
|
88
107
|
def update_item(
|
|
@@ -101,17 +120,22 @@ class DynamoDBManager:
|
|
|
101
120
|
UpdateExpression=update_expression,
|
|
102
121
|
)
|
|
103
122
|
status_code = response["ResponseMetadata"]["HTTPStatusCode"]
|
|
104
|
-
|
|
123
|
+
assert response['ConsumedCapacity']['TableName'] == table_name
|
|
105
124
|
assert status_code == 200, "Problem, unable to update dynamodb table."
|
|
106
125
|
|
|
107
126
|
#####################################################################
|
|
127
|
+
# TODO: change to "get_cruise_as_df"
|
|
108
128
|
def get_table_as_df(
|
|
109
129
|
self,
|
|
110
130
|
ship_name,
|
|
111
131
|
cruise_name,
|
|
112
132
|
sensor_name,
|
|
113
133
|
table_name,
|
|
114
|
-
):
|
|
134
|
+
) -> pd.DataFrame:
|
|
135
|
+
"""
|
|
136
|
+
To be used to initialize a cruise, deletes all entries associated with that cruise
|
|
137
|
+
in the database.
|
|
138
|
+
"""
|
|
115
139
|
expression_attribute_values = {
|
|
116
140
|
":cr": {"S": cruise_name},
|
|
117
141
|
":se": {"S": sensor_name},
|
|
@@ -128,6 +152,9 @@ class DynamoDBManager:
|
|
|
128
152
|
FilterExpression=filter_expression,
|
|
129
153
|
)
|
|
130
154
|
# Note: table.scan() has 1 MB limit on results so pagination is used
|
|
155
|
+
if len(response["Items"]) == 0:
|
|
156
|
+
return pd.DataFrame() # If no results, return empty dataframe
|
|
157
|
+
|
|
131
158
|
data = response["Items"]
|
|
132
159
|
|
|
133
160
|
while "LastEvaluatedKey" in response:
|
|
@@ -146,25 +173,104 @@ class DynamoDBManager:
|
|
|
146
173
|
return df.sort_values(by="START_TIME", ignore_index=True)
|
|
147
174
|
|
|
148
175
|
#####################################################################
|
|
149
|
-
#
|
|
150
|
-
def
|
|
176
|
+
# TODO: WIP
|
|
177
|
+
def delete_item(
|
|
151
178
|
self,
|
|
152
179
|
table_name,
|
|
153
|
-
|
|
180
|
+
cruise_name,
|
|
181
|
+
file_name,
|
|
154
182
|
):
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
183
|
+
"""
|
|
184
|
+
Finds all rows associated with a cruise and deletes them.
|
|
185
|
+
"""
|
|
186
|
+
response = self.__dynamodb_client.delete_item(
|
|
187
|
+
Key={
|
|
188
|
+
"CRUISE_NAME": {
|
|
189
|
+
"S": cruise_name
|
|
190
|
+
},
|
|
191
|
+
"FILE_NAME": {
|
|
192
|
+
"S": file_name
|
|
193
|
+
}
|
|
194
|
+
},
|
|
195
|
+
TableName=table_name,
|
|
196
|
+
ReturnConsumedCapacity="TOTALS",
|
|
197
|
+
)
|
|
198
|
+
# TODO: there should be attributes included in response but they are missing
|
|
199
|
+
# if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
|
|
200
|
+
# throw error
|
|
158
201
|
return response
|
|
159
202
|
|
|
160
203
|
#####################################################################
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
table_name,
|
|
165
|
-
cruise_name,
|
|
204
|
+
def describe_table(
|
|
205
|
+
self,
|
|
206
|
+
table_name,
|
|
166
207
|
):
|
|
167
|
-
|
|
208
|
+
"""
|
|
209
|
+
Get a description of the table. Used to verify that records were added/removed.
|
|
210
|
+
"""
|
|
211
|
+
response = self.__dynamodb_client.describe_table(TableName=table_name)
|
|
212
|
+
print(response)
|
|
213
|
+
return response
|
|
214
|
+
|
|
215
|
+
|
|
168
216
|
|
|
217
|
+
#####################################################################
|
|
218
|
+
# TODO: from test_raw_to_zarr get enum and use here
|
|
219
|
+
# def __update_processing_status(
|
|
220
|
+
# self,
|
|
221
|
+
# file_name: str,
|
|
222
|
+
# cruise_name: str,
|
|
223
|
+
# pipeline_status: str,
|
|
224
|
+
# error_message: str = None,
|
|
225
|
+
# ):
|
|
226
|
+
# print(f"Updating processing status to {pipeline_status}.")
|
|
227
|
+
# if error_message:
|
|
228
|
+
# print(f"Error message: {error_message}")
|
|
229
|
+
# self.__dynamo.update_item(
|
|
230
|
+
# table_name=self.__table_name,
|
|
231
|
+
# key={
|
|
232
|
+
# 'FILE_NAME': {'S': file_name}, # Partition Key
|
|
233
|
+
# 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
|
|
234
|
+
# },
|
|
235
|
+
# attribute_names={
|
|
236
|
+
# '#PT': 'PIPELINE_TIME',
|
|
237
|
+
# '#PS': 'PIPELINE_STATUS',
|
|
238
|
+
# '#EM': 'ERROR_MESSAGE',
|
|
239
|
+
# },
|
|
240
|
+
# expression='SET #PT = :pt, #PS = :ps, #EM = :em',
|
|
241
|
+
# attribute_values={
|
|
242
|
+
# ':pt': {
|
|
243
|
+
# 'S': datetime.now().isoformat(timespec="seconds") + "Z"
|
|
244
|
+
# },
|
|
245
|
+
# ':ps': {
|
|
246
|
+
# 'S': pipeline_status
|
|
247
|
+
# },
|
|
248
|
+
# ':em': {
|
|
249
|
+
# 'S': error_message
|
|
250
|
+
# }
|
|
251
|
+
# }
|
|
252
|
+
# )
|
|
253
|
+
# else:
|
|
254
|
+
# self.__dynamo.update_item(
|
|
255
|
+
# table_name=self.__table_name,
|
|
256
|
+
# key={
|
|
257
|
+
# 'FILE_NAME': {'S': file_name}, # Partition Key
|
|
258
|
+
# 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
|
|
259
|
+
# },
|
|
260
|
+
# attribute_names={
|
|
261
|
+
# '#PT': 'PIPELINE_TIME',
|
|
262
|
+
# '#PS': 'PIPELINE_STATUS',
|
|
263
|
+
# },
|
|
264
|
+
# expression='SET #PT = :pt, #PS = :ps',
|
|
265
|
+
# attribute_values={
|
|
266
|
+
# ':pt': {
|
|
267
|
+
# 'S': datetime.now().isoformat(timespec="seconds") + "Z"
|
|
268
|
+
# },
|
|
269
|
+
# ':ps': {
|
|
270
|
+
# 'S': pipeline_status
|
|
271
|
+
# }
|
|
272
|
+
# }
|
|
273
|
+
# )
|
|
274
|
+
# print("Done updating processing status.")
|
|
169
275
|
|
|
170
276
|
#########################################################################
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
import boto3
|
|
3
4
|
from collections.abc import Generator
|
|
4
5
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
-
|
|
6
|
-
import boto3
|
|
7
6
|
from boto3.s3.transfer import TransferConfig
|
|
8
7
|
from botocore.config import Config
|
|
9
8
|
from botocore.exceptions import ClientError
|
|
@@ -25,10 +24,16 @@ class S3Manager:
|
|
|
25
24
|
#####################################################################
|
|
26
25
|
def __init__(
|
|
27
26
|
self,
|
|
27
|
+
# input_endpoint_url: str,
|
|
28
|
+
# output_endpoint_url: str,
|
|
29
|
+
# endpoint_url
|
|
28
30
|
# TODO: Need to allow passing in of credentials when writing to protected bucket
|
|
29
31
|
):
|
|
30
32
|
self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
|
|
31
33
|
self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
|
|
34
|
+
# self.endpoint_url = endpoint_url
|
|
35
|
+
# self.input_endpoint_url = input_endpoint_url
|
|
36
|
+
# self.output_endpoint_url = output_endpoint_url
|
|
32
37
|
self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
|
|
33
38
|
self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
|
|
34
39
|
self.s3_transfer_config = TransferConfig(
|
|
@@ -46,6 +51,7 @@ class S3Manager:
|
|
|
46
51
|
service_name="s3",
|
|
47
52
|
config=self.s3_client_config,
|
|
48
53
|
region_name=self.s3_region,
|
|
54
|
+
# endpoint_url=endpoint_url, # TODO: temporary
|
|
49
55
|
)
|
|
50
56
|
self.s3_resource = boto3.resource(
|
|
51
57
|
service_name="s3",
|
|
@@ -53,7 +59,6 @@ class S3Manager:
|
|
|
53
59
|
region_name=self.s3_region,
|
|
54
60
|
)
|
|
55
61
|
# self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
|
|
56
|
-
# TODO: create both "s3_client_input" and "s3_client_output" ???
|
|
57
62
|
self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
|
|
58
63
|
aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
|
|
59
64
|
aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
|
|
@@ -63,19 +68,20 @@ class S3Manager:
|
|
|
63
68
|
service_name="s3",
|
|
64
69
|
config=self.s3_client_config,
|
|
65
70
|
region_name=self.s3_region,
|
|
71
|
+
# endpoint_url=endpoint_url, # TODO: temporary
|
|
66
72
|
)
|
|
67
|
-
self.s3_resource_noaa_wcsd_zarr_pds = (
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
region_name=self.s3_region,
|
|
72
|
-
)
|
|
73
|
+
self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
|
|
74
|
+
service_name="s3",
|
|
75
|
+
config=self.s3_client_config,
|
|
76
|
+
region_name=self.s3_region,
|
|
73
77
|
)
|
|
78
|
+
self.paginator = self.s3_client.get_paginator('list_objects_v2')
|
|
79
|
+
self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
|
|
74
80
|
|
|
75
|
-
def get_client(self):
|
|
81
|
+
def get_client(self): # TODO: do i need this?
|
|
76
82
|
return self.s3_session.client(
|
|
77
83
|
service_name="s3",
|
|
78
|
-
config=self.
|
|
84
|
+
config=self.s3_client_config,
|
|
79
85
|
region_name=self.s3_region,
|
|
80
86
|
)
|
|
81
87
|
|
|
@@ -103,17 +109,18 @@ class S3Manager:
|
|
|
103
109
|
self,
|
|
104
110
|
file_name: str,
|
|
105
111
|
key: str,
|
|
112
|
+
output_bucket_name: str,
|
|
106
113
|
):
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
)
|
|
114
|
+
"""
|
|
115
|
+
Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
|
|
116
|
+
"""
|
|
117
|
+
self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(Filename=file_name, Key=key)
|
|
112
118
|
return key
|
|
113
119
|
|
|
114
120
|
#####################################################################
|
|
115
121
|
def upload_files_with_thread_pool_executor(
|
|
116
122
|
self,
|
|
123
|
+
output_bucket_name: str,
|
|
117
124
|
all_files: list,
|
|
118
125
|
):
|
|
119
126
|
# 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
|
|
@@ -122,21 +129,45 @@ class S3Manager:
|
|
|
122
129
|
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
|
123
130
|
futures = [
|
|
124
131
|
executor.submit(
|
|
125
|
-
self.upload_nodd_file,
|
|
132
|
+
self.upload_nodd_file, # TODO: verify which one is using this
|
|
126
133
|
all_file[0], # file_name
|
|
127
134
|
all_file[1], # key
|
|
135
|
+
output_bucket_name, # output_bucket_name
|
|
128
136
|
)
|
|
129
137
|
for all_file in all_files
|
|
130
138
|
]
|
|
131
139
|
for future in as_completed(futures):
|
|
132
140
|
result = future.result()
|
|
133
141
|
if result:
|
|
134
|
-
all_uploads.extend(result)
|
|
142
|
+
all_uploads.extend([result])
|
|
135
143
|
except Exception as err:
|
|
136
144
|
print(err)
|
|
137
145
|
print("Done uploading files using threading pool.")
|
|
138
146
|
return all_uploads
|
|
139
147
|
|
|
148
|
+
#####################################################################
|
|
149
|
+
# def upload_nodd_file2(
|
|
150
|
+
# self,
|
|
151
|
+
# body: str,
|
|
152
|
+
# bucket: str,
|
|
153
|
+
# key: str,
|
|
154
|
+
# ):
|
|
155
|
+
# self.s3_client_noaa_wcsd_zarr_pds.put_object(
|
|
156
|
+
# Body=body,
|
|
157
|
+
# Bucket=bucket,
|
|
158
|
+
# Key=key,
|
|
159
|
+
# )
|
|
160
|
+
|
|
161
|
+
# TODO: this uses resource, try to use client
|
|
162
|
+
def upload_file(
|
|
163
|
+
self,
|
|
164
|
+
filename: str,
|
|
165
|
+
bucket_name: str,
|
|
166
|
+
key: str,
|
|
167
|
+
):
|
|
168
|
+
# self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
|
|
169
|
+
self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
|
|
170
|
+
|
|
140
171
|
#####################################################################
|
|
141
172
|
def upload_zarr_files_to_bucket( # noaa-wcsd-model-pds
|
|
142
173
|
self,
|
|
@@ -165,32 +196,34 @@ class S3Manager:
|
|
|
165
196
|
return all_uploads
|
|
166
197
|
|
|
167
198
|
#####################################################################
|
|
168
|
-
# used: raw-to-
|
|
169
|
-
def list_objects( # noaa-wcsd-pds and noaa-wcsd-
|
|
170
|
-
self,
|
|
199
|
+
# used: raw-to-zarr
|
|
200
|
+
def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
|
|
201
|
+
self,
|
|
202
|
+
bucket_name,
|
|
203
|
+
prefix
|
|
171
204
|
):
|
|
172
205
|
# analog to "find_children_objects"
|
|
173
206
|
# Returns a list of key strings for each object in bucket defined by prefix
|
|
174
|
-
s3_client = self.s3_client
|
|
207
|
+
# s3_client = self.s3_client
|
|
175
208
|
keys = []
|
|
176
|
-
paginator = s3_client.get_paginator("list_objects_v2")
|
|
177
|
-
page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
|
|
209
|
+
# paginator = s3_client.get_paginator("list_objects_v2")
|
|
210
|
+
page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
|
|
178
211
|
for page in page_iterator:
|
|
179
212
|
if "Contents" in page.keys():
|
|
180
213
|
keys.extend([k["Key"] for k in page["Contents"]])
|
|
181
214
|
return keys
|
|
182
215
|
|
|
183
|
-
def list_nodd_objects( # These are used by the geometry for uploading data
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
):
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
216
|
+
# def list_nodd_objects( # These are used by the geometry for uploading data
|
|
217
|
+
# self,
|
|
218
|
+
# prefix,
|
|
219
|
+
# ):
|
|
220
|
+
# # Returns a list of key strings for each object in bucket defined by prefix
|
|
221
|
+
# keys = []
|
|
222
|
+
# page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
|
|
223
|
+
# for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
|
|
224
|
+
# if "Contents" in page.keys():
|
|
225
|
+
# keys.extend([k["Key"] for k in page["Contents"]])
|
|
226
|
+
# return keys
|
|
194
227
|
|
|
195
228
|
#####################################################################
|
|
196
229
|
# TODO: change name to "directory"
|
|
@@ -279,9 +312,10 @@ class S3Manager:
|
|
|
279
312
|
self,
|
|
280
313
|
bucket_name,
|
|
281
314
|
key,
|
|
282
|
-
file_name,
|
|
315
|
+
file_name, # where the file will be saved
|
|
283
316
|
):
|
|
284
317
|
self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
|
|
318
|
+
# TODO: if bottom file doesn't exist, don't fail downloader
|
|
285
319
|
print("downloaded file")
|
|
286
320
|
|
|
287
321
|
#####################################################################
|
|
@@ -318,7 +352,7 @@ class S3Manager:
|
|
|
318
352
|
#####################################################################
|
|
319
353
|
# not used TODO: remove
|
|
320
354
|
def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
|
|
321
|
-
self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
|
|
355
|
+
self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body) # "Body" can be a file
|
|
322
356
|
|
|
323
357
|
#####################################################################
|
|
324
358
|
def read_s3_json(
|