water-column-sonar-processing 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +0 -2
- water_column_sonar_processing/aws/__init__.py +2 -2
- water_column_sonar_processing/aws/s3_manager.py +63 -42
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +5 -5
- water_column_sonar_processing/cruise/resample_regrid.py +3 -3
- water_column_sonar_processing/geometry/geometry_manager.py +7 -9
- water_column_sonar_processing/geometry/pmtile_generation.py +0 -2
- water_column_sonar_processing/index/index_manager.py +1 -1
- water_column_sonar_processing/model/zarr_manager.py +18 -13
- water_column_sonar_processing/process.py +4 -4
- water_column_sonar_processing/processing/cruise_sampler.py +1 -1
- water_column_sonar_processing/processing/raw_to_zarr.py +29 -32
- water_column_sonar_processing/utility/__init__.py +2 -2
- {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/METADATA +8 -2
- {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/RECORD +18 -18
- {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/__init__.py
@@ -1,7 +1,7 @@
 from .dynamodb_manager import DynamoDBManager
-from .s3_manager import S3Manager
+from .s3_manager import S3Manager, chunked
 from .s3fs_manager import S3FSManager
 from .sns_manager import SNSManager
 from .sqs_manager import SQSManager
 
-__all__ = ["DynamoDBManager", "S3Manager", "S3FSManager", "SNSManager", "SQSManager"]
+__all__ = ["DynamoDBManager", "S3Manager", "chunked", "S3FSManager", "SNSManager", "SQSManager"]
water_column_sonar_processing/aws/s3_manager.py
@@ -3,6 +3,8 @@ import os
 import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -14,7 +16,10 @@ GB = 1024**3
 
 
 #########################################################################
-def chunked(
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
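A quick sketch of how the newly exported chunked generator behaves (the import path follows the aws/__init__.py change above; the sample list is hypothetical):

```python
from water_column_sonar_processing.aws import chunked

# 2500 hypothetical delete payloads, grouped to fit S3's 1000-key batch limit
objects_to_delete = [{"Key": f"level_1/file_{i}.zarr/.zattrs"} for i in range(2500)]
for batch in chunked(ll=objects_to_delete, n=1000):
    print(len(batch))  # 1000, 1000, 500
```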
@@ -24,16 +29,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-        # input_endpoint_url: str,
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -51,14 +49,12 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url,  # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,7 +64,6 @@
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url,  # TODO: temporary
         )
         self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
             service_name="s3",
@@ -78,12 +73,12 @@
         self.paginator = self.s3_client.get_paginator('list_objects_v2')
         self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self):  # TODO: do i need this?
-
-
-
-
-
+    # def get_client(self):  # TODO: do i need this?
+    #     return self.s3_session.client(
+    #         service_name="s3",
+    #         config=self.s3_client_config,
+    #         region_name=self.s3_region,
+    #     )
 
     #####################################################################
     def create_bucket(
@@ -146,18 +141,6 @@
         return all_uploads
 
     #####################################################################
-    # def upload_nodd_file2(
-    #     self,
-    #     body: str,
-    #     bucket: str,
-    #     key: str,
-    # ):
-    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
-    #         Body=body,
-    #         Bucket=bucket,
-    #         Key=key,
-    #     )
-
     # TODO: this uses resource, try to use client
     def upload_file(
         self,
@@ -190,11 +173,36 @@
             all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
+            output_bucket_name=self.output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading files to output bucket.")
         return all_uploads
 
+    #####################################################################
+    def check_if_object_exists(
+        self,
+        bucket_name,
+        key_name
+    ) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == "404":
+                # The object does not exist.
+                return False
+            elif e.response['Error']['Code'] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+        return True
+
     #####################################################################
     # used: raw-to-zarr
     def list_objects(  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
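One caveat in the new check_if_object_exists: head_object reports Error.Code as a string, so the integer comparison `== 403` can never match and a 403 response falls through to `raise` instead of returning False. A minimal corrected sketch, assuming standard botocore behavior (the helper name is ours, not the package's):

```python
import botocore

def object_exists(s3_client, bucket_name: str, key_name: str) -> bool:
    # HEAD the object; S3 answers 404 for a missing key and 403 when the
    # caller lacks permission (or the bucket does not exist).
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] in ("404", "403"):  # codes are strings, not ints
            return False
        raise  # anything else is unexpected
    return True
```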
@@ -202,6 +210,7 @@
         bucket_name,
         prefix
     ):
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         # s3_client = self.s3_client
@@ -227,7 +236,11 @@
 
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(
+    def folder_exists_and_not_empty(
+        self,
+        bucket_name: str,
+        path: str
+    ) -> bool:
         if not path.endswith("/"):
             path = path + "/"
         s3_client = self.s3_client
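folder_exists_and_not_empty is a prefix check on the "directory" the TODO mentions. A common way to implement it, shown as a sketch with boto3's list_objects_v2 (not necessarily the package's exact body, which the diff elides):

```python
def folder_exists_and_not_empty(s3_client, bucket_name: str, path: str) -> bool:
    # Ask for at most one key under the prefix; any hit means the
    # "directory" exists and is non-empty.
    if not path.endswith("/"):
        path = path + "/"
    resp = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=path, MaxKeys=1)
    return resp["KeyCount"] > 0
```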
@@ -319,23 +332,15 @@
         print("downloaded file")
 
     #####################################################################
-    #
-    # def delete_nodd_object(  # noaa-wcsd-model-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ):  # -> dict:
-    #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
+    # TODO: need to test this!!!
     def delete_nodd_objects(  # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
             print(
-                f"Deleting {len(objects)} objects in {
+                f"Deleting {len(objects)} objects in {bucket_name} in batches."
             )
             objects_to_delete = []
             for obj in objects:
@@ -343,12 +348,28 @@
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print(f"Deleted files.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")
 
+    #####################################################################
+    # TODO: need to test this!!!
+    def delete_nodd_object(
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(
+                f"Deleting {key_name} objects in {bucket_name}."
+            )
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
+            print(f"Deleted file.")
+        except Exception as err:
+            print(f"Problem was encountered while deleting objects: {err}")
+
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
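The batching above exists because S3's DeleteObjects API accepts at most 1000 keys per request. A condensed sketch of the pattern (the client setup and key list are hypothetical; chunked is the helper from this module):

```python
import boto3
from water_column_sonar_processing.aws import chunked

def delete_keys(bucket_name: str, keys: list) -> None:
    s3_client = boto3.client("s3")
    objects_to_delete = [{"Key": k} for k in keys]
    for batch in chunked(ll=objects_to_delete, n=1000):  # 1000-key API limit
        s3_client.delete_objects(Bucket=bucket_name, Delete={"Objects": batch})
```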
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -3,10 +3,10 @@ import os
 import numcodecs
 import numpy as np
 
-from
-from
-from
-from
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.model import ZarrManager
+from water_column_sonar_processing.utility import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -47,7 +47,7 @@ class CreateEmptyZarrStore:
         ):
             for file in files:
                 local_path = os.path.join(subdir, file)
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model
+                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/..zattrs'
                 s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
                 all_files.append([local_path, s3_key])
                 #
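To see what that s3_key expression produces: it splits the local path on the store name and re-roots the remainder under the object prefix. The values below follow the commented example but are hypothetical:

```python
cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"
local_path = "/tmp/HB0806.model/.zattrs"

s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs
```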
water_column_sonar_processing/cruise/resample_regrid.py
@@ -7,9 +7,9 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from
-from
-from
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.model import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
water_column_sonar_processing/geometry/geometry_manager.py
@@ -5,8 +5,8 @@ import geopandas
 import numpy as np
 import pandas as pd
 
-from
-from
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.utility import Cleaner
 
 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -138,15 +138,13 @@ class GeometryManager:
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-
+        geojson_object_exists = s3_manager.check_if_object_exists(
             bucket_name=output_bucket_name,
-
+            key_name=f"{geo_json_prefix}/{geo_json_name}"
         )
-        if
-            print(
-
-            )
-            s3_manager.delete_nodd_objects(objects=s3_objects)
+        if geojson_object_exists:
+            print("GeoJSON already exists in s3, deleting existing and continuing.")
+            s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
 
         print("Upload GeoJSON to s3.")
         s3_manager.upload_nodd_file(
water_column_sonar_processing/geometry/pmtile_generation.py
@@ -12,8 +12,6 @@ import pyogrio
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from shapely.geometry import LineString
 
-from src.water_column_sonar_processing.aws import S3Manager, S3FSManager
-
 MAX_POOL_CONNECTIONS = 64
 MAX_CONCURRENCY = 64
 MAX_WORKERS = 64
water_column_sonar_processing/index/index_manager.py
@@ -4,7 +4,7 @@ import pandas as pd
 from datetime import datetime
 from concurrent.futures import ThreadPoolExecutor
 from concurrent.futures import as_completed
-from
+from water_column_sonar_processing.aws import S3Manager
 
 
 class IndexManager:
water_column_sonar_processing/model/zarr_manager.py
@@ -5,9 +5,10 @@ import xarray as xr
 import zarr
 from numcodecs import Blosc
 
-from
-from
-from
+from water_column_sonar_processing.aws import S3FSManager
+from water_column_sonar_processing.utility import Constants
+from water_column_sonar_processing.utility import Timestamp
+from water_column_sonar_processing.utility import Coordinates
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -85,8 +86,6 @@ class ZarrManager:
             data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            # Constants.TILE_SIZE.value,
-            # ),  # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             fill_value=np.nan,  # TODO: do i want nan's?
@@ -124,14 +123,16 @@
 
         root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
         root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),
+            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
@@ -143,14 +144,16 @@
         # Note: LATITUDE is indexed by TIME
         root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
         root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
             # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
@@ -162,8 +165,9 @@
         # Note: LONGITUDE is indexed by TIME
         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
 
         #####################################################################
         # TODO: verify adding this variable for where the bottom was detected
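The zarr_manager hunks converge on one pattern per coordinate: initialize the array with NaN rather than 0.0, then attach units, long_name, and standard_name attributes. A self-contained sketch of that pattern with hypothetical names and values (zarr v2 API, consistent with the pinned numcodecs):

```python
import numpy as np
import zarr

root = zarr.open_group("example.zarr", mode="w")
width = 512  # hypothetical ping count

latitude = root.create_dataset(
    name="latitude",
    data=np.repeat(np.nan, width),  # NaN fill, as in the new hunks
    shape=width,
    chunks=256,
    dtype=np.dtype("float64"),
    fill_value=np.nan,
)
latitude.attrs["_ARRAY_DIMENSIONS"] = ["time"]  # xarray dimension mapping
latitude.attrs["units"] = "degrees_north"
latitude.attrs["long_name"] = "Latitude"
latitude.attrs["standard_name"] = "latitude"
```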
@@ -182,8 +186,9 @@
         # BOTTOM is indexed by TIME
         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
@@ -203,11 +208,11 @@
             Coordinates.FREQUENCY.value
         ]  # TODO: is this correct
 
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
         root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
         root.frequency.attrs["standard_name"] = (
             Coordinates.FREQUENCY_STANDARD_NAME.value
         )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
@@ -229,8 +234,8 @@
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
@@ -241,7 +246,7 @@
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
         root.attrs["processing_software_version"] = (
-            "0.0.
+            "0.0.9"  # TODO: get programmatically, echopype>utils>prov.py
         )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
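Note that the hard-coded "0.0.9" is itself stale for a 0.0.10 wheel, which is what the TODO (and the setuptools-scm note in the METADATA below) is about. A minimal standard-library sketch for deriving the version at runtime:

```python
from importlib.metadata import PackageNotFoundError, version

try:
    software_version = version("water_column_sonar_processing")
except PackageNotFoundError:
    software_version = "unknown"  # e.g. running from a source checkout
```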
water_column_sonar_processing/process.py
@@ -3,10 +3,10 @@ import os
 
 import numpy as np
 
-from
-from
-from
-from
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.aws import S3Manager
+from water_column_sonar_processing.aws import S3FSManager
+from water_column_sonar_processing.aws import SNSManager
 
 
 ###########################################################
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -7,9 +7,9 @@ from numcodecs import Blosc
 from datetime import datetime
 from pathlib import Path  # , PurePath
 
-from
-from
-from
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.utility import Cleaner
 
 TEMPDIR = "/tmp"
 
@@ -149,10 +149,14 @@ class RawToZarr:
         sensor_name,
         raw_file_name,
     ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
         print(f'Opening raw: {raw_file_name} and creating zarr store.')
         geometry_manager = GeometryManager()
         cleaner = Cleaner()
-        cleaner.delete_local_files(file_types=["*.zarr", "*.json"])  # TODO: include bot and raw
+        cleaner.delete_local_files(file_types=["*.zarr", "*.json"])  # TODO: include bot and raw?
        try:
            gc.collect()
            print('Opening raw file with echopype.')
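The new docstring summarizes the raw-to-zarr step; in echopype terms the core of it looks roughly like the sketch below. The sonar model and file names are hypothetical, and only to_zarr appears verbatim in the diff:

```python
import echopype as ep

echodata = ep.open_raw("D20080625-T161209.raw", sonar_model="EK60")  # parse the raw file
ds_sv = ep.calibrate.compute_Sv(echodata)  # calibrated volume backscatter
ds_sv.to_zarr(store="D20080625-T161209.zarr")  # local store, uploaded afterwards
```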
@@ -204,14 +208,27 @@
             store_name = f"{Path(raw_file_name).stem}.zarr"
             ds_sv.to_zarr(store=store_name)
             #################################################################
-            # TODO: do i still need this?
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file(  # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
-            #################################################################
             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager()
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print('Zarr store data already exists in s3, deleting existing and continuing.')
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix
+            )
+            #################################################################
             self.__zarr_info_to_table(
                 output_bucket_name=output_bucket_name,
                 table_name=table_name,
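The new list-then-delete block makes re-runs idempotent: any partial store left under the prefix by an earlier attempt is removed before the fresh upload. Condensed (method names follow the diff; bucket and file names are hypothetical):

```python
s3_manager = S3Manager()
prefix = "level_1/Henry_B._Bigelow/HB0806/EK60/D20080625-T161209.zarr"
existing = s3_manager.get_child_objects(
    bucket_name="noaa-wcsd-zarr-pds",
    sub_prefix=prefix,
)
if len(existing) > 0:
    s3_manager.delete_nodd_objects(bucket_name="noaa-wcsd-zarr-pds", objects=existing)
# ...then upload the freshly written local store under the same prefix.
```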
@@ -228,29 +245,9 @@
                 frequencies=frequencies,
                 channels=channels
             )
-            ###################################################################
-            #######################################################################
-            self.__upload_files_to_output_bucket(
-                output_bucket_name=output_bucket_name,
-                local_directory=store_name,
-                object_prefix=output_zarr_prefix
-            )
-            #######################################################################
-            # # TODO: verify count of objects matches
-            # s3_objects = self.__s3.list_objects(
-            #     bucket_name=self.__output_bucket,
-            #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-            #     access_key_id=self.__output_bucket_access_key,
-            #     secret_access_key=self.__output_bucket_secret_access_key
-            # )
             #######################################################################
-            #
-            # file_name=input_file_name,
-            # cruise_name=cruise_name,
-            # pipeline_status='SUCCESS_RAW_TO_ZARR'
-            # )
+            # TODO: verify count of objects matches, publish message, update status
             #######################################################################
-            # self.__publish_done_message(input_message)
             print('here')
         except Exception as err:
             print(f'Exception encountered creating local Zarr store with echopype: {err}')
water_column_sonar_processing/utility/__init__.py
@@ -1,6 +1,6 @@
 from .cleaner import Cleaner
-from .constants import Constants
+from .constants import Constants, Coordinates
 from .pipeline_status import PipelineStatus
 from .timestamp import Timestamp
 
-__all__ = ["Cleaner", "Constants", "PipelineStatus", "Timestamp"]
+__all__ = ["Cleaner", "Constants", "Coordinates", "PipelineStatus", "Timestamp"]
{water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.
+Version: 0.0.10
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -24,7 +24,7 @@ Requires-Dist: numcodecs==0.13.1
 Requires-Dist: numpy==1.26.4
 Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: python-dotenv==1.0.
+Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
@@ -114,6 +114,7 @@ python -m twine upload --repository pypi dist/*
 ```
 
 # Pre Commit Hook
+see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
 ```
 pre-commit install --allow-missing-config
@@ -132,3 +133,8 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 20241125
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
+
+
+# TODO:
+add https://pypi.org/project/setuptools-scm/
+for extracting the version
{water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/RECORD
@@ -1,32 +1,32 @@
-water_column_sonar_processing/__init__.py,sha256=
-water_column_sonar_processing/process.py,sha256
-water_column_sonar_processing/aws/__init__.py,sha256=
+water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
+water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
+water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
 water_column_sonar_processing/aws/dynamodb_manager.py,sha256=sZHn-hgCt3K3w0x5BcXfF5jLMt_F11dAtQHJToij9nU,10008
-water_column_sonar_processing/aws/s3_manager.py,sha256=
+water_column_sonar_processing/aws/s3_manager.py,sha256=kS48Vu_jE_fOKbwKOhCLWKDSqHzOGVEdZ_Lc4MaMCfA,15291
 water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=
-water_column_sonar_processing/cruise/resample_regrid.py,sha256=
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uQiZoKm16jD0SUuXmhuPryxdE-6bUc6BlCi2UtmzUpw,7318
+water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=0Q9IRiBr6XvxUg5M2vCPtUhbnYnwa5pJI1ayfWXMgMs,10587
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
-water_column_sonar_processing/geometry/pmtile_generation.py,sha256=
+water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
-water_column_sonar_processing/index/index_manager.py,sha256=
+water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=
+water_column_sonar_processing/model/zarr_manager.py,sha256=TbcVux-GWfX4XJ7UT20E7dI_h_islrKsGtjx_VwSsLg,14003
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
-water_column_sonar_processing/processing/cruise_sampler.py,sha256=
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=
-water_column_sonar_processing/utility/__init__.py,sha256=
+water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=7vvoNe0jlB34R5mBPceQjL9N_5X0GTWs9xpCqvRK1nQ,15931
+water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
+water_column_sonar_processing-0.0.10.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.10.dist-info/METADATA,sha256=qFNeJ3GduRHKfcJRYShO9LamuMREk66qm18IUUXsMg8,4566
+water_column_sonar_processing-0.0.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.10.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.10.dist-info/RECORD,,
File without changes: {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/LICENSE
File without changes: {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/WHEEL
File without changes: {water_column_sonar_processing-0.0.8.dist-info → water_column_sonar_processing-0.0.10.dist-info}/top_level.txt