water-column-sonar-processing 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of water-column-sonar-processing has been flagged as potentially problematic.
- water_column_sonar_processing/aws/dynamodb_manager.py +15 -11
- water_column_sonar_processing/aws/s3_manager.py +63 -42
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +1 -1
- water_column_sonar_processing/geometry/geometry_manager.py +5 -7
- water_column_sonar_processing/model/zarr_manager.py +14 -10
- water_column_sonar_processing/processing/raw_to_zarr.py +49 -42
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/METADATA +8 -2
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/RECORD +11 -11
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/dynamodb_manager.py
@@ -111,17 +111,21 @@ class DynamoDBManager:
         expression_attribute_names,
         expression_attribute_values,
         update_expression,
-    ):
-
-
-
-
-
-
-
-
-
-
+    ):  # TODO: convert to boolean
+        try:
+            response = self.__dynamodb_client.update_item(
+                TableName=table_name,
+                Key=key,
+                ExpressionAttributeNames=expression_attribute_names,
+                ExpressionAttributeValues=expression_attribute_values,
+                UpdateExpression=update_expression,
+            )
+            status_code = response["ResponseMetadata"]["HTTPStatusCode"]
+            print(f"HTTPStatusCode: {status_code}")
+            # assert status_code == 200, "Problem, unable to update dynamodb table."
+            # assert response['ConsumedCapacity']['TableName'] == table_name
+        except Exception as err:
+            print(f"Problem was encountered while updating item: {err}")
 
     #####################################################################
     # TODO: change to "get_cruise_as_df"
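Note: for readers unfamiliar with the expression-attribute pattern that the new try/except wrapper forwards to boto3, here is a minimal standalone sketch; the table name, key schema, and attribute names are hypothetical, not taken from this package.

```python
import boto3

# Hypothetical, minimal invocation of DynamoDB update_item using
# expression attribute names/values, the same shape the wrapper forwards.
client = boto3.client("dynamodb")
client.update_item(
    TableName="example-cruise-table",                 # hypothetical table
    Key={"FILE_NAME": {"S": "example.raw"}},          # hypothetical key schema
    ExpressionAttributeNames={"#PS": "PIPELINE_STATUS"},
    ExpressionAttributeValues={":ps": {"S": "LEVEL_1_PROCESSING"}},
    UpdateExpression="SET #PS = :ps",
)
```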
water_column_sonar_processing/aws/s3_manager.py
@@ -3,6 +3,8 @@ import os
 import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -14,7 +16,10 @@ GB = 1024**3
 
 
 #########################################################################
-def chunked(
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
@@ -24,16 +29,9 @@ class S3Manager:
    #####################################################################
    def __init__(
        self,
-        # input_endpoint_url: str,
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
    ):
        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
        self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
        self.s3_transfer_config = TransferConfig(
@@ -51,14 +49,12 @@
            service_name="s3",
            config=self.s3_client_config,
            region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
        )
        self.s3_resource = boto3.resource(
            service_name="s3",
            config=self.s3_client_config,
            region_name=self.s3_region,
        )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
        self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
            aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
            aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,7 +64,6 @@
            service_name="s3",
            config=self.s3_client_config,
            region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
        )
        self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
            service_name="s3",
@@ -78,12 +73,12 @@
        self.paginator = self.s3_client.get_paginator('list_objects_v2')
        self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self): # TODO: do i need this?
-
-
-
-
-
+    # def get_client(self): # TODO: do i need this?
+    #     return self.s3_session.client(
+    #         service_name="s3",
+    #         config=self.s3_client_config,
+    #         region_name=self.s3_region,
+    #     )
 
    #####################################################################
    def create_bucket(
@@ -146,18 +141,6 @@
        return all_uploads
 
    #####################################################################
-    # def upload_nodd_file2(
-    #     self,
-    #     body: str,
-    #     bucket: str,
-    #     key: str,
-    # ):
-    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
-    #         Body=body,
-    #         Bucket=bucket,
-    #         Key=key,
-    #     )
-
    # TODO: this uses resource, try to use client
    def upload_file(
        self,
@@ -190,11 +173,36 @@
            all_files.append([local_path, s3_key])
 
        all_uploads = self.upload_files_with_thread_pool_executor(
+            output_bucket_name=self.output_bucket_name,
            all_files=all_files,
        )
        print("Done uploading files to output bucket.")
        return all_uploads
 
+    #####################################################################
+    def check_if_object_exists(
+        self,
+        bucket_name,
+        key_name
+    ) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == "404":
+                # The object does not exist.
+                return False
+            elif e.response['Error']['Code'] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+        return True
+
    #####################################################################
    # used: raw-to-zarr
    def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
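Note: botocore reports `Error.Code` as a string, so the `elif e.response['Error']['Code'] == 403` branch in the new `check_if_object_exists` compares a string to an int and will never match; 403 errors fall through to `raise`. A minimal sketch of the intended check, with the comparison corrected (bucket and key are placeholders):

```python
import boto3
import botocore

def object_exists(s3_client, bucket_name: str, key_name: str) -> bool:
    # HEAD the object: ClientError "404" means missing, "403" means access
    # denied (treated as absent here); anything else is re-raised.
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] in ("404", "403"):
            return False
        raise
    return True

# exists = object_exists(boto3.client("s3"), "example-bucket", "spatial/geojson/example.json")
```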
@@ -202,6 +210,7 @@
        bucket_name,
        prefix
    ):
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
        # analog to "find_children_objects"
        # Returns a list of key strings for each object in bucket defined by prefix
        # s3_client = self.s3_client
@@ -227,7 +236,11 @@
 
    #####################################################################
    # TODO: change name to "directory"
-    def folder_exists_and_not_empty(
+    def folder_exists_and_not_empty(
+        self,
+        bucket_name: str,
+        path: str
+    ) -> bool:
        if not path.endswith("/"):
            path = path + "/"
        s3_client = self.s3_client
@@ -319,23 +332,15 @@
        print("downloaded file")
 
    #####################################################################
-    #
-    # def delete_nodd_object( # noaa-wcsd-model-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ): # -> dict:
-    #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
+    # TODO: need to test this!!!
    def delete_nodd_objects( # nodd-bucket
        self,
+        bucket_name,
        objects: list,
    ):
        try:
            print(
-                f"Deleting {len(objects)} objects in {
+                f"Deleting {len(objects)} objects in {bucket_name} in batches."
            )
            objects_to_delete = []
            for obj in objects:
@@ -343,12 +348,28 @@
            # Note: request can contain a list of up to 1000 keys
            for batch in chunked(ll=objects_to_delete, n=1000):
                self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=
+                    Bucket=bucket_name, Delete={"Objects": batch}
                )
            print(f"Deleted files.")
        except Exception as err:
            print(f"Problem was encountered while deleting objects: {err}")
 
+    #####################################################################
+    # TODO: need to test this!!!
+    def delete_nodd_object(
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(
+                f"Deleting {key_name} objects in {bucket_name}."
+            )
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
+            print(f"Deleted file.")
+        except Exception as err:
+            print(f"Problem was encountered while deleting objects: {err}")
+
    #####################################################################
    # not used TODO: remove
    def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
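Note: S3's `delete_objects` accepts at most 1000 keys per request, which is why `delete_nodd_objects` batches through `chunked`. The pattern in isolation (bucket name and keys are hypothetical):

```python
import boto3

def chunked(ll: list, n: int):
    # Yields successive n-sized chunks from ll.
    for i in range(0, len(ll), n):
        yield ll[i : i + n]

s3_client = boto3.client("s3")
keys = [f"level_1/example/file.zarr/{i}" for i in range(2500)]  # hypothetical keys
objects_to_delete = [{"Key": key} for key in keys]

# delete_objects allows up to 1000 keys per call, so delete in batches.
for batch in chunked(ll=objects_to_delete, n=1000):
    s3_client.delete_objects(Bucket="example-bucket", Delete={"Objects": batch})
```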
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -47,7 +47,7 @@ class CreateEmptyZarrStore:
    ):
        for file in files:
            local_path = os.path.join(subdir, file)
-            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model
+            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/..zattrs'
            s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
            all_files.append([local_path, s3_key])
            #
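Note: the corrected comment documents what the split on `{cruise_name}.model` produces; a worked example with hypothetical values:

```python
cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"  # hypothetical prefix
local_path = "/tmp/HB0806.model/.zattrs"                # hypothetical local file

# Everything after "{cruise_name}.model" in the local path is re-rooted
# under the object prefix to form the upload key.
s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs
```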
water_column_sonar_processing/geometry/geometry_manager.py
@@ -138,15 +138,13 @@ class GeometryManager:
 
        print("Checking s3 and deleting any existing GeoJSON file.")
        s3_manager = S3Manager()
-
+        geojson_object_exists = s3_manager.check_if_object_exists(
            bucket_name=output_bucket_name,
-
+            key_name=f"{geo_json_prefix}/{geo_json_name}"
        )
-        if
-        print(
-
-        )
-        s3_manager.delete_nodd_objects(objects=s3_objects)
+        if geojson_object_exists:
+            print("GeoJSON already exists in s3, deleting existing and continuing.")
+            s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
 
        print("Upload GeoJSON to s3.")
        s3_manager.upload_nodd_file(
water_column_sonar_processing/model/zarr_manager.py
@@ -86,8 +86,6 @@ class ZarrManager:
            data=np.repeat(0.0, width),
            shape=width,
            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            # Constants.TILE_SIZE.value,
-            #), # TODO: the chunking scheme doesn't seem to be working here
            dtype=np.dtype(Coordinates.TIME_DTYPE.value),
            compressor=self.__compressor,
            fill_value=np.nan, # TODO: do i want nan's?
@@ -125,14 +123,16 @@
 
        root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
        root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
 
        #####################################################################
        # --- Coordinate: Latitude --- #
        root.create_dataset(
            name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),
+            # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
            shape=width,
            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
            dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
@@ -144,14 +144,16 @@
        # Note: LATITUDE is indexed by TIME
        root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
        root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
 
        #####################################################################
        # --- Coordinate: Longitude --- #
        root.create_dataset(
            name=Coordinates.LONGITUDE.value,
            # data=np.repeat(0.0, width), # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
            shape=width,
            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
            dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
@@ -163,8 +165,9 @@
        # Note: LONGITUDE is indexed by TIME
        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
 
        #####################################################################
        # TODO: verify adding this variable for where the bottom was detected
@@ -183,8 +186,9 @@
        # BOTTOM is indexed by TIME
        root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
        root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
 
        #####################################################################
        # --- Coordinate: Frequency --- #
@@ -204,11 +208,11 @@
            Coordinates.FREQUENCY.value
        ] # TODO: is this correct
 
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
        root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
        root.frequency.attrs["standard_name"] = (
            Coordinates.FREQUENCY_STANDARD_NAME.value
        )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
        #####################################################################
        # --- Sv Data --- #
@@ -230,8 +234,8 @@
            Coordinates.FREQUENCY.value,
        ]
 
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
        root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
        root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
        #####################################################################
@@ -242,7 +246,7 @@
        #
        root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
        root.attrs["processing_software_version"] = (
-            "0.0.
+            "0.0.9" # TODO: get programmatically, echopype>utils>prov.py
        )
        root.attrs["processing_software_time"] = Timestamp.get_timestamp()
        #
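Note: the zarr_manager changes above initialize coordinates with NaN instead of 0.0 and settle on a units / long_name / standard_name attribute ordering. A minimal standalone sketch of that pattern, assuming zarr v2 and hypothetical names and sizes:

```python
import numpy as np
import zarr

root = zarr.open("example.zarr", mode="w")  # hypothetical local store

width = 512  # hypothetical number of ping times
root.create_dataset(
    name="latitude",
    data=np.repeat(np.nan, width),  # initialize to NaN rather than 0.0
    shape=width,
    chunks=256,
    dtype=np.dtype("float32"),
)
# Latitude is indexed by time; attach CF-style metadata in the new order.
root.latitude.attrs["_ARRAY_DIMENSIONS"] = ["time"]
root.latitude.attrs["units"] = "degrees_north"
root.latitude.attrs["long_name"] = "Latitude"
root.latitude.attrs["standard_name"] = "latitude"
```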
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -9,7 +9,7 @@ from pathlib import Path # , PurePath
 
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.geometry import GeometryManager
-from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Cleaner, PipelineStatus
 
 TEMPDIR = "/tmp"
 
@@ -53,10 +53,6 @@ class RawToZarr:
    ):
        print('Writing Zarr information to DynamoDB table.')
        dynamodb_manager = DynamoDBManager()
-
-        # The problem is that these values were never populated
-        # and so when the query looks for values that aren't there
-        # they fail
        dynamodb_manager.update_item(
            table_name=table_name,
            key={
@@ -87,7 +83,8 @@
                ":ma": {"N": str(np.round(max_echo_range, 4))},
                ":mi": {"N": str(np.round(min_echo_range, 4))},
                ":nd": {"N": str(num_ping_time_dropna)},
-                ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
                ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
                ":se": {"S": sensor_name},
                ":sh": {"S": ship_name},
@@ -113,6 +110,7 @@
                "#ZP = :zp"
            ),
        )
+        print('Done writing Zarr information to DynamoDB table.')
 
    ############################################################################
    ############################################################################
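Note: the `:ps` value now comes from an enum member's `.name` rather than a hardcoded string. A sketch of why that yields a stable string for the table; the members shown are hypothetical stand-ins for the real enum in water_column_sonar_processing/utility/pipeline_status.py:

```python
from enum import Enum

class PipelineStatus(Enum):
    # Hypothetical subset of the real enum.
    LEVEL_0_PROCESSING = 0
    LEVEL_1_PROCESSING = 1
    LEVEL_2_PROCESSING = 2

# .name is the member's identifier as a string, suitable for a DynamoDB "S" value.
print({":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name}})
# {':ps': {'S': 'LEVEL_1_PROCESSING'}}
```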
@@ -143,16 +141,29 @@
    def raw_to_zarr(
        self,
        table_name,
+        input_bucket_name,
        output_bucket_name,
        ship_name,
        cruise_name,
        sensor_name,
        raw_file_name,
    ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
        print(f'Opening raw: {raw_file_name} and creating zarr store.')
        geometry_manager = GeometryManager()
        cleaner = Cleaner()
-        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw
+        cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw?
+
+        s3_manager = S3Manager()
+        s3_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+        bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+        s3_bottom_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+        s3_manager.download_file(bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name)
+        s3_manager.download_file(bucket_name=input_bucket_name, key=s3_bottom_file_path, file_name=bottom_file_name)
+
        try:
            gc.collect()
            print('Opening raw file with echopype.')
@@ -168,7 +179,12 @@
            )
            print('Compute volume backscattering strength (Sv) from raw data.')
            ds_sv = ep.calibrate.compute_Sv(echodata)
-            print('Done computing volume
+            print('Done computing volume backscatter strength (Sv) from raw data.')
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = echodata.vendor.detected_seafloor_depth
+            #
            frequencies = echodata.environment.frequency_nominal.values
            #################################################################
            # Get GPS coordinates
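Note: a condensed sketch of the echopype flow this hunk touches, mirroring the calls visible in the diff (file name and sonar model are hypothetical):

```python
import echopype as ep

# Open a raw file and calibrate to volume backscattering strength (Sv).
echodata = ep.open_raw("example.raw", sonar_model="EK60")  # hypothetical file
ds_sv = ep.calibrate.compute_Sv(echodata)

# detected_seafloor_depth lives in the vendor-specific group and is not
# carried into ds_sv by compute_Sv, so it is copied across when present.
if "detected_seafloor_depth" in list(echodata.vendor.variables):
    ds_sv["detected_seafloor_depth"] = echodata.vendor.detected_seafloor_depth
```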
@@ -187,12 +203,9 @@
            # TODO: this var name is supposed to represent minimum resolution of depth measurements
            # TODO revert this so that smaller diffs can be used
            # The most minimum the resolution can be is as small as 0.25 meters
-            min_echo_range = np.
-                0.25,
-                np.nanmin(np.diff(ds_sv.echo_range.values))
-            )
+            min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
            max_echo_range = float(np.nanmax(ds_sv.echo_range))
-            #
+            # This is the number of missing values found throughout the lat/lon
            num_ping_time_dropna = lat[~np.isnan(lat)].shape[0] # symmetric to lon
            #
            start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
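Note: the old right-hand side is truncated in this view, but the replacement line computes the rounded minimum spacing between depth bins directly; a worked example with hypothetical echo ranges:

```python
import numpy as np

echo_range = np.array([0.0, 0.249, 0.498, 0.747])  # hypothetical depth bins (m)
min_echo_range = np.round(np.nanmin(np.diff(echo_range)), 2)
print(min_echo_range)  # 0.25
```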
@@ -204,14 +217,27 @@
            store_name = f"{Path(raw_file_name).stem}.zarr"
            ds_sv.to_zarr(store=store_name)
            #################################################################
-            # TODO: do i still need this?
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file( # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
-            #################################################################
            output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager()
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print('Zarr store data already exists in s3, deleting existing and continuing.')
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix
+            )
+            #################################################################
            self.__zarr_info_to_table(
                output_bucket_name=output_bucket_name,
                table_name=table_name,
@@ -228,34 +254,15 @@
                frequencies=frequencies,
                channels=channels
            )
-            ###################################################################
-            #######################################################################
-            self.__upload_files_to_output_bucket(
-                output_bucket_name=output_bucket_name,
-                local_directory=store_name,
-                object_prefix=output_zarr_prefix
-            )
-            #######################################################################
-            # # TODO: verify count of objects matches
-            # s3_objects = self.__s3.list_objects(
-            #     bucket_name=self.__output_bucket,
-            #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-            #     access_key_id=self.__output_bucket_access_key,
-            #     secret_access_key=self.__output_bucket_secret_access_key
-            # )
            #######################################################################
-            #
-            # file_name=input_file_name,
-            # cruise_name=cruise_name,
-            # pipeline_status='SUCCESS_RAW_TO_ZARR'
-            # )
+            # TODO: verify count of objects matches, publish message, update status
            #######################################################################
-
-            print('here')
+            print('Finished raw-to-zarr conversion.')
        except Exception as err:
            print(f'Exception encountered creating local Zarr store with echopype: {err}')
            raise RuntimeError(f"Problem creating local Zarr store, {err}")
        finally:
+            print("Finally.")
            cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
            print('Done creating local zarr store.')
 
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.
+Version: 0.0.11
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -24,7 +24,7 @@ Requires-Dist: numcodecs==0.13.1
 Requires-Dist: numpy==1.26.4
 Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: python-dotenv==1.0.
+Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
@@ -114,6 +114,7 @@ python -m twine upload --repository pypi dist/*
 ```
 
 # Pre Commit Hook
+see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
 ```
 pre-commit install --allow-missing-config
@@ -132,3 +133,8 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 20241125
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
+
+
+# TODO:
+add https://pypi.org/project/setuptools-scm/
+for extracting the version
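Note: the new TODO about setuptools-scm concerns deriving the version from tags at build time; at runtime the installed version can already be read programmatically, which would also serve the hardcoded `processing_software_version` TODO in zarr_manager.py:

```python
from importlib.metadata import version

# Read the installed distribution's version instead of hardcoding it.
print(version("water_column_sonar_processing"))  # e.g. "0.0.11"
```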
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/RECORD
@@ -1,32 +1,32 @@
 water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
 water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
 water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
-water_column_sonar_processing/aws/dynamodb_manager.py,sha256=
-water_column_sonar_processing/aws/s3_manager.py,sha256=
+water_column_sonar_processing/aws/dynamodb_manager.py,sha256=gMDAXLE_p_nKmNZYICKA9T56PYDqtXBySlysSOVnWrI,10250
+water_column_sonar_processing/aws/s3_manager.py,sha256=kS48Vu_jE_fOKbwKOhCLWKDSqHzOGVEdZ_Lc4MaMCfA,15291
 water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uQiZoKm16jD0SUuXmhuPryxdE-6bUc6BlCi2UtmzUpw,7318
 water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=0Q9IRiBr6XvxUg5M2vCPtUhbnYnwa5pJI1ayfWXMgMs,10587
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
 water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
 water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=
+water_column_sonar_processing/model/zarr_manager.py,sha256=TbcVux-GWfX4XJ7UT20E7dI_h_islrKsGtjx_VwSsLg,14003
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
 water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=OPu4CoIlHQFW38iY4DLe5A5Ttrdz4NXtjYThrB-FuPs,16874
 water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
+water_column_sonar_processing-0.0.11.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.11.dist-info/METADATA,sha256=KFkI1367kV7L7pl8SIK4UFwUVJvUCHkRTPwBCqpnxWA,4566
+water_column_sonar_processing-0.0.11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.11.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.11.dist-info/RECORD,,
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/LICENSE: file without changes
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/WHEEL: file without changes
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.11.dist-info}/top_level.txt: file without changes