water-column-sonar-processing 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff shows the content of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of water-column-sonar-processing has been flagged in the registry; see the registry's advisory for details.
- water_column_sonar_processing/aws/s3_manager.py +63 -42
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +1 -1
- water_column_sonar_processing/geometry/geometry_manager.py +5 -7
- water_column_sonar_processing/model/zarr_manager.py +14 -10
- water_column_sonar_processing/processing/raw_to_zarr.py +26 -29
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/METADATA +8 -2
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/RECORD +10 -10
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py

@@ -3,6 +3,8 @@ import os
 import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import botocore
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -14,7 +16,10 @@ GB = 1024**3
 
 
 #########################################################################
-def chunked(ll: list, n: int) -> Generator:
+def chunked(
+    ll: list,
+    n: int
+) -> Generator:
     # Yields successively n-sized chunks from ll.
     for i in range(0, len(ll), n):
         yield ll[i : i + n]
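For context, the reformatted chunked() helper in use: a minimal, self-contained sketch. The key dicts are hypothetical stand-ins for the delete batches built later in this module, where S3's delete_objects caps each request at 1000 keys.

```
from collections.abc import Generator


def chunked(ll: list, n: int) -> Generator:
    # Yields successive n-sized chunks from ll.
    for i in range(0, len(ll), n):
        yield ll[i : i + n]


# 2500 hypothetical keys split into batches of at most 1000 each.
keys = [{"Key": f"level_1/file_{i}"} for i in range(2500)]
print([len(batch) for batch in chunked(ll=keys, n=1000)])  # [1000, 1000, 500]
```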
@@ -24,16 +29,9 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
-        # input_endpoint_url: str,
-        # output_endpoint_url: str,
-        # endpoint_url
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        # self.endpoint_url = endpoint_url
-        # self.input_endpoint_url = input_endpoint_url
-        # self.output_endpoint_url = output_endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -51,14 +49,12 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
         )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,7 +64,6 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
-            # endpoint_url=endpoint_url, # TODO: temporary
         )
         self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
             service_name="s3",
@@ -78,12 +73,12 @@ class S3Manager:
         self.paginator = self.s3_client.get_paginator('list_objects_v2')
         self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self): # TODO: do i need this?
-        return self.s3_session.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
+    # def get_client(self): # TODO: do i need this?
+    #     return self.s3_session.client(
+    #         service_name="s3",
+    #         config=self.s3_client_config,
+    #         region_name=self.s3_region,
+    #     )
 
     #####################################################################
     def create_bucket(
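The two list_objects_v2 paginators created above back the listing helpers further down. A minimal standalone sketch of the pagination pattern, using the public noaa-wcsd-pds bucket and an illustrative prefix:

```
import boto3

s3_client = boto3.client(service_name="s3", region_name="us-east-1")
paginator = s3_client.get_paginator("list_objects_v2")

keys = []
# Each page holds up to 1000 objects; "Contents" is absent on empty pages.
for page in paginator.paginate(Bucket="noaa-wcsd-pds", Prefix="data/raw/Henry_B._Bigelow/"):
    keys.extend(obj["Key"] for obj in page.get("Contents", []))
print(f"{len(keys)} objects found")
```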
@@ -146,18 +141,6 @@ class S3Manager:
         return all_uploads
 
     #####################################################################
-    # def upload_nodd_file2(
-    #     self,
-    #     body: str,
-    #     bucket: str,
-    #     key: str,
-    # ):
-    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
-    #         Body=body,
-    #         Bucket=bucket,
-    #         Key=key,
-    #     )
-
     # TODO: this uses resource, try to use client
     def upload_file(
         self,
@@ -190,11 +173,36 @@ class S3Manager:
             all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
+            output_bucket_name=self.output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading files to output bucket.")
         return all_uploads
 
+    #####################################################################
+    def check_if_object_exists(
+        self,
+        bucket_name,
+        key_name
+    ) -> bool:
+        s3_manager2 = S3Manager()
+        s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+        s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+        try:
+            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
+            s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+        except botocore.exceptions.ClientError as e:
+            if e.response['Error']['Code'] == "404":
+                # The object does not exist.
+                return False
+            elif e.response['Error']['Code'] == 403:
+                # Unauthorized, including invalid bucket
+                return False
+            else:
+                # Something else has gone wrong.
+                raise
+        return True
+
     #####################################################################
     # used: raw-to-zarr
     def list_objects(  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
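A standalone sketch of the head_object existence check added above, with hypothetical bucket and key names. One caveat worth noting: boto3 reports error codes as strings, so the string "403" is the comparison that actually matches (the hunk above compares against the integer 403, which never will):

```
import boto3
import botocore


def object_exists(bucket_name: str, key_name: str) -> bool:
    s3_client = boto3.client("s3")
    try:
        s3_client.head_object(Bucket=bucket_name, Key=key_name)
    except botocore.exceptions.ClientError as e:
        # Error codes arrive as strings: "404" missing, "403" unauthorized.
        if e.response["Error"]["Code"] in ("404", "403"):
            return False
        raise  # anything else is unexpected
    return True
```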
@@ -202,6 +210,7 @@ class S3Manager:
         bucket_name,
         prefix
     ):
+        # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
         # s3_client = self.s3_client
@@ -227,7 +236,11 @@ class S3Manager:
 
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    def folder_exists_and_not_empty(
+        self,
+        bucket_name: str,
+        path: str
+    ) -> bool:
         if not path.endswith("/"):
             path = path + "/"
         s3_client = self.s3_client
@@ -319,23 +332,15 @@ class S3Manager:
         print("downloaded file")
 
     #####################################################################
-    #
-    # def delete_nodd_object(  # noaa-wcsd-model-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ): # -> dict:
-    #     #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
+    # TODO: need to test this!!!
     def delete_nodd_objects(  # nodd-bucket
         self,
+        bucket_name,
         objects: list,
     ):
         try:
             print(
-                f"Deleting {len(objects)} objects in {
+                f"Deleting {len(objects)} objects in {bucket_name} in batches."
             )
             objects_to_delete = []
             for obj in objects:
@@ -343,12 +348,28 @@ class S3Manager:
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=
+                    Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print(f"Deleted files.")
         except Exception as err:
             print(f"Problem was encountered while deleting objects: {err}")
 
+    #####################################################################
+    # TODO: need to test this!!!
+    def delete_nodd_object(
+        self,
+        bucket_name,
+        key_name,
+    ):
+        try:
+            print(
+                f"Deleting {key_name} objects in {bucket_name}."
+            )
+            self.s3_client_noaa_wcsd_zarr_pds.delete_object(Bucket=bucket_name, Key=key_name)
+            print(f"Deleted file.")
+        except Exception as err:
+            print(f"Problem was encountered while deleting objects: {err}")
+
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
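Combining the chunked() helper from the top of this module with delete_objects, a condensed sketch of the batch-delete pattern delete_nodd_objects implements (the bucket is real but the keys are hypothetical):

```
import boto3

s3_client = boto3.client("s3")
objects_to_delete = [{"Key": f"level_1/store.zarr/chunk_{i}"} for i in range(2500)]

# delete_objects accepts at most 1000 keys per request, hence the batching.
for batch in chunked(ll=objects_to_delete, n=1000):
    s3_client.delete_objects(Bucket="noaa-wcsd-zarr-pds", Delete={"Objects": batch})
```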
water_column_sonar_processing/cruise/create_empty_zarr_store.py

@@ -47,7 +47,7 @@ class CreateEmptyZarrStore:
     ):
         for file in files:
             local_path = os.path.join(subdir, file)
-            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model
+            # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/..zattrs'
             s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
             all_files.append([local_path, s3_key])
         #
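The key-derivation line above in isolation: everything after the `{cruise_name}.model` segment of the local path is grafted onto the S3 prefix. A small sketch with hypothetical values:

```
cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"
local_path = "/tmp/HB0806.model/.zattrs"  # hypothetical local store path

# Split on the store directory name and keep the trailing relative path.
s3_key = f'{object_prefix}/{cruise_name}.model{local_path.split(f"{cruise_name}.model")[-1]}'
print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs
```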
water_column_sonar_processing/geometry/geometry_manager.py

@@ -138,15 +138,13 @@ class GeometryManager:
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-
+        geojson_object_exists = s3_manager.check_if_object_exists(
             bucket_name=output_bucket_name,
-
+            key_name=f"{geo_json_prefix}/{geo_json_name}"
         )
-        if
-            print(
-
-            )
-        s3_manager.delete_nodd_objects(objects=s3_objects)
+        if geojson_object_exists:
+            print("GeoJSON already exists in s3, deleting existing and continuing.")
+            s3_manager.delete_nodd_object(bucket_name=output_bucket_name, key_name=f"{geo_json_prefix}/{geo_json_name}")
 
         print("Upload GeoJSON to s3.")
         s3_manager.upload_nodd_file(
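In condensed form, the repaired flow above: check for the object, delete it if present, then upload fresh. The prefix and file name below are hypothetical, and the import path assumes the package layout shown in the RECORD at the end of this diff:

```
from water_column_sonar_processing.aws.s3_manager import S3Manager

s3_manager = S3Manager()
geojson_key = "spatial/geojson/HB0806.json"  # hypothetical prefix/name

if s3_manager.check_if_object_exists(bucket_name="noaa-wcsd-zarr-pds", key_name=geojson_key):
    # Delete the stale GeoJSON so the subsequent upload starts clean.
    s3_manager.delete_nodd_object(bucket_name="noaa-wcsd-zarr-pds", key_name=geojson_key)
```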
water_column_sonar_processing/model/zarr_manager.py

@@ -86,8 +86,6 @@ class ZarrManager:
             data=np.repeat(0.0, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            # Constants.TILE_SIZE.value,
-            #), # TODO: the chunking scheme doesn't seem to be working here
             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
             compressor=self.__compressor,
             fill_value=np.nan,  # TODO: do i want nan's?
@@ -125,14 +123,16 @@ class ZarrManager:
 
         root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
 
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
         root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Latitude --- #
         root.create_dataset(
             name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),
+            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
@@ -144,14 +144,16 @@ class ZarrManager:
         # Note: LATITUDE is indexed by TIME
         root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
         root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Longitude --- #
         root.create_dataset(
             name=Coordinates.LONGITUDE.value,
             # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+            data=np.repeat(np.nan, width),
             shape=width,
             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
@@ -163,8 +165,9 @@ class ZarrManager:
         # Note: LONGITUDE is indexed by TIME
         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+        root.longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
 
         #####################################################################
         # TODO: verify adding this variable for where the bottom was detected
@@ -183,8 +186,9 @@ class ZarrManager:
         # BOTTOM is indexed by TIME
         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
 
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
 
         #####################################################################
         # --- Coordinate: Frequency --- #
@@ -204,11 +208,11 @@ class ZarrManager:
             Coordinates.FREQUENCY.value
         ]  # TODO: is this correct
 
+        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
         root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
         root.frequency.attrs["standard_name"] = (
             Coordinates.FREQUENCY_STANDARD_NAME.value
         )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
 
         #####################################################################
         # --- Sv Data --- #
@@ -230,8 +234,8 @@ class ZarrManager:
             Coordinates.FREQUENCY.value,
         ]
 
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
         root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
 
         #####################################################################
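The pattern repeated across these hunks: each coordinate array is created NaN-filled, then annotated with units, long_name, and standard_name (in that order) so xarray consumers can decode it. A minimal sketch assuming the zarr v2 API used elsewhere in this module, with illustrative values standing in for the package's Constants/Coordinates enums:

```
import numpy as np
import zarr

width = 512  # illustrative number of time samples
root = zarr.group()  # in-memory store, for the sketch only

root.create_dataset(
    name="latitude",
    data=np.repeat(np.nan, width),  # NaN marks samples with no GPS fix yet
    shape=width,
    chunks=256,
    dtype=np.dtype("float64"),
)
root.latitude.attrs["_ARRAY_DIMENSIONS"] = ["time"]  # latitude is indexed by time
root.latitude.attrs["units"] = "degrees_north"
root.latitude.attrs["long_name"] = "Latitude"
root.latitude.attrs["standard_name"] = "latitude"
```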
@@ -242,7 +246,7 @@ class ZarrManager:
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
         root.attrs["processing_software_version"] = (
-            "0.0.
+            "0.0.9"  # TODO: get programmatically, echopype>utils>prov.py
         )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
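One route to the "get programmatically" TODO above: read the installed distribution's version from package metadata (stdlib since Python 3.8) instead of hardcoding the string:

```
from importlib.metadata import version

# Resolves to whatever version of the wheel is installed, e.g. "0.0.10".
processing_software_version = version("water_column_sonar_processing")
```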
water_column_sonar_processing/processing/raw_to_zarr.py

@@ -149,10 +149,14 @@ class RawToZarr:
         sensor_name,
         raw_file_name,
     ):
+        """
+        Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+        to the nodd bucket.
+        """
         print(f'Opening raw: {raw_file_name} and creating zarr store.')
         geometry_manager = GeometryManager()
         cleaner = Cleaner()
-        cleaner.delete_local_files(file_types=["*.zarr", "*.json"])  # TODO: include bot and raw
+        cleaner.delete_local_files(file_types=["*.zarr", "*.json"])  # TODO: include bot and raw?
         try:
             gc.collect()
             print('Opening raw file with echopype.')
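A minimal sketch of the echopype step the new docstring describes; the raw file name and sonar model here are hypothetical, and compute_Sv returns the xarray Dataset that the method later writes out with to_zarr:

```
import echopype as ep

# Open a raw EK60 file and compute volume backscattering strength (Sv).
echodata = ep.open_raw(raw_file="D20080605-T143349.raw", sonar_model="EK60")
ds_sv = ep.calibrate.compute_Sv(echodata)
ds_sv.to_zarr(store="D20080605-T143349.zarr")
```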
@@ -204,14 +208,27 @@ class RawToZarr:
             store_name = f"{Path(raw_file_name).stem}.zarr"
             ds_sv.to_zarr(store=store_name)
             #################################################################
-            # TODO: do i still need this?
-            # print('Note: Adding GeoJSON inside Zarr store')
-            # self.__write_geojson_to_file(  # Was trying to write geojson to the L1 zarr store
-            #     store_name=store_name,
-            #     data=gps_data
-            # )
-            #################################################################
             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            #################################################################
+            # If zarr store already exists then delete
+            s3_manager = S3Manager()
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+            )
+            if len(child_objects) > 0:
+                print('Zarr store data already exists in s3, deleting existing and continuing.')
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            #################################################################
+            self.__upload_files_to_output_bucket(
+                output_bucket_name=output_bucket_name,
+                local_directory=store_name,
+                object_prefix=output_zarr_prefix
+            )
+            #################################################################
             self.__zarr_info_to_table(
                 output_bucket_name=output_bucket_name,
                 table_name=table_name,
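The block added above makes re-runs idempotent: any objects already under the store's prefix are deleted before the fresh upload. A condensed sketch with hypothetical ship, cruise, and file names, using the S3Manager methods this release adds:

```
from pathlib import Path

from water_column_sonar_processing.aws.s3_manager import S3Manager

raw_file_name = "D20080605-T143349.raw"  # hypothetical
prefix = f"level_1/Henry_B._Bigelow/HB0806/EK60/{Path(raw_file_name).stem}.zarr"

s3_manager = S3Manager()
existing = s3_manager.get_child_objects(bucket_name="noaa-wcsd-zarr-pds", sub_prefix=prefix)
if len(existing) > 0:
    # Clear the stale store so the new upload is not mixed with old chunks.
    s3_manager.delete_nodd_objects(bucket_name="noaa-wcsd-zarr-pds", objects=existing)
```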
@@ -228,29 +245,9 @@ class RawToZarr:
                 frequencies=frequencies,
                 channels=channels
             )
-            ###################################################################
-            #######################################################################
-            self.__upload_files_to_output_bucket(
-                output_bucket_name=output_bucket_name,
-                local_directory=store_name,
-                object_prefix=output_zarr_prefix
-            )
-            #######################################################################
-            # # TODO: verify count of objects matches
-            # s3_objects = self.__s3.list_objects(
-            #     bucket_name=self.__output_bucket,
-            #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-            #     access_key_id=self.__output_bucket_access_key,
-            #     secret_access_key=self.__output_bucket_secret_access_key
-            # )
             #######################################################################
-            #
-            #     file_name=input_file_name,
-            #     cruise_name=cruise_name,
-            #     pipeline_status='SUCCESS_RAW_TO_ZARR'
-            # )
+            # TODO: verify count of objects matches, publish message, update status
             #######################################################################
-            # self.__publish_done_message(input_message)
             print('here')
         except Exception as err:
             print(f'Exception encountered creating local Zarr store with echopype: {err}')
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.9
+Version: 0.0.10
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -24,7 +24,7 @@ Requires-Dist: numcodecs==0.13.1
 Requires-Dist: numpy==1.26.4
 Requires-Dist: pandas==2.2.3
 Requires-Dist: pyarrow==18.1.0
-Requires-Dist: python-dotenv==1.0.
+Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
@@ -114,6 +114,7 @@ python -m twine upload --repository pypi dist/*
 ```
 
 # Pre Commit Hook
+see here for installation: https://pre-commit.com/
 https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
 ```
 pre-commit install --allow-missing-config
@@ -132,3 +133,8 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 20241125
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
+
+
+# TODO:
+add https://pypi.org/project/setuptools-scm/
+for extracting the version
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/RECORD

@@ -2,31 +2,31 @@ water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4J
 water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
 water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
 water_column_sonar_processing/aws/dynamodb_manager.py,sha256=sZHn-hgCt3K3w0x5BcXfF5jLMt_F11dAtQHJToij9nU,10008
-water_column_sonar_processing/aws/s3_manager.py,sha256=
+water_column_sonar_processing/aws/s3_manager.py,sha256=kS48Vu_jE_fOKbwKOhCLWKDSqHzOGVEdZ_Lc4MaMCfA,15291
 water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uQiZoKm16jD0SUuXmhuPryxdE-6bUc6BlCi2UtmzUpw,7318
 water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=0Q9IRiBr6XvxUg5M2vCPtUhbnYnwa5pJI1ayfWXMgMs,10587
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
 water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
 water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=
+water_column_sonar_processing/model/zarr_manager.py,sha256=TbcVux-GWfX4XJ7UT20E7dI_h_islrKsGtjx_VwSsLg,14003
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
 water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=7vvoNe0jlB34R5mBPceQjL9N_5X0GTWs9xpCqvRK1nQ,15931
 water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.9.dist-info/LICENSE,sha256=
-water_column_sonar_processing-0.0.9.dist-info/METADATA,sha256=
-water_column_sonar_processing-0.0.9.dist-info/WHEEL,sha256=
-water_column_sonar_processing-0.0.9.dist-info/top_level.txt,sha256=
-water_column_sonar_processing-0.0.9.dist-info/RECORD,,
+water_column_sonar_processing-0.0.10.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.10.dist-info/METADATA,sha256=qFNeJ3GduRHKfcJRYShO9LamuMREk66qm18IUUXsMg8,4566
+water_column_sonar_processing-0.0.10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.10.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.10.dist-info/RECORD,,
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/LICENSE: file without changes

{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/WHEEL: file without changes

{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-0.0.10.dist-info}/top_level.txt: file without changes