water-column-sonar-processing 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/aws/dynamodb_manager.py +15 -11
- water_column_sonar_processing/aws/s3_manager.py +39 -13
- water_column_sonar_processing/aws/s3fs_manager.py +42 -35
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +15 -8
- water_column_sonar_processing/cruise/experiment_datatree.py +13 -0
- water_column_sonar_processing/cruise/resample_regrid.py +29 -18
- water_column_sonar_processing/geometry/geometry_manager.py +10 -4
- water_column_sonar_processing/model/zarr_manager.py +33 -10
- water_column_sonar_processing/processing/raw_to_zarr.py +45 -20
- {water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/METADATA +18 -16
- {water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/RECORD +14 -13
- {water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/dynamodb_manager.py
@@ -111,17 +111,21 @@ class DynamoDBManager:
         expression_attribute_names,
         expression_attribute_values,
         update_expression,
-    ):
-
-
-
-
-
-
-
-
-
-
+    ):  # TODO: convert to boolean
+        try:
+            response = self.__dynamodb_client.update_item(
+                TableName=table_name,
+                Key=key,
+                ExpressionAttributeNames=expression_attribute_names,
+                ExpressionAttributeValues=expression_attribute_values,
+                UpdateExpression=update_expression,
+            )
+            status_code = response["ResponseMetadata"]["HTTPStatusCode"]
+            # print(f"HTTPStatusCode: {status_code}")
+            # assert status_code == 200, "Problem, unable to update dynamodb table."
+            # assert response['ConsumedCapacity']['TableName'] == table_name
+        except Exception as err:
+            print(f"Problem was encountered while updating item: {err}")
 
     #####################################################################
     # TODO: change to "get_cruise_as_df"
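Reviewer note: the rewritten `update_item` simply wraps boto3's `update_item` call in a try/except. A minimal sketch of the argument shapes it forwards; the table name, key, and attribute values below are illustrative, not taken from the package.

```python
# Illustrative only: the arguments DynamoDBManager.update_item passes through
# to boto3's dynamodb client.update_item (table/key/values are hypothetical).
import boto3

dynamodb_client = boto3.client(service_name="dynamodb", region_name="us-east-1")

response = dynamodb_client.update_item(
    TableName="example-cruise-table",                     # hypothetical table
    Key={
        "FILE_NAME": {"S": "D20070711-T182032.raw"},      # hypothetical key attributes
        "CRUISE_NAME": {"S": "HB0707"},
    },
    ExpressionAttributeNames={"#PS": "PIPELINE_STATUS"},
    ExpressionAttributeValues={":ps": {"S": "LEVEL_1_PROCESSING"}},
    UpdateExpression="SET #PS = :ps",
)
print(response["ResponseMetadata"]["HTTPStatusCode"])  # 200 on success
```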
water_column_sonar_processing/aws/s3_manager.py
@@ -1,6 +1,7 @@
 import json
 import os
 import boto3
+from typing import Optional
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
@@ -29,9 +30,11 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
+        endpoint_url: Optional[str] = None,
     ):
-        self.
-        self.
+        self.endpoint_url = endpoint_url
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -49,6 +52,7 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
@@ -64,11 +68,13 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.paginator = self.s3_client.get_paginator('list_objects_v2')
         self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
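The new optional `endpoint_url` argument is threaded through every client, resource, and session the manager builds, so the whole class can be aimed at a local test endpoint (for example a moto server) instead of AWS. A hedged sketch, assuming a moto server is listening locally:

```python
# Hypothetical test wiring; the endpoint address is assumed, not part of the package.
from water_column_sonar_processing.aws import S3Manager

test_s3_manager = S3Manager(endpoint_url="http://127.0.0.1:5000")  # e.g. a local moto server
print(test_s3_manager.list_buckets())

# Production use keeps the default (None) so boto3 resolves the real AWS endpoint:
prod_s3_manager = S3Manager()
```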
@@ -85,13 +91,31 @@ class S3Manager:
         self,
         bucket_name: str,
     ):
-
+        """
+        Note: this function is only really meant to be used for creating test
+        buckets. It allows public read of all objects.
+        """
+        # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+        public_policy = {
+            "Version": "2012-10-17",
+            "Statement": [
+                {
+                    "Effect": "Allow",
+                    "Principal": "*",
+                    "Action": "s3:GetObject",
+                    "Resource": f"arn:aws:s3:::{bucket_name}/*",
+                }
+            ],
+        }
+        response1 = self.s3_client.create_bucket(
             Bucket=bucket_name,
-
-            #
-            # TODO: if region is us-east-1, don't include this line somehow
-            # CreateBucketConfiguration={'LocationConstraint': self.__s3_region}
+            ACL='public-read'
         )
+        print(response1)
+        # response = self.s3_client.put_bucket_policy(
+        #     Bucket=bucket_name, Policy=json.dumps(public_policy)
+        # )
+        # print(response)
 
     #####################################################################
     def list_buckets(self):
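The ACL-based bucket creation above leaves a `put_bucket_policy` alternative commented out. For reference, a hedged sketch of applying that same public-read policy with plain boto3; the bucket name and endpoint are illustrative:

```python
import json
import boto3

s3_client = boto3.client("s3", endpoint_url="http://127.0.0.1:5000")  # assumed local test endpoint
bucket_name = "example-test-bucket"                                    # hypothetical bucket

public_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Principal": "*",
            "Action": "s3:GetObject",
            "Resource": f"arn:aws:s3:::{bucket_name}/*",
        }
    ],
}

s3_client.create_bucket(Bucket=bucket_name)
s3_client.put_bucket_policy(Bucket=bucket_name, Policy=json.dumps(public_policy))
```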
@@ -156,6 +180,7 @@ class S3Manager:
         self,
         local_directory,
         remote_directory,
+        output_bucket_name,
     ):
         # Right now this is just for uploading a zarr store to s3
         print("Uploading files to output bucket.")
@@ -173,7 +198,7 @@ class S3Manager:
                 all_files.append([local_path, s3_key])
 
         all_uploads = self.upload_files_with_thread_pool_executor(
-            output_bucket_name=
+            output_bucket_name=output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading files to output bucket.")
@@ -228,8 +253,8 @@ class S3Manager:
     # ):
     #     # Returns a list of key strings for each object in bucket defined by prefix
     #     keys = []
-    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=
-    #     for page in paginator.paginate(Bucket=
+    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=output_bucket_name, Prefix=prefix):
+    #     for page in paginator.paginate(Bucket=output_bucket_name, Prefix=prefix):
     #         if "Contents" in page.keys():
     #             keys.extend([k["Key"] for k in page["Contents"]])
     #     return keys
@@ -371,7 +396,6 @@ class S3Manager:
             print(f"Problem was encountered while deleting objects: {err}")
 
     #####################################################################
-    # not used TODO: remove
     def put(self, bucket_name, key, body):  # noaa-wcsd-zarr-pds
         self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)  # "Body" can be a file
 
@@ -382,10 +406,12 @@ class S3Manager:
         cruise_name,
         sensor_name,
         file_name_stem,
+        output_bucket_name,
     ) -> str:
         try:
-
-
+            resource = self.s3_resource_noaa_wcsd_zarr_pds
+            content_object = resource.Object(
+                bucket_name=output_bucket_name,
                 key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
             ).get()
             file_content = content_object["Body"].read().decode("utf-8")
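`read_s3_json` now takes the bucket explicitly and reads `spatial/geojson/<ship>/<cruise>/<sensor>/<stem>.json` through the boto3 resource. A hedged standalone sketch of that read; the file stem is illustrative and public access to the NODD bucket is assumed:

```python
import json
import boto3

s3_resource = boto3.resource("s3")

content_object = s3_resource.Object(
    bucket_name="noaa-wcsd-zarr-pds",  # NODD output bucket
    key="spatial/geojson/Henry_B._Bigelow/HB0707/EK60/D20070711-T182032.json",  # illustrative stem
).get()

geo_json = json.loads(content_object["Body"].read().decode("utf-8"))
print(geo_json.get("type"))  # a GeoJSON FeatureCollection is expected
```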
water_column_sonar_processing/aws/s3fs_manager.py
@@ -1,5 +1,5 @@
 import os
-
+from typing import Optional
 import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
@@ -9,37 +9,25 @@ class S3FSManager:
     #####################################################################
     def __init__(
         self,
+        endpoint_url: Optional[str] = None,
     ):
-        self.
+        self.endpoint_url = endpoint_url
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
+            endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True
-            # use_ssl=False,
-            # skip_instance_cache=True,
-            # default_block_size='100MB', # if no specific value is given at all time. The built-in default is 5MB
-            # client_kwargs={
-            #     "region_name": self.__s3_region
-            # }
         )
 
-
-
-
-
-
-
-
-
-        print(ff)
-
-    #####################################################################
-    def upload_data(self, bucket_name, file_path, prefix):
-        # TODO: this works in theory but use boto3 to upload files
-        s3_path = f"s3://{bucket_name}/{prefix}/"
-        s3_file_system = self.s3fs
-        s3_file_system.put(file_path, s3_path, recursive=True)
-
+    # s3_fs = s3fs.S3FileSystem( # TODO: use s3fs_manager?
+    #     anon=True,
+    #     client_kwargs={
+    #         "endpoint_url": moto_server,
+    #         "region_name": "us-east-1",
+    #     },
+    # )
     #####################################################################
     def s3_map(
         self,
@@ -49,20 +37,39 @@ class S3FSManager:
         # create=False, not false because will be writing
         # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
         return s3fs.S3Map(
-            root=s3_zarr_store_path,
+            root=s3_zarr_store_path,
+            s3=self.s3fs
         )  # create=False, not false because will be writing
 
+    #####################################################################
+    # def add_file(self, filename):
+    #     full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+    #     print(full_path)
+    #
+    #     self.s3fs.touch(full_path)
+    #     ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+    #
+    #     print(ff)
+
+    #####################################################################
+    def upload_data(
+        self,
+        bucket_name,
+        file_path,
+        prefix
+    ):
+        # TODO: this works in theory but use boto3 to upload files
+        s3_path = f"s3://{bucket_name}/{prefix}/"
+        s3_file_system = self.s3fs
+        s3_file_system.put(file_path, s3_path, recursive=True)
+
     #####################################################################
     def exists(
         self,
-
+        s3_path,
     ):
-        s3_file_system =
-        return
+        # s3_file_system =
+        return self.s3fs.exists(s3_path)
+
 
     #####################################################################
-    # def put(
-    #     self
-    # ):
-    #     s3_file_system = self.s3fs
-    #     return
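S3FSManager gains the same optional `endpoint_url`, passed straight to `s3fs.S3FileSystem`, and `s3_map` now returns an `S3Map` bound to that filesystem. A rough usage sketch; the endpoint, credentials, and store path are all assumed:

```python
import s3fs
import zarr

# Assumed local endpoint and credentials (e.g. a moto server in a test).
fs = s3fs.S3FileSystem(
    endpoint_url="http://127.0.0.1:5000",
    key="test-access-key",
    secret="test-secret-key",
)

# Hypothetical cruise-level store path following the package's level_2 layout.
store = s3fs.S3Map(root="example-bucket/level_2/Ship_Name/CRUISE/EK60/CRUISE.zarr", s3=fs)
cruise_zarr = zarr.open(store=store, mode="r+")  # read/write, as the package does for cruise stores
print(cruise_zarr.tree())
```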
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -24,14 +24,14 @@ class CreateEmptyZarrStore:
         self,
     ):
         self.__overwrite = True
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
-
     # TODO: move this to the s3_manager
     def upload_zarr_store_to_s3(
         self,
+        output_bucket_name: str,
         local_directory: str,
         object_prefix: str,
         cruise_name: str,
@@ -43,24 +43,28 @@ class CreateEmptyZarrStore:
         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_files = []
         for subdir, dirs, files in os.walk(
-            f"{local_directory}/{cruise_name}.
+            f"{local_directory}/{cruise_name}.zarr"
         ):
             for file in files:
                 local_path = os.path.join(subdir, file)
-                #
-
+                # TODO: find a better method for splitting strings here:
+                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
                 all_files.append([local_path, s3_key])
         #
         # print(all_files)
         s3_manager.upload_files_with_thread_pool_executor(
+            output_bucket_name=output_bucket_name,
             all_files=all_files,
         )
         print("Done uploading with thread pool executor.")
         # TODO: move to common place
 
     #######################################################
+    # @classmethod
     def create_cruise_level_zarr_store(
         self,
+        output_bucket_name: str,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
@@ -116,17 +120,18 @@ class CreateEmptyZarrStore:
             new_width = int(consolidated_zarr_width)
             print(f"new_width: {new_width}")
             #################################################################
-            store_name = f"{cruise_name}.
+            store_name = f"{cruise_name}.zarr"
             print(store_name)
             ################################################################
             # Delete existing zarr store if it exists
             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
             child_objects = s3_manager.get_child_objects(
-                bucket_name=
+                bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
             if len(child_objects) > 0:
                 s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
                     objects=child_objects,
                 )
             ################################################################
@@ -153,6 +158,7 @@ class CreateEmptyZarrStore:
             )
             #################################################################
             self.upload_zarr_store_to_s3(
+                output_bucket_name=output_bucket_name,
                 local_directory=tempdir,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
@@ -174,6 +180,7 @@ class CreateEmptyZarrStore:
             #################################################################
             # Success
             # TODO: update enum in dynamodb
+            print("Done creating cruise level zarr store.")
             #################################################################
         except Exception as err:
             print(f"Problem trying to create new cruise zarr store: {err}")
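To make the key-splitting TODO above concrete: everything after `<cruise>.zarr` in the walked local path is re-rooted under the object prefix. A small worked example with made-up paths:

```python
# Illustrative values only.
cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"
local_path = "/tmp/tmp_abc123/HB0806.zarr/Sv/.zarray"  # hypothetical file found by os.walk

s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
print(s3_key)  # level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/Sv/.zarray
```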
water_column_sonar_processing/cruise/resample_regrid.py
@@ -26,8 +26,8 @@ class ResampleRegrid:
         self,
     ):
         self.__overwrite = True
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.dtype = "float32"
 
     #################################################################
@@ -144,6 +144,10 @@ class ResampleRegrid:
         cruise_name,
         sensor_name,
         table_name,
+        # TODO: file_name?,
+        bucket_name,  # TODO: this is the same bucket
+        override_select_files=None,
+        endpoint_url=None
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
@@ -151,17 +155,17 @@
         read/write operations. We open the file-level store with Xarray to leverage tools for
         resampling and subsetting the data.
         """
-        print("Interpolating data.")
+        print("Resample Regrid, Interpolating data.")
         try:
             zarr_manager = ZarrManager()
-            # s3_manager = S3Manager()
             geo_manager = GeometryManager()
-
+
             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
-
+                output_bucket_name=bucket_name,
+                endpoint_url=endpoint_url,
             )
 
             # get dynamo stuff
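The docstring describes regridding each file-level store onto the cruise-level depth axis. The package's own `interpolate_data` is not shown in this hunk; the snippet below is only a hedged illustration of that style of depth interpolation with xarray:

```python
# Illustration only, not the package's interpolate_data.
import numpy as np
import xarray as xr

native_depths = np.array([0.0, 0.5, 1.0, 1.5, 2.0])      # per-file echo_range bins (assumed)
all_cruise_depth_values = np.arange(0.0, 2.01, 0.25)      # cruise-level depth grid (assumed)

sv = xr.DataArray(
    np.random.default_rng(0).normal(-70.0, 5.0, size=(5, 3)),
    dims=("depth", "ping_time"),
    coords={"depth": native_depths},
)

regridded = sv.interp(depth=all_cruise_depth_values)      # linear interpolation onto the new grid
print(regridded.shape)  # (9, 3)
```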
@@ -175,8 +179,12 @@
 
             #########################################################
             #########################################################
-            # TODO: iterate files here
             all_file_names = cruise_df["FILE_NAME"]
+
+            if override_select_files is not None:
+                all_file_names = override_select_files
+
+            # Iterate files
             for file_name in all_file_names:
                 gc.collect()
                 file_name_stem = Path(file_name).stem
@@ -200,6 +208,8 @@
                     cruise_name=cruise_name,
                     sensor_name=sensor_name,
                     file_name_stem=file_name_stem,
+                    input_bucket_name=bucket_name,
+                    endpoint_url=endpoint_url,
                 )
                 #########################################################################
                 # [3] Get needed indices
@@ -225,11 +235,11 @@
                         :, start_ping_time_index:end_ping_time_index, :
                     ].shape
                 )
-                cruise_sv_subset[:, :, :] = np.nan
+                cruise_sv_subset[:, :, :] = np.nan
 
                 all_cruise_depth_values = zarr_manager.get_depth_values(
                     min_echo_range=min_echo_range, max_echo_range=max_echo_range
-                )
+                )  # (5262,) and
 
                 print(" ".join(list(input_xr_zarr_store.Sv.dims)))
                 if set(input_xr_zarr_store.Sv.dims) != {
@@ -239,13 +249,14 @@
                 }:
                     raise Exception("Xarray dimensions are not as expected.")
 
-                # get geojson
                 indices, geospatial = geo_manager.read_s3_geo_json(
                     ship_name=ship_name,
                     cruise_name=cruise_name,
                     sensor_name=sensor_name,
                     file_name_stem=file_name_stem,
                     input_xr_zarr_store=input_xr_zarr_store,
+                    endpoint_url=endpoint_url,
+                    output_bucket_name=bucket_name,
                 )
 
                 input_xr = input_xr_zarr_store.isel(ping_time=indices)
@@ -261,22 +272,18 @@
                 )
 
                 # --- UPDATING --- #
-
                 regrid_resample = self.interpolate_data(
                     input_xr=input_xr,
                     ping_times=ping_times,
                     all_cruise_depth_values=all_cruise_depth_values,
                 )
 
-                print(
-                    f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
-                )
-
+                print(f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}")
                 #########################################################################
                 # write Sv values to cruise-level-zarr-store
                 for channel in range(
                     len(input_xr.channel.values)
-                ):  #
+                ):  # does not like being written in one fell swoop :(
                     output_zarr_store.Sv[
                         :, start_ping_time_index:end_ping_time_index, channel
                     ] = regrid_resample[:, :, channel]
@@ -285,14 +292,18 @@
                 # [5] write subset of latitude/longitude
                 output_zarr_store.latitude[
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()["latitude"].values
+                ] = geospatial.dropna()["latitude"].values  # TODO: get from ds_sv directly, dont need geojson anymore
                 output_zarr_store.longitude[
                     start_ping_time_index:end_ping_time_index
                 ] = geospatial.dropna()["longitude"].values
         except Exception as err:
             print(f"Problem interpolating the data: {err}")
             raise err
-
+        # else:
+        #     pass
+        finally:
+            print("Done interpolating data.")
+            # TODO: read across times and verify data was written?
 
     #######################################################
 
water_column_sonar_processing/geometry/geometry_manager.py
@@ -38,6 +38,7 @@ class GeometryManager:
         cruise_name,
         sensor_name,
         file_name,
+        endpoint_url=None,
         write_geojson=True,
     ) -> tuple:
         file_name_stem = Path(file_name).stem
@@ -61,7 +62,7 @@
         time1 = echodata.environment.time1.values
 
         if len(nmea_times) < len(time1):
-            raise Exception(
+            raise Exception(  # TODO: explore this logic further...
                 "Problem: Not enough NMEA times available to extrapolate time1."
             )
 
@@ -137,7 +138,7 @@
         )
 
         print("Checking s3 and deleting any existing GeoJSON file.")
-        s3_manager = S3Manager()
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
         geojson_object_exists = s3_manager.check_if_object_exists(
             bucket_name=output_bucket_name,
             key_name=f"{geo_json_prefix}/{geo_json_name}"
@@ -180,7 +181,8 @@
             raise
         # Note: returned lat/lon values can include np.nan because they need to be aligned with
         # the Sv data! GeoJSON needs simplification but has been filtered.
-        return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
+        # return gps_df.index.values, gps_df.latitude.values, gps_df.longitude.values
+        return gps_df.index.values, lat, lon
         # TODO: if geojson is already returned with 0,0, the return here
         # can include np.nan values?
 
@@ -192,14 +194,18 @@
         sensor_name,
         file_name_stem,
         input_xr_zarr_store,
+        endpoint_url,
+        output_bucket_name,
     ):
         try:
-            s3_manager = S3Manager()
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
             geo_json = s3_manager.read_s3_json(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name_stem=file_name_stem,
+                output_bucket_name=output_bucket_name,
+
             )
             ###
             geospatial = geopandas.GeoDataFrame.from_features(
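`read_s3_geo_json` hands the fetched FeatureCollection to `geopandas.GeoDataFrame.from_features`. A self-contained sketch of that conversion with an inline feature instead of one pulled from S3:

```python
import geopandas

# Minimal inline FeatureCollection standing in for a spatial/geojson/... file.
geo_json = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {"type": "Point", "coordinates": [-70.5, 41.8]},
            "properties": {"ping_time": "2007-07-11T18:20:32Z"},
        }
    ],
}

geospatial = geopandas.GeoDataFrame.from_features(geo_json["features"])
print(geospatial.geometry.x.values, geospatial.geometry.y.values)  # [-70.5] [41.8]
```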
water_column_sonar_processing/model/zarr_manager.py
@@ -28,8 +28,8 @@ class ZarrManager:
         self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
         self.__overwrite = True
         self.__num_threads = numcodecs.blosc.get_nthreads()
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
     def get_depth_values(
@@ -54,7 +54,7 @@
     #######################################################
     def create_zarr_store(
         self,
-        path: str,
+        path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
@@ -246,7 +246,7 @@
         #
         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
         root.attrs["processing_software_version"] = (
-            "0.0.
+            "0.0.12"  # TODO: get programmatically, echopype>utils>prov.py
         )
         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
         #
@@ -282,14 +282,16 @@
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
-        # zarr_synchronizer: Union[str, None] = None,
+        # zarr_synchronizer: Union[str, None] = None,  # TODO:
+        output_bucket_name: str,
+        endpoint_url=None,
     ):
         # Mounts a Zarr store using pythons Zarr implementation. The mounted store
         # will have read/write privileges so that store can be updated.
         print("Opening Zarr store with Zarr.")
         try:
-            s3fs_manager = S3FSManager()
-            root = f"{
+            s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
+            root = f"{output_bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
             store = s3fs_manager.s3_map(s3_zarr_store_path=root)
             # synchronizer = zarr.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
             cruise_zarr = zarr.open(store=store, mode="r+")
@@ -306,11 +308,13 @@
         cruise_name: str,
         sensor_name: str,
         file_name_stem: str,
+        input_bucket_name: str,
+        endpoint_url=None,
     ) -> xr.Dataset:
-        print("Opening Zarr store in S3
+        print("Opening L1 Zarr store in S3 with Xarray.")
         try:
-            zarr_path = f"s3://{
-            s3fs_manager = S3FSManager()
+            zarr_path = f"s3://{input_bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
+            s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
             ds = xr.open_zarr(
                 store=store_s3_map, consolidated=None
@@ -321,6 +325,25 @@
             print("Done opening Zarr store in S3 as Xarray.")
             return ds
 
+    def open_l2_zarr_store_with_xarray(
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        bucket_name: str,
+        endpoint_url=None,
+    ) -> xr.Dataset:
+        print("Opening L2 Zarr store in S3 with Xarray.")
+        try:
+            zarr_path = f"s3://{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+            s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
+            store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
+            ds = xr.open_zarr(store=store_s3_map, consolidated=None)
+        except Exception as err:
+            print("Problem opening Zarr store in S3 as Xarray.")
+            raise err
+        print("Done opening Zarr store in S3 as Xarray.")
+        return ds
 ############################################################################
 
     #######################################################
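The new `open_l2_zarr_store_with_xarray` mirrors the L1 opener but targets `level_2/<ship>/<cruise>/<sensor>/<cruise>.zarr`. A hedged usage sketch; the import path and argument values are assumptions, and credentials for the bucket are assumed to be configured:

```python
# Assumed import location for ZarrManager; argument values are illustrative.
from water_column_sonar_processing.model import ZarrManager

zarr_manager = ZarrManager()
ds = zarr_manager.open_l2_zarr_store_with_xarray(
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0707",
    sensor_name="EK60",
    bucket_name="noaa-wcsd-zarr-pds",
)
print(ds)  # lazily loaded cruise-level dataset; Sv plus latitude/longitude expected
```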
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -9,7 +9,7 @@ from pathlib import Path  # , PurePath
 
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.geometry import GeometryManager
-from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Cleaner, PipelineStatus
 
 TEMPDIR = "/tmp"
 
@@ -53,10 +53,6 @@ class RawToZarr:
     ):
         print('Writing Zarr information to DynamoDB table.')
         dynamodb_manager = DynamoDBManager()
-
-        # The problem is that these values were never populated
-        # and so when the query looks for values that aren't there
-        # they fail
         dynamodb_manager.update_item(
             table_name=table_name,
             key={
@@ -87,7 +83,8 @@ class RawToZarr:
                 ":ma": {"N": str(np.round(max_echo_range, 4))},
                 ":mi": {"N": str(np.round(min_echo_range, 4))},
                 ":nd": {"N": str(num_ping_time_dropna)},
-                ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
+                ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
                 ":se": {"S": sensor_name},
                 ":sh": {"S": ship_name},
@@ -113,6 +110,7 @@ class RawToZarr:
                 "#ZP = :zp"
             ),
         )
+        print('Done writing Zarr information to DynamoDB table.')
 
     ############################################################################
     ############################################################################
@@ -122,9 +120,10 @@ class RawToZarr:
         output_bucket_name,
         local_directory,
         object_prefix,
+        endpoint_url,
     ):
         # Note: this will be passed credentials if using NODD
-        s3_manager = S3Manager()
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
         print('Uploading files using thread pool executor.')
         all_files = []
         for subdir, dirs, files in os.walk(local_directory):
@@ -143,11 +142,14 @@ class RawToZarr:
     def raw_to_zarr(
         self,
         table_name,
+        input_bucket_name,
         output_bucket_name,
         ship_name,
         cruise_name,
         sensor_name,
         raw_file_name,
+        endpoint_url=None,
+        include_bot=True,
     ):
         """
         Downloads the raw files, processes them with echopype, writes geojson, and uploads files
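A hedged end-to-end sketch of the updated `raw_to_zarr` signature. The import path, table, buckets, and file name are illustrative; `endpoint_url` would only be set when running against a local test endpoint such as moto:

```python
# Assumed import location for RawToZarr; all argument values below are hypothetical.
from water_column_sonar_processing.processing import RawToZarr

raw_to_zarr = RawToZarr()
raw_to_zarr.raw_to_zarr(
    table_name="example-pipeline-table",
    input_bucket_name="example-input-bucket",    # expects data/raw/<ship>/<cruise>/<sensor>/<file>.raw
    output_bucket_name="example-output-bucket",  # receives level_1/<ship>/<cruise>/<sensor>/<stem>.zarr
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0707",
    sensor_name="EK60",
    raw_file_name="D20070711-T182032.raw",
    endpoint_url=None,                           # or e.g. "http://127.0.0.1:5000" for a moto server
    include_bot=True,                            # also download and use the matching .bot bottom file
)
```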
@@ -157,6 +159,16 @@ class RawToZarr:
         geometry_manager = GeometryManager()
         cleaner = Cleaner()
         cleaner.delete_local_files(file_types=["*.zarr", "*.json"])  # TODO: include bot and raw?
+
+        s3_manager = S3Manager(endpoint_url=endpoint_url)
+        s3_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+        bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+        s3_bottom_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+        s3_manager.download_file(bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name)
+        # TODO: add the bottom file
+        if include_bot:
+            s3_manager.download_file(bucket_name=input_bucket_name, key=s3_bottom_file_path, file_name=bottom_file_name)
+
         try:
             gc.collect()
             print('Opening raw file with echopype.')
@@ -165,14 +177,20 @@ class RawToZarr:
             echodata = ep.open_raw(
                 raw_file=raw_file_name,
                 sonar_model=sensor_name,
-                include_bot=
-                use_swap=True,
-                # max_chunk_size=
+                include_bot=include_bot,
+                # use_swap=True,
+                # max_chunk_size=300,
                 # storage_options={'anon': True } # 'endpoint_url': self.endpoint_url} # this was creating problems
             )
             print('Compute volume backscattering strength (Sv) from raw data.')
             ds_sv = ep.calibrate.compute_Sv(echodata)
-
+            gc.collect()
+            print('Done computing volume backscatter strength (Sv) from raw data.')
+            # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+            # but is not written out with ds_sv
+            if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                ds_sv["detected_seafloor_depth"] = echodata.vendor.detected_seafloor_depth
+            #
             frequencies = echodata.environment.frequency_nominal.values
             #################################################################
             # Get GPS coordinates
@@ -183,20 +201,21 @@ class RawToZarr:
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name=raw_file_name,
+                endpoint_url=endpoint_url,
                 write_geojson=True
             )
+            ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+            ds_sv.latitude.values = lat  # overwriting echopype gps values to include missing values
+            ds_sv.longitude.values = lon
             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
             #################################################################
             # Technically the min_echo_range would be 0 m.
             # TODO: this var name is supposed to represent minimum resolution of depth measurements
             # TODO revert this so that smaller diffs can be used
             # The most minimum the resolution can be is as small as 0.25 meters
-            min_echo_range = np.
-                0.25,
-                np.nanmin(np.diff(ds_sv.echo_range.values))
-            )
+            min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
             max_echo_range = float(np.nanmax(ds_sv.echo_range))
-            #
+            # This is the number of missing values found throughout the lat/lon
             num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
             #
             start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
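To make the rewritten `min_echo_range` line concrete: it is the smallest spacing between consecutive echo_range samples, rounded to two decimals, while `max_echo_range` is the deepest sample. A small worked example with made-up values:

```python
import numpy as np

echo_range = np.array([0.0, 0.19, 0.38, 0.57, np.nan])  # illustrative echo_range samples (metres)

min_echo_range = np.round(np.nanmin(np.diff(echo_range)), 2)
max_echo_range = float(np.nanmax(echo_range))

print(min_echo_range)  # 0.19
print(max_echo_range)  # 0.57
```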
@@ -206,12 +225,15 @@
             #################################################################
             # Create the zarr store
             store_name = f"{Path(raw_file_name).stem}.zarr"
-            ds_sv.
+            # Sv = ds_sv.Sv
+            # ds_sv['Sv'] = Sv.astype('int32', copy=False)
+            ds_sv.to_zarr(store=store_name)  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+            gc.collect()
             #################################################################
             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
             #################################################################
             # If zarr store already exists then delete
-            s3_manager = S3Manager()
+            s3_manager = S3Manager(endpoint_url=endpoint_url)
             child_objects = s3_manager.get_child_objects(
                 bucket_name=output_bucket_name,
                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
@@ -226,7 +248,8 @@
             self.__upload_files_to_output_bucket(
                 output_bucket_name=output_bucket_name,
                 local_directory=store_name,
-                object_prefix=output_zarr_prefix
+                object_prefix=output_zarr_prefix,
+                endpoint_url=endpoint_url
             )
             #################################################################
             self.__zarr_info_to_table(
@@ -248,11 +271,13 @@
             #######################################################################
             # TODO: verify count of objects matches, publish message, update status
             #######################################################################
-            print('
+            print('Finished raw-to-zarr conversion.')
         except Exception as err:
             print(f'Exception encountered creating local Zarr store with echopype: {err}')
             raise RuntimeError(f"Problem creating local Zarr store, {err}")
         finally:
+            gc.collect()
+            print("Finally.")
             cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
             print('Done creating local zarr store.')
 
{water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: water_column_sonar_processing
-Version: 0.0.10
+Version: 0.0.12
 Summary: A processing tool for water column sonar data.
 Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
 Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
@@ -28,7 +28,7 @@ Requires-Dist: python-dotenv==1.0.1
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3fs==2023.12.1
 Requires-Dist: scipy==1.14.1
-Requires-Dist: setuptools
+Requires-Dist: setuptools
 Requires-Dist: shapely==2.0.3
 Requires-Dist: typing-extensions==4.10.0
 Requires-Dist: xarray==2024.10.0
@@ -37,6 +37,16 @@ Requires-Dist: zarr==2.18.3
 # Water Column Sonar Processing
 Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information
 
+
+
+
+
+
+
+
+ 
+
 # Setting up the Python Environment
 > Python 3.10.12
 
@@ -93,20 +103,6 @@ or
 Following this tutorial:
 https://packaging.python.org/en/latest/tutorials/packaging-projects/
 
-# To Publish To TEST
-```commandline
-python -m build
-# python -m build --sdist
-# python -m build --wheel
-python -m twine upload --repository testpypi dist/*
-pytho -m pip install --index-url https://test.pypi.org/simple/ hello-pypi-rudy-klucik
-python
-```
-```
-from water-column-sonar-processing import ZarrManager
-example.add_one(2)
-```
-
 # To Publish To PROD
 ```commandline
 python -m build
@@ -134,6 +130,12 @@ https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scroll
 5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
 3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
 
+# Tag a Release
+```commandline
+git tag "v0.0.12" -a
+# enter description
+git push origin --tags
+```
 
 # TODO:
 add https://pypi.org/project/setuptools-scm/
{water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/RECORD
@@ -1,32 +1,33 @@
 water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
 water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
 water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
-water_column_sonar_processing/aws/dynamodb_manager.py,sha256=
-water_column_sonar_processing/aws/s3_manager.py,sha256
-water_column_sonar_processing/aws/s3fs_manager.py,sha256=
+water_column_sonar_processing/aws/dynamodb_manager.py,sha256=LQ3eh7Zf1fBLG-RKovod9KbQwhE-0Qdq1JPk4Ro5bdo,10252
+water_column_sonar_processing/aws/s3_manager.py,sha256=-PCiW7YF31nGIPa1oVOVTzjTSExAAkT_IyNNnvWv2HU,16214
+water_column_sonar_processing/aws/s3fs_manager.py,sha256=d7p9Sx-ocooKzHjVJVCawnXSGv6BpmKvvN9uhzilglw,2529
 water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
 water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
 water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
-water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=
-water_column_sonar_processing/cruise/
+water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=1IehrlhMAS5XAl7DLdQI4jIMSY9ZNLiW4YdcBEwYkbc,7679
+water_column_sonar_processing/cruise/experiment_datatree.py,sha256=K6Uq_36Rygw5oFF8zWavEwb1x8D27lJv5G3j0B59agE,243
+water_column_sonar_processing/cruise/resample_regrid.py,sha256=WFWxP083X4VpH9x50Om4nxSEUwTsjKjdejQz3Nh8CLs,12822
 water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
-water_column_sonar_processing/geometry/geometry_manager.py,sha256=
+water_column_sonar_processing/geometry/geometry_manager.py,sha256=nz5T1vCDWHYIfQ853EqKYHDetTul7jRWS3y8Evep8QU,10855
 water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
 water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
 water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
 water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
 water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
-water_column_sonar_processing/model/zarr_manager.py,sha256=
+water_column_sonar_processing/model/zarr_manager.py,sha256=ph0sU-aJQM5TkbyyArDHqXLpeiIki_ce6WN_Z7RVxxw,15053
 water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
 water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
-water_column_sonar_processing/processing/raw_to_zarr.py,sha256=
+water_column_sonar_processing/processing/raw_to_zarr.py,sha256=agbb2A0BWf7D4b5u-mYOBN_VyjRVjOdQM2aeRGBweWw,17617
 water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
 water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
 water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
 water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
 water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
-water_column_sonar_processing-0.0.
+water_column_sonar_processing-0.0.12.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
+water_column_sonar_processing-0.0.12.dist-info/METADATA,sha256=813ibpVKvkucEfCFlJVHeUfKIC8n1_Pt_Di4k6OebrQ,4960
+water_column_sonar_processing-0.0.12.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+water_column_sonar_processing-0.0.12.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
+water_column_sonar_processing-0.0.12.dist-info/RECORD,,
{water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/LICENSE: file without changes
{water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/WHEEL: file without changes
{water_column_sonar_processing-0.0.10.dist-info → water_column_sonar_processing-0.0.12.dist-info}/top_level.txt: file without changes