water-column-sonar-processing 0.0.5-py3-none-any.whl → 0.0.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -4
- water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
- water_column_sonar_processing/aws/s3_manager.py +112 -122
- water_column_sonar_processing/aws/s3fs_manager.py +13 -19
- water_column_sonar_processing/aws/sns_manager.py +10 -21
- water_column_sonar_processing/aws/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
- water_column_sonar_processing/cruise/resample_regrid.py +109 -58
- water_column_sonar_processing/geometry/__init__.py +5 -0
- water_column_sonar_processing/geometry/geometry_manager.py +79 -48
- water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
- water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +104 -80
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +113 -75
- water_column_sonar_processing/process.py +76 -69
- water_column_sonar_processing/utility/__init__.py +6 -0
- water_column_sonar_processing/utility/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +42 -35
- water_column_sonar_processing/utility/pipeline_status.py +37 -10
- water_column_sonar_processing/utility/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.6.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3fs_manager.py

```diff
@@ -1,6 +1,6 @@
 import os
-import s3fs
 
+import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 
@@ -8,12 +8,12 @@ import s3fs
 class S3FSManager:
     #####################################################################
     def __init__(
-
+        self,
     ):
         self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            key=os.environ.get(
-            secret=os.environ.get(
+            key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
             # asynchronous=True
             # use_ssl=False,
             # skip_instance_cache=True,
@@ -24,10 +24,7 @@ class S3FSManager:
         )
 
     #####################################################################
-    def add_file(
-        self,
-        filename
-    ):
+    def add_file(self, filename):
         full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
         print(full_path)
 
@@ -37,12 +34,7 @@ class S3FSManager:
         print(ff)
 
     #####################################################################
-    def upload_data(
-        self,
-        bucket_name,
-        file_path,
-        prefix
-    ):
+    def upload_data(self, bucket_name, file_path, prefix):
         # TODO: this works in theory but use boto3 to upload files
         s3_path = f"s3://{bucket_name}/{prefix}/"
         s3_file_system = self.s3fs
@@ -50,18 +42,20 @@
 
     #####################################################################
     def s3_map(
-
-
+        self,
+        s3_zarr_store_path, # f's3://{bucket}/{input_zarr_path}'
     ):
         # The "s3_zarr_store_path" is defined as f's3://{bucket}/{input_zarr_path}'
         # create=False, not false because will be writing
         # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
-        return s3fs.S3Map(
+        return s3fs.S3Map(
+            root=s3_zarr_store_path, s3=self.s3fs
+        ) # create=False, not false because will be writing
 
     #####################################################################
     def exists(
-
-
+        self,
+        geo_json_s3_path,
     ):
         s3_file_system = self.s3fs
         return s3_file_system.exists(path=geo_json_s3_path)
```
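The `S3FSManager` changes above are formatting and credential-lookup cleanups; the public methods (`add_file`, `upload_data`, `s3_map`, `exists`) keep their signatures. A minimal usage sketch, assuming the `OUTPUT_BUCKET_*` environment variables are set; the bucket name, store path, and GeoJSON key below are hypothetical placeholders, and `zarr` is assumed to be installed alongside `s3fs`:

```python
import os

import zarr  # assumption: zarr is available in the consuming environment

from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

# Hypothetical placeholder; the real pipeline injects this via the environment.
os.environ.setdefault("OUTPUT_BUCKET_NAME", "example-output-bucket")

manager = S3FSManager()

# Build an S3Map rooted at a (placeholder) cruise-level Zarr store and open it.
store = manager.s3_map(
    s3_zarr_store_path=f"s3://{os.environ['OUTPUT_BUCKET_NAME']}/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"
)
root = zarr.open(store, mode="r")  # opened read-only here; s3_map is also used for writes

# Check whether a GeoJSON byproduct exists before downstream geometry processing.
print(manager.exists(geo_json_s3_path=f"s3://{os.environ['OUTPUT_BUCKET_NAME']}/spatial/HB0806.geojson"))
```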
water_column_sonar_processing/aws/sns_manager.py

```diff
@@ -1,5 +1,5 @@
 import os
-
+
 import boto3
 
 
@@ -7,32 +7,22 @@ import boto3
 class SNSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sns_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sns_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sns_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sns_region,
         )
         self.__sns_resource = self.__sns_session.resource(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
         self.__sns_client = self.__sns_session.client(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
 
     #######################################################
-    # TODO: pick one
-    # def publish_message(self, topic_arn, message):
-    #     response = self.__sns_client.publish(
-    #         TopicArn=topic_arn,
-    #         Message=message
-    #     )
-    #     print(f"Topic Response: {topic_arn} : '{message}' => {response}")
-
     # TODO: pick one
     def publish(self, topic_arn, message):
         response = self.__sns_client.publish(
@@ -55,13 +45,12 @@ class SNSManager:
     #######################################################
     def subscribe(self, topic_arn, endpoint):
         self.__sns_client.subscribe(
-            TopicArn=topic_arn,
-            Protocol='sqs',
-            Endpoint=endpoint
+            TopicArn=topic_arn, Protocol="sqs", Endpoint=endpoint
         )
 
     #######################################################
     def list_topics(self):
         print(self.__sns_client.list_topics())
 
-
+
+###########################################################
```
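`SNSManager` in 0.0.6 only reshapes the `boto3.Session`/client construction and drops the commented-out `publish_message` variant; `publish`, `subscribe`, and `list_topics` are unchanged. A sketch of how the class is driven, assuming `ACCESS_KEY_ID`, `SECRET_ACCESS_KEY`, and `AWS_REGION` are set; the topic and queue ARNs are hypothetical:

```python
from water_column_sonar_processing.aws.sns_manager import SNSManager

# Hypothetical ARNs for illustration only.
topic_arn = "arn:aws:sns:us-east-1:123456789012:example-cruise-topic"
queue_arn = "arn:aws:sqs:us-east-1:123456789012:example-cruise-queue"

sns = SNSManager()  # reads ACCESS_KEY_ID / SECRET_ACCESS_KEY / AWS_REGION from the environment
sns.subscribe(topic_arn=topic_arn, endpoint=queue_arn)  # Protocol is fixed to "sqs" internally
sns.publish(topic_arn=topic_arn, message="HB0806 level_2 store initialized")
sns.list_topics()
```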
water_column_sonar_processing/aws/sqs_manager.py

```diff
@@ -1,34 +1,29 @@
 import os
+
 import boto3
-# import time
 
 
 ###########################################################
 class SQSManager:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__sqs_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sqs_session = boto3.Session(
-            aws_access_key_id=os.environ.get(
-            aws_secret_access_key=os.environ.get(
-            region_name=self.__sqs_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sqs_region,
         )
         self.__sqs_resource = self.__sqs_session.resource(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
         self.__sqs_client = self.__sqs_session.client(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
 
     #######################################################
-    def create_queue(
-        self,
-        queue_name
-    ):
+    def create_queue(self, queue_name):
        response = self.__sqs_client.create_queue(QueueName=queue_name)
        return response
 
@@ -38,13 +33,10 @@ class SQSManager:
         return sqs_queue
 
     #######################################################
-    def list_queues(
-        self,
-        queue_name_prefix
-    ):
+    def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
         # takes a while to propagate the data accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 
-    #######################################################
+    #######################################################
```
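`SQSManager` follows the same pattern as `SNSManager`. A short sketch using the signatures visible in the diff (`create_queue(queue_name)`, `list_queues(queue_name_prefix)`); the queue names are placeholders, and, as the in-code comment notes, listing is eventually consistent so a just-created queue may not show up immediately:

```python
from water_column_sonar_processing.aws.sqs_manager import SQSManager

sqs = SQSManager()  # credentials and region come from ACCESS_KEY_ID / SECRET_ACCESS_KEY / AWS_REGION

# Hypothetical queue name for illustration.
response = sqs.create_queue(queue_name="example-cruise-processing-queue")
print(response.get("QueueUrl"))

# Listing may lag behind queue creation because the SQS control plane is eventually consistent.
sqs.list_queues(queue_name_prefix="example-cruise")
```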
water_column_sonar_processing/cruise/create_empty_zarr_store.py

```diff
@@ -1,46 +1,48 @@
 import os
+
 import numcodecs
 import numpy as np
 
-from water_column_sonar_processing.utility.cleaner import Cleaner
 from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
 from water_column_sonar_processing.aws.s3_manager import S3Manager
 from water_column_sonar_processing.model.zarr_manager import ZarrManager
+from water_column_sonar_processing.utility.cleaner import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
-TEMPDIR = "/tmp"
-
+# TEMPDIR = "/tmp"
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+
 class CreateEmptyZarrStore:
     #######################################################
     def __init__(
-
+        self,
     ):
         self.__overwrite = True
-        # TODO: create output_bucket and input_bucket variables here?
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
 
     def upload_zarr_store_to_s3(
-
-
-
-
+        self,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
     ) -> None:
-        print(
+        print("uploading model store to s3")
         s3_manager = S3Manager()
         #
-        print(
+        print("Starting upload with thread pool executor.")
         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_files = []
-        for subdir, dirs, files in os.walk(
+        for subdir, dirs, files in os.walk(
+            f"{local_directory}/{cruise_name}.zarr_manager"
+        ):
             for file in files:
                 local_path = os.path.join(subdir, file)
                 # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.model/.zattrs'
@@ -51,48 +53,62 @@
         s3_manager.upload_files_with_thread_pool_executor(
             all_files=all_files,
         )
-        print(
+        print("Done uploading with thread pool executor.")
         # TODO: move to common place
 
     #######################################################
     def create_cruise_level_zarr_store(
-
-
-
-
-
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        table_name: str,
+        tempdir: str,
     ) -> None:
         try:
             # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
+            s3_manager = S3Manager()
 
             df = dynamo_db_manager.get_table_as_df(
                 table_name=table_name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
-                sensor_name=sensor_name
+                sensor_name=sensor_name,
             )
 
-            # filter the dataframe just for enums >= LEVEL_1_PROCESSING
+            # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
             # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
 
             print(f"DataFrame shape: {df.shape}")
-            cruise_channels = list(
+            cruise_channels = list(
+                set([i for sublist in df["CHANNELS"].dropna() for i in sublist])
+            )
             cruise_channels.sort()
 
-            consolidated_zarr_width = np.sum(
+            consolidated_zarr_width = np.sum(
+                df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
+            )
 
             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = float(
+            cruise_min_echo_range = float(
+                np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )
 
             # [4] calculate the maximum of the max depth values
-            cruise_max_echo_range = float(
-
+            cruise_max_echo_range = float(
+                np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))
+            )
+            print(
+                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
+            )
 
             # [5] get number of channels
-            cruise_frequencies = [
+            cruise_frequencies = [
+                float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
+            ]
             print(cruise_frequencies)
 
             new_width = int(consolidated_zarr_width)
```
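The hunk above is where the cruise-level Zarr dimensions come from: the DynamoDB rows for one cruise are loaded into a pandas DataFrame and reduced to a consolidated ping-time width, min/max echo range, and channel/frequency lists. A standalone sketch of that aggregation with made-up rows (the column names are taken from the diff; the values are hypothetical):

```python
import numpy as np
import pandas as pd

# Toy stand-in for the per-file DynamoDB records.
df = pd.DataFrame(
    {
        "CHANNELS": [["GPT 18 kHz", "GPT 38 kHz"], ["GPT 38 kHz", "GPT 120 kHz"]],
        "NUM_PING_TIME_DROPNA": ["4096", "8192"],
        "MIN_ECHO_RANGE": ["0.25", "0.5"],
        "MAX_ECHO_RANGE": ["750.0", "1000.0"],
        "FREQUENCIES": [[18000.0, 38000.0, 120000.0], [18000.0, 38000.0, 120000.0]],
    }
)

# Same reductions as the diff: union of channels, summed ping-time width,
# cruise-wide echo-range extremes, and the frequency list of the first record.
cruise_channels = sorted(set(i for sublist in df["CHANNELS"].dropna() for i in sublist))
consolidated_zarr_width = int(np.sum(df["NUM_PING_TIME_DROPNA"].dropna().astype(int)))
cruise_min_echo_range = float(np.min(df["MIN_ECHO_RANGE"].dropna().astype(float)))
cruise_max_echo_range = float(np.max(df["MAX_ECHO_RANGE"].dropna().astype(float)))
cruise_frequencies = [float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]]

print(cruise_channels, consolidated_zarr_width, cruise_min_echo_range, cruise_max_echo_range, cruise_frequencies)
```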
```diff
@@ -102,7 +118,6 @@
             print(store_name)
             ################################################################
             # Delete existing model store if it exists
-            s3_manager = S3Manager()
             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
             child_objects = s3_manager.get_child_objects(
                 bucket_name=self.output_bucket_name,
@@ -115,14 +130,16 @@
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(
-
-
-
+            new_height = len(
+                zarr_manager.get_depth_values(
+                    min_echo_range=cruise_min_echo_range,
+                    max_echo_range=cruise_max_echo_range,
+                )
+            )
             print(f"new_height: {new_height}")
 
             zarr_manager.create_zarr_store(
-                path=
+                path=tempdir,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
@@ -134,7 +151,7 @@
             )
             #################################################################
             self.upload_zarr_store_to_s3(
-                local_directory=
+                local_directory=tempdir,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
@@ -161,6 +178,7 @@
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()
+            # TODO: should delete zarr store in temp directory too?
         print("Done creating cruise level model store")
 
 
```
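Net effect of this file: the scratch directory is now passed in as `tempdir` instead of coming from the old module-level `TEMPDIR = "/tmp"` constant, and a single `S3Manager` is created at the top of the method. A call sketch, assuming the bucket environment variables and the DynamoDB table already exist; the ship/cruise/sensor values are taken from the diff's comments and the table name is a placeholder:

```python
import os

from water_column_sonar_processing.cruise.create_empty_zarr_store import CreateEmptyZarrStore

# Placeholders; the real pipeline injects these via the environment.
os.environ.setdefault("INPUT_BUCKET_NAME", "example-input-bucket")
os.environ.setdefault("OUTPUT_BUCKET_NAME", "example-output-bucket")

creator = CreateEmptyZarrStore()
creator.create_cruise_level_zarr_store(
    ship_name="Henry_B._Bigelow",  # example values from the level_2 key in the diff comments
    cruise_name="HB0806",
    sensor_name="EK60",
    table_name="example-pipeline-table",  # hypothetical table name
    tempdir="/tmp",  # caller now supplies the scratch directory (was module-level TEMPDIR)
)
```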