water_column_sonar_processing-0.0.9-py3-none-any.whl → water_column_sonar_processing-26.1.9-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
- water_column_sonar_processing/aws/s3_manager.py +179 -141
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +142 -127
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +50 -49
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +151 -33
- water_column_sonar_processing/model/zarr_manager.py +665 -262
- water_column_sonar_processing/processing/__init__.py +3 -3
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/constants.py +69 -18
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing/processing/cruise_sampler.py +0 -342
- water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
- water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
- {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0

water_column_sonar_processing/process.py (file deleted) @@ -1,147 +0,0 @@

```python
import json
import os

import numpy as np

from water_column_sonar_processing.aws import DynamoDBManager
from water_column_sonar_processing.aws import S3Manager
from water_column_sonar_processing.aws import S3FSManager
from water_column_sonar_processing.aws import SNSManager


###########################################################
class Process:
    #######################################################
    def __init__(
        self,
    ):
        self.input_bucket_name = os.environ["INPUT_BUCKET_NAME"]
        self.output_bucket_name = os.environ["OUTPUT_BUCKET_NAME"]
        self.table_name = os.environ["TABLE_NAME"]
        self.topic_arn = os.environ["TOPIC_ARN"]
        # self.output_bucket_access_key = ?
        # self.output_bucket_secret_access_key = ?

    def execute(self):
        input_s3_manager = (
            S3Manager()
        )  # TODO: Need to allow passing in of credentials when writing to protected bucket
        s3fs_manager = S3FSManager()  # TODO: delete this
        print(s3fs_manager)  # TODO: delete this
        output_s3_manager = S3Manager()
        # TODO: s3fs?
        sns_manager = SNSManager()
        ddb_manager = DynamoDBManager()

        # [1 of 5] Update Pipeline Status in DynamoDB
        # self.dynamodb.update_status()

        # [2 of 5] Download Object From Input Bucket
        # return_value = input_s3_manager.download_file(
        #     bucket_name=self.input_bucket_name,
        #     key="the_input_key",
        #     file_name="the_input_key",
        # )
        # print(return_value)

        # [3 of 5] Update Entry in DynamoDB
        ship_name = "David_Starr_Jordan"  # TODO: get this from input sns message
        cruise_name = "DS0604"
        sensor_name = "EK60"
        file_name = "DSJ0604-D20060406-T113407.raw"

        test_channels = [
            "GPT 38 kHz 009072055a7f 2 ES38B",
            "GPT 70 kHz 00907203400a 3 ES70-7C",
            "GPT 120 kHz 009072034d52 1 ES120-7",
            "GPT 200 kHz 0090720564e4 4 ES200-7C",
        ]
        test_frequencies = [38_000, 70_000, 120_000, 200_000]
        ddb_manager.update_item(
            table_name=self.table_name,
            key={
                "FILE_NAME": {"S": file_name},  # Partition Key
                "CRUISE_NAME": {"S": cruise_name},  # Sort Key
            },
            expression_attribute_names={
                "#CH": "CHANNELS",
                "#ET": "END_TIME",
                "#ED": "ERROR_DETAIL",
                "#FR": "FREQUENCIES",
                "#MA": "MAX_ECHO_RANGE",
                "#MI": "MIN_ECHO_RANGE",
                "#ND": "NUM_PING_TIME_DROPNA",
                "#PS": "PIPELINE_STATUS",  # testing this updated
                "#PT": "PIPELINE_TIME",  # testing this updated
                "#SE": "SENSOR_NAME",
                "#SH": "SHIP_NAME",
                "#ST": "START_TIME",
                "#ZB": "ZARR_BUCKET",
                "#ZP": "ZARR_PATH",
            },
            expression_attribute_values={
                ":ch": {"L": [{"S": i} for i in test_channels]},
                ":et": {"S": "2006-04-06T13:35:28.688Z"},
                ":ed": {"S": ""},
                ":fr": {"L": [{"N": str(i)} for i in test_frequencies]},
                ":ma": {"N": str(np.round(499.7653, 4))},
                ":mi": {"N": str(np.round(0.25, 4))},
                ":nd": {"N": str(2458)},
                ":ps": {"S": "SUCCESS_AGGREGATOR"},
                ":pt": {"S": "2023-10-02T08:54:43Z"},
                ":se": {"S": sensor_name},
                ":sh": {"S": ship_name},
                ":st": {"S": "2006-04-06T11:34:07.288Z"},
                ":zb": {"S": "r2d2-dev-echofish2-118234403147-echofish-dev-output"},
                ":zp": {
                    "S": "level_1/David_Starr_Jordan/DS0604/EK60/DSJ0604-D20060406-T113407.model"
                },
            },
            update_expression=(
                "SET "
                "#CH = :ch, "
                "#ET = :et, "
                "#ED = :ed, "
                "#FR = :fr, "
                "#MA = :ma, "
                "#MI = :mi, "
                "#ND = :nd, "
                "#PS = :ps, "
                "#PT = :pt, "
                "#SE = :se, "
                "#SH = :sh, "
                "#ST = :st, "
                "#ZB = :zb, "
                "#ZP = :zp"
            ),
        )

        # [4 of 5] Write Object to Output Bucket
        output_s3_manager.put(
            bucket_name=self.output_bucket_name, key="123", body="456"
        )

        # [_ of _] Read file-level Zarr store from bucket, Create GeoJSON, Write to bucket
        # [_ of _] Create empty cruise-level Zarr store
        # [_ of _] Resample and write to cruise-level Zarr Store

        # [5 of 5] Publish Done Message
        success_message = {
            "default": {
                "shipName": ship_name,
                "cruiseName": cruise_name,
                "sensorName": sensor_name,
                "fileName": file_name,
            }
        }
        sns_manager.publish(
            topic_arn=self.topic_arn,
            message=json.dumps(success_message),
        )
        print("done...")

    #######################################################


###########################################################
###########################################################
```
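The deleted `process.py` drove a five-step pipeline: update status in DynamoDB, download the raw object, update the cruise metadata item, write to the output bucket, and publish an SNS done-message. Its `DynamoDBManager.update_item` call wraps the standard boto3 `update_item` API. A minimal sketch of the equivalent raw call, trimmed to the two pipeline-status attributes; the table name and region are stand-in assumptions (the real pipeline reads `TABLE_NAME` from the environment):

```python
import boto3

# Hypothetical client configuration for illustration only.
dynamodb = boto3.client("dynamodb", region_name="us-east-1")

dynamodb.update_item(
    TableName="echofish-dev-table",  # assumption: stands in for os.environ["TABLE_NAME"]
    Key={
        "FILE_NAME": {"S": "DSJ0604-D20060406-T113407.raw"},  # partition key
        "CRUISE_NAME": {"S": "DS0604"},  # sort key
    },
    # Placeholders (#PS / :ps) keep attribute names clear of DynamoDB reserved words.
    UpdateExpression="SET #PS = :ps, #PT = :pt",
    ExpressionAttributeNames={"#PS": "PIPELINE_STATUS", "#PT": "PIPELINE_TIME"},
    ExpressionAttributeValues={
        ":ps": {"S": "SUCCESS_AGGREGATOR"},
        ":pt": {"S": "2023-10-02T08:54:43Z"},
    },
)
```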
water_column_sonar_processing/processing/cruise_sampler.py (file deleted) @@ -1,342 +0,0 @@

```python
import gc
import os
import echopype as ep
import numpy as np
from numcodecs import Blosc

from water_column_sonar_processing.utility import Cleaner

TEMPDIR = "/tmp"


# This code is getting copied from echofish-aws-raw-to-zarr-lambda
class CruiseSampler:
    #######################################################
    def __init__(
        self,
    ):
        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
        self.bucket_name = os.environ.get("INPUT_BUCKET_NAME")
        # self.__s3 = s3_operations

    ############################################################################
    ############################################################################
    def __zarr_info_to_table(
        self,
        file_name,
        cruise_name,
        zarr_path,
        min_echo_range,
        max_echo_range,
        num_ping_time_dropna,
        start_time,
        end_time,
        frequencies,
        channels
    ):
        print('Writing Zarr information to DynamoDB table.')
        self.__dynamo.update_item(
            table_name=self.__table_name,
            key={
                'FILE_NAME': {'S': file_name},  # Partition Key
                'CRUISE_NAME': {'S': cruise_name},  # Sort Key
                # TODO: should be FILE_NAME & SENSOR_NAME so they are truly unique for when two sensors are processed within one cruise
            },
            expression='SET #ZB = :zb, #ZP = :zp, #MINER = :miner, #MAXER = :maxer, #P = :p, #ST = :st, #ET = :et, #F = :f, #C = :c',
            attribute_names={
                '#ZB': 'ZARR_BUCKET',
                '#ZP': 'ZARR_PATH',
                '#MINER': 'MIN_ECHO_RANGE',
                '#MAXER': 'MAX_ECHO_RANGE',
                '#P': 'NUM_PING_TIME_DROPNA',
                '#ST': 'START_TIME',
                '#ET': 'END_TIME',
                '#F': 'FREQUENCIES',
                '#C': 'CHANNELS',
            },
            attribute_values={
                ':zb': {
                    'S': self.__output_bucket
                },
                ':zp': {
                    'S': zarr_path
                },
                ':miner': {
                    'N': str(np.round(min_echo_range, 4))
                },
                ':maxer': {
                    'N': str(np.round(max_echo_range, 4))
                },
                ':p': {
                    'N': str(num_ping_time_dropna)
                },
                ':st': {
                    'S': start_time
                },
                ':et': {
                    'S': end_time
                },
                ':f': {
                    'L': [{'N': str(i)} for i in frequencies]
                },
                ':c': {
                    'L': [{'S': i} for i in channels]
                }
            }
        )

    ############################################################################
    ############################################################################
    ############################################################################
    def raw_to_zarr(
        self,
        ship_name,
        cruise_name,
        sensor_name,
        file_name,
    ):
        print(f'Opening raw: {file_name} and creating zarr store.')
        geometry_manager = GeometryManager()
        try:
            gc.collect()
            print('Opening raw file with echopype.')
            bucket_name = "test_input_bucket"  # noaa-wcsd-pds
            s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
            # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
            # TODO: add the bottom file here
            echodata = ep.open_raw(
                raw_file=s3_file_path,
                sonar_model=sensor_name,
                # include_bot=True,
                use_swap=True,
                # max_chunk_size=100,
                # storage_options={'anon': True}  # this was creating problems
            )
            print('Compute volume backscattering strength (Sv) from raw data.')
            ds_sv = ep.calibrate.compute_Sv(echodata)
            print('Done computing volume backscattering strength (Sv) from raw data.')
            frequencies = echodata.environment.frequency_nominal.values
            #################################################################
            # Get GPS coordinates
            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
                echodata=echodata,
                ship_name=ship_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                file_name=file_name,
                write_geojson=True
            )
            # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
            #################################################################
            # Technically the min_echo_range would be 0 m.
            # TODO: this var name is supposed to represent minimum resolution of depth measurements
            # The smallest the resolution can be is 0.25 meters
            min_echo_range = np.maximum(0.25, np.nanmin(np.diff(ds_sv.echo_range.values)))
            max_echo_range = float(np.nanmax(ds_sv.echo_range))
            #
            num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
            #
            start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
            end_time = np.datetime_as_string(ds_sv.ping_time.values[-1], unit='ms') + "Z"
            channels = list(ds_sv.channel.values)
            #
            #################################################################
            # Create the zarr store
            ds_sv.to_zarr(store=store_name)
            #################################################################
            print('Note: Adding GeoJSON inside Zarr store')
            self.__write_geojson_to_file(store_name=store_name, data=gps_data)
            #################################################################
            self.__zarr_info_to_table(
                file_name=raw_file_name,
                cruise_name=cruise_name,
                zarr_path=os.path.join(output_zarr_prefix, store_name),
                min_echo_range=min_echo_range,
                max_echo_range=max_echo_range,
                num_ping_time_dropna=num_ping_time_dropna,
                start_time=start_time,
                end_time=end_time,
                frequencies=frequencies,
                channels=channels
            )
        except Exception as err:
            print(f'Exception encountered creating local Zarr store with echopype: {err}')
            raise RuntimeError(f"Problem creating local Zarr store, {err}")
        print('Done creating local zarr store.')

    ############################################################################
    def __upload_files_to_output_bucket(
        self,
        local_directory,
        object_prefix,
    ):
        # Note: this will be passed credentials if using NODD
        print('Uploading files using thread pool executor.')
        all_files = []
        for subdir, dirs, files in os.walk(local_directory):
            for file in files:
                local_path = os.path.join(subdir, file)
                s3_key = os.path.join(object_prefix, local_path)
                all_files.append([local_path, s3_key])
        # all_files
        all_uploads = self.__s3.upload_files_with_thread_pool_executor(
            bucket_name=self.__output_bucket,
            all_files=all_files,
            access_key_id=self.__output_bucket_access_key,
            secret_access_key=self.__output_bucket_secret_access_key
        )
        return all_uploads

    ############################################################################
    def execute(self, input_message):
        ship_name = input_message['shipName']
        cruise_name = input_message['cruiseName']
        sensor_name = input_message['sensorName']
        input_file_name = input_message['fileName']
        #
        try:
            self.__update_processing_status(
                file_name=input_file_name,
                cruise_name=cruise_name,
                pipeline_status="PROCESSING_RAW_TO_ZARR"
            )
            #######################################################################
            store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
            output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
            bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
            zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
            #
            os.chdir(TEMPDIR)  # Lambdas require use of temp directory
            #######################################################################
            #######################################################################
            # Check if zarr store already exists
            s3_objects = self.__s3.list_objects(
                bucket_name=self.__output_bucket,
                prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
                access_key_id=self.__output_bucket_access_key,
                secret_access_key=self.__output_bucket_secret_access_key
            )
            if len(s3_objects) > 0:
                print('Zarr store data already exists in s3, deleting existing and continuing.')
                self.__s3.delete_objects(
                    bucket_name=self.__output_bucket,
                    objects=s3_objects,
                    access_key_id=self.__output_bucket_access_key,
                    secret_access_key=self.__output_bucket_secret_access_key
                )
            #######################################################################
            # self.__delete_all_local_raw_and_zarr_files()
            Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
            self.__s3.download_file(
                bucket_name=self.__input_bucket,
                key=bucket_key,
                file_name=input_file_name
            )
            self.__create_local_zarr_store(
                raw_file_name=input_file_name,
                cruise_name=cruise_name,
                sensor_name=sensor_name,
                output_zarr_prefix=output_zarr_prefix,
                store_name=store_name
            )
            #######################################################################
            self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
            #######################################################################
            # # TODO: verify count of objects matches
            # s3_objects = self.__s3.list_objects(
            #     bucket_name=self.__output_bucket,
            #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
            #     access_key_id=self.__output_bucket_access_key,
            #     secret_access_key=self.__output_bucket_secret_access_key
            # )
            #######################################################################
            self.__update_processing_status(
                file_name=input_file_name,
                cruise_name=cruise_name,
                pipeline_status='SUCCESS_RAW_TO_ZARR'
            )
            #######################################################################
            self.__publish_done_message(input_message)
            #######################################################################
        # except Exception as err:
        #     print(f'Exception encountered: {err}')
        #     self.__update_processing_status(
        #         file_name=input_file_name,
        #         cruise_name=cruise_name,
        #         pipeline_status='FAILURE_RAW_TO_ZARR',
        #         error_message=str(err),
        #     )
        finally:
            self.__delete_all_local_raw_and_zarr_files()
        #######################################################################

    ############################################################################

################################################################################
    ############################################################################
    # TODO: DELETE
    # def __get_gps_data(
    #     self,
    #     echodata: ep.echodata.echodata.EchoData
    # ) -> tuple:
    #     print('Getting GPS data.')
    #     try:
    #         # if 'latitude' not in echodata.platform.variables and 'longitude' not in echodata.platform.variables:
    #         #     raise KeyError;
    #         assert(  # TODO: raise error, e.g. KeyError
    #             'latitude' in echodata.platform.variables and 'longitude' in echodata.platform.variables
    #         ), "Problem: GPS coordinates not found in echodata."
    #         latitude = echodata.platform.latitude.values
    #         longitude = echodata.platform.longitude.values  # len(longitude) == 14691
    #         # RE: time coordinates: https://github.com/OSOceanAcoustics/echopype/issues/656#issue-1219104771
    #         assert(
    #             'time1' in echodata.platform.variables and 'time1' in echodata.environment.variables
    #         ), "Problem: Time coordinate not found in echodata."
    #         # 'nmea_times' are times from the nmea datalogger associated with GPS
    #         # nmea times, unlike env times, can be sorted
    #         nmea_times = np.sort(echodata.platform.time1.values)
    #         # 'time1' are times from the echosounder associated with transducer measurement
    #         time1 = echodata.environment.time1.values
    #         # Align 'sv_times' to 'nmea_times'
    #         assert(
    #             np.all(time1[:-1] <= time1[1:]) and np.all(nmea_times[:-1] <= nmea_times[1:])
    #         ), "Problem: NMEA time stamps are not sorted."
    #         # Finds the indices where 'v' can be inserted just to the right of 'a'
    #         indices = np.searchsorted(a=nmea_times, v=time1, side="right") - 1
    #         #
    #         lat = latitude[indices]
    #         lat[indices < 0] = np.nan  # values recorded before indexing are set to nan
    #         lon = longitude[indices]
    #         lon[indices < 0] = np.nan
    #         if len(lat) < 2 or len(lon) < 2:
    #             raise Exception("There was not enough data in lat or lon to create geojson.")
    #         assert(  # TODO: raise ValueError
    #             np.all(lat[~np.isnan(lat)] >= -90.) and np.all(lat[~np.isnan(lat)] <= 90.) and np.all(lon[~np.isnan(lon)] >= -180.) and np.all(lon[~np.isnan(lon)] <= 180.)
    #         ), "Problem: Data falls outside GPS bounds!"
    #         # TODO: check for visits to null island
    #         # https://osoceanacoustics.github.io/echopype-examples/echopype_tour.html
    #         print(np.count_nonzero(np.isnan(lat)))
    #         print(np.count_nonzero(np.isnan(lon)))
    #         if len(lat[~np.isnan(lat)]) < 1:
    #             raise RuntimeError(f"Problem all data is NaN.")
    #         time1 = time1[~np.isnan(lat)]
    #         lat = lat[~np.isnan(lat)]
    #         lon = lon[~np.isnan(lon)]
    #         #
    #         gps_df = pd.DataFrame({
    #             'latitude': lat,
    #             'longitude': lon,
    #             'time1': time1
    #         }).set_index(['time1'])
    #         gps_gdf = geopandas.GeoDataFrame(
    #             gps_df,
    #             geometry=geopandas.points_from_xy(gps_df['longitude'], gps_df['latitude']),
    #             crs="epsg:4326"  # TODO: does this sound right?
    #         )
    #         # GeoJSON FeatureCollection with IDs as "time1"
    #         geo_json = gps_gdf.to_json()
    #     except Exception as err:
    #         print(f'Exception encountered creating local Zarr store with echopype: {err}')
    #         raise
    #     return geo_json, lat, lon
```
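A key technique in the deleted `cruise_sampler.py` (visible in the commented-out `__get_gps_data` helper) is aligning each echosounder measurement time to the most recent NMEA GPS fix at or before it, using `np.searchsorted`. A self-contained sketch of just that alignment step, with synthetic timestamps standing in for echopype output:

```python
import numpy as np

# Synthetic example: GPS fixes logged on one clock, pings on another.
nmea_times = np.array(
    ["2006-04-06T11:34:00", "2006-04-06T11:34:10", "2006-04-06T11:34:20"],
    dtype="datetime64[s]",
)
latitude = np.array([32.70, 32.71, 32.72])

ping_times = np.array(
    ["2006-04-06T11:33:55", "2006-04-06T11:34:05", "2006-04-06T11:34:15"],
    dtype="datetime64[s]",
)

# For each ping, index of the most recent GPS fix at or before it.
indices = np.searchsorted(a=nmea_times, v=ping_times, side="right") - 1

lat = latitude[indices]
lat[indices < 0] = np.nan  # pings recorded before the first fix get NaN

print(lat)  # -> [  nan 32.7  32.71]
```

Both arrays must be sorted for this to be valid, which is why the original code asserts monotonicity before the lookup.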
water_column_sonar_processing-0.0.9.dist-info/METADATA (file deleted) @@ -1,134 +0,0 @@

````
Metadata-Version: 2.1
Name: water_column_sonar_processing
Version: 0.0.9
Summary: A processing tool for water column sonar data.
Author-email: Rudy Klucik <rudy.klucik@noaa.gov>
Project-URL: Homepage, https://github.com/CI-CMG/water-column-sonar-processing
Project-URL: Issues, https://github.com/CI-CMG/water-column-sonar-processing/issues
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent
Requires-Python: >=3.10
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: aiobotocore==2.15.2
Requires-Dist: boto3==1.35.36
Requires-Dist: botocore==1.35.36
Requires-Dist: echopype==0.9.0
Requires-Dist: fiona==1.10.1
Requires-Dist: geopandas==1.0.1
Requires-Dist: mock==5.1.0
Requires-Dist: moto[all]==5.0.21
Requires-Dist: moto[server]==5.0.21
Requires-Dist: numcodecs==0.13.1
Requires-Dist: numpy==1.26.4
Requires-Dist: pandas==2.2.3
Requires-Dist: pyarrow==18.1.0
Requires-Dist: python-dotenv==1.0.0
Requires-Dist: requests==2.32.3
Requires-Dist: s3fs==2023.12.1
Requires-Dist: scipy==1.14.1
Requires-Dist: setuptools==75.6.0
Requires-Dist: shapely==2.0.3
Requires-Dist: typing-extensions==4.10.0
Requires-Dist: xarray==2024.10.0
Requires-Dist: zarr==2.18.3

# Water Column Sonar Processing
Processing tool for converting L0 data to L1 and L2 as well as generating geospatial information

# Setting up the Python Environment
> Python 3.10.12

# MacOS Pyenv Installation Instructions
1. Install pyenv (https://github.com/pyenv/pyenv#set-up-your-shell-environment-for-pyenv)
   1. ```brew update```
   2. ```arch -arm64 brew install pyenv```
   3. In ~/.bashrc add
      1. ```export PYENV_ROOT="$HOME/.pyenv"```
      2. ```export PATH="$PYENV_ROOT/bin:$PATH"```
      3. ```eval "$(pyenv init -)"```
   4. ```arch -arm64 brew install openssl readline sqlite3 xz zlib tcl-tk```
2. Install pyenv-virtualenv (https://github.com/pyenv/pyenv-virtualenv)
   1. ```arch -arm64 brew install pyenv-virtualenv```
   2. In ~/.bashrc add
      1. ```eval "$(pyenv virtualenv-init -)"```
3. Open a new terminal
4. Install Python version
   1. ```env CONFIGURE_OPTS='--enable-optimizations' arch -arm64 pyenv install 3.10.12```
5. Create virtual env (to delete: 'pyenv uninstall 3.10.12/water-column-sonar-processing')
   1. ```pyenv virtualenv 3.10.12 water-column-sonar-processing```
6. Set local version of python (if not done already)
   1. change directory to root of project
   2. ```pyenv local 3.10.12 water-column-sonar-processing```
   3. ```pyenv activate water-column-sonar-processing```

# Setting up IntelliJ

1. Install the IntelliJ Python plugin
2. Set up pyenv
   1. File -> Project Structure or CMD + ;
   2. SDKs -> + -> Add Python SDK -> Virtual Environment
   3. Select Existing Environment
   4. Choose ~/.pyenv/versions/mocking_aws/bin/python
3. Set up Python Facet (not sure if this is required)
   1. File -> Project Structure or CMD + ;
   2. Facets -> + -> Python
   3. Set interpreter

# Installing Dependencies

1. Add dependencies with versions to requirements.txt
2. ```pip install --upgrade pip && pip install -r requirements_dev.txt```


# Pytest
```commandline
pytest --disable-warnings
```
or
> pytest --cache-clear --cov=src tests/ --cov-report=xml

# Instructions
Following this tutorial:
https://packaging.python.org/en/latest/tutorials/packaging-projects/

# To Publish To TEST
```commandline
python -m build
# python -m build --sdist
# python -m build --wheel
python -m twine upload --repository testpypi dist/*
python -m pip install --index-url https://test.pypi.org/simple/ hello-pypi-rudy-klucik
python
```
```
from water-column-sonar-processing import ZarrManager
example.add_one(2)
```

# To Publish To PROD
```commandline
python -m build
python -m twine upload --repository pypi dist/*
```

# Pre Commit Hook
https://dev.to/rafaelherik/using-trufflehog-and-pre-commit-hook-to-prevent-secret-exposure-edo
```
pre-commit install --allow-missing-config
```

# Linting
Ruff
https://plugins.jetbrains.com/plugin/20574-ruff

# Colab Test
https://colab.research.google.com/drive/1KiLMueXiz9WVB9o4RuzYeGjNZ6PsZU7a#scrollTo=AayVyvpBdfIZ

# Test Coverage
20241124
8 failed, 32 passed, 3 skipped, 1 warning in 6.92s
20241125
5 failed, 35 passed, 3 skipped, 1 warning in 9.71s
3 failed, 38 passed, 3 skipped, 1 warning in 7.24s
````
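One caveat in the README above: `from water-column-sonar-processing import ZarrManager` is not valid Python, because the hyphenated name is the PyPI distribution name, not the importable package. A hedged smoke-test sketch; the underscored package name comes from the RECORD listing below, but the exact export location of `ZarrManager` is an assumption:

```python
# Install first (note the hyphenated distribution name):
#   python -m pip install water-column-sonar-processing
# Then import using the underscored package name.
from water_column_sonar_processing.model import ZarrManager  # assumed export path

print(ZarrManager)  # smoke test: confirms the class is importable
```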
water_column_sonar_processing-0.0.9.dist-info/RECORD (file deleted) @@ -1,32 +0,0 @@

```
water_column_sonar_processing/__init__.py,sha256=fvRK4uFo_A0l7w_T4yckvDqJ3wMUq4JB3VVPXqWfewE,226
water_column_sonar_processing/process.py,sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18,5386
water_column_sonar_processing/aws/__init__.py,sha256=KJqK8oYMn-u8n8i-Jp_lG5BvCOTjwWSjWP8yAyDlWVo,297
water_column_sonar_processing/aws/dynamodb_manager.py,sha256=sZHn-hgCt3K3w0x5BcXfF5jLMt_F11dAtQHJToij9nU,10008
water_column_sonar_processing/aws/s3_manager.py,sha256=ctNWMkgqMlwbwmXHgwKEV8otLwIjr-dHX6bQ2rOw1ug,14718
water_column_sonar_processing/aws/s3fs_manager.py,sha256=thVJPQKhbvF1g-Ue3BYgwazFOFDYOICIEJx4zkXBQ1E,2381
water_column_sonar_processing/aws/sns_manager.py,sha256=Dp9avG5VSugSWPR1dZ-askuAw1fCZkNUHbOUP65iR-k,1867
water_column_sonar_processing/aws/sqs_manager.py,sha256=NSUrWmnSC8h8Gf7gT0U8zFaQQ-yX89h0Q0mDLKGqp2Y,1597
water_column_sonar_processing/cruise/__init__.py,sha256=H5hW0JMORuaFvQk_R31B4VL8RnRyKeanOOiWmqEMZJk,156
water_column_sonar_processing/cruise/create_empty_zarr_store.py,sha256=uLwHZazndSy4puXrS-2PrGhicV-umsCCiXoqt2MMpkM,7317
water_column_sonar_processing/cruise/resample_regrid.py,sha256=4Tw6Ro9mQZOr0uIph6foz6a1OeFAZW0SMUT_asIwvKw,12309
water_column_sonar_processing/geometry/__init__.py,sha256=_ol5nI8AL30pYXeAh5rtP7YmQggitPC6LA_kuTfPJ0Q,231
water_column_sonar_processing/geometry/geometry_manager.py,sha256=7WZ1UerY_h3uOKc3mcaOpvhgZ1yV3gD-CUnhZJl1BOQ,10550
water_column_sonar_processing/geometry/geometry_simplification.py,sha256=im1HG9nfYIerQv3w-PUHzphw2B7aGgnsA3Zcdy2oTmA,3016
water_column_sonar_processing/geometry/pmtile_generation.py,sha256=7Lm08Jr6YaM4nYmexClxbIMOqSV1teo9wMm6dfjFuNA,12384
water_column_sonar_processing/index/__init__.py,sha256=izEObsKiOoIJ0kZCFhvaYsBd6Ga71XJxnogjrNInw68,68
water_column_sonar_processing/index/index_manager.py,sha256=YS6y_THfGAZpjfBZOj5n8O1aY_BnBYS781eNHfhpip0,11239
water_column_sonar_processing/model/__init__.py,sha256=FXaCdbPqxp0ogmZm9NplRirqpgMiYs1iRYgJbFbbX2Y,65
water_column_sonar_processing/model/zarr_manager.py,sha256=fpRkk6Qg1_LVdLg1M_X0J9Lchp2OJygTxfekEk1Mi88,13641
water_column_sonar_processing/processing/__init__.py,sha256=UwdB3BnoUxy4q3k9-ZjBF6KzmCWVDcqbcArTeHgmvGA,118
water_column_sonar_processing/processing/cruise_sampler.py,sha256=hadPrnH5nz7_oG_4pND7YbMFH6NMR9d6p3xAXedtKU8,15927
water_column_sonar_processing/processing/raw_to_zarr.py,sha256=QBz58P-hYTZwg6hhf4u_kNg710lrfxPwnvIr7UmK30I,16125
water_column_sonar_processing/utility/__init__.py,sha256=yDObMOL0_OxKWet5wffK2-XVJgoE9iwiY2q04GZrtBQ,234
water_column_sonar_processing/utility/cleaner.py,sha256=bNbs-hopWxtKAFBK0Eu18xdRErZCGZvtla3j-1bTwQw,619
water_column_sonar_processing/utility/constants.py,sha256=EbzsorvYKadsPjuutRjQKKByGibhFm0Gw6D-Sp2ZD3I,2143
water_column_sonar_processing/utility/pipeline_status.py,sha256=O-0SySqdRGJ6bs3zQe1NV9vkOpmsRM7zj5QoHgzYioY,4395
water_column_sonar_processing/utility/timestamp.py,sha256=bO0oir7KxxoEHPGRkz9FCBfOligkocUyRiWRzAq8fnU,361
water_column_sonar_processing-0.0.9.dist-info/LICENSE,sha256=lz4IpJ5_adG3S0ali-WaIpQFVTnEAOucMDQPECUVEYw,1110
water_column_sonar_processing-0.0.9.dist-info/METADATA,sha256=5mtfW5UU7dpl2oKGUi7GKTbjsrcMo13ackWFpNVkjr4,4432
water_column_sonar_processing-0.0.9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
water_column_sonar_processing-0.0.9.dist-info/top_level.txt,sha256=aRYU4A7RNBlNrL4vzjytFAir3BNnmOgsvIGKKA36tg4,30
water_column_sonar_processing-0.0.9.dist-info/RECORD,,
```
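Each RECORD row is `path,sha256=<digest>,<size-in-bytes>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with the trailing `=` padding stripped (per PEP 376 and the wheel spec). A minimal sketch for recomputing one of these entries from an unpacked wheel:

```python
import base64
import hashlib
from pathlib import Path


def record_hash(path: str) -> str:
    """Compute the RECORD-style sha256 entry for a file."""
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# Hypothetical usage against an unpacked 0.0.9 wheel; per the listing above,
# this should print "sha256=-yQtK3rnZq6lGAr3q02zLDe1NuMH9c0PiUOxKzG_r18".
# print(record_hash("water_column_sonar_processing/process.py"))
```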
{water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt: file without changes (path renamed only).