water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release.
This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/__init__.py +13 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
- water_column_sonar_processing/aws/s3_manager.py +420 -0
- water_column_sonar_processing/aws/s3fs_manager.py +72 -0
- {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
- {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
- water_column_sonar_processing/cruise/__init__.py +4 -0
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +339 -0
- water_column_sonar_processing/geometry/__init__.py +11 -0
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +243 -0
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
- water_column_sonar_processing/index/__init__.py +3 -0
- water_column_sonar_processing/index/index_manager.py +384 -0
- water_column_sonar_processing/model/__init__.py +3 -0
- water_column_sonar_processing/model/zarr_manager.py +722 -0
- water_column_sonar_processing/process.py +149 -0
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
- water_column_sonar_processing/utility/__init__.py +13 -0
- {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
- water_column_sonar_processing/utility/constants.py +118 -0
- {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
- water_column_sonar_processing/utility/timestamp.py +12 -0
- water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
- water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- model/__init__.py +0 -0
- model/aws/__init__.py +0 -0
- model/aws/dynamodb_manager.py +0 -149
- model/aws/s3_manager.py +0 -356
- model/aws/s3fs_manager.py +0 -74
- model/cruise/__init__.py +0 -0
- model/cruise/create_empty_zarr_store.py +0 -166
- model/cruise/resample_regrid.py +0 -248
- model/geospatial/__init__.py +0 -0
- model/geospatial/geometry_manager.py +0 -194
- model/geospatial/geometry_simplification.py +0 -81
- model/geospatial/pmtile_generation.py +0 -74
- model/index/__init__.py +0 -0
- model/index/index.py +0 -228
- model/model.py +0 -138
- model/utility/__init__.py +0 -0
- model/utility/constants.py +0 -56
- model/utility/timestamp.py +0 -12
- model/zarr/__init__.py +0 -0
- model/zarr/bar.py +0 -28
- model/zarr/foo.py +0 -11
- model/zarr/zarr_manager.py +0 -298
- water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
- water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
- water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
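The release also renames the top-level package from model to water_column_sonar_processing (and appears to move to a CalVer-style version number). Downstream imports have to change accordingly; a minimal sketch, assuming the 0.0.1 modules were imported via the old model tree:

# 0.0.1 layout (hypothetical caller)
from model.zarr.zarr_manager import ZarrManager

# 25.11.1 layout, matching the imports used inside the new modules below
from water_column_sonar_processing.model import ZarrManager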
@@ -0,0 +1,191 @@
+import os
+import tempfile
+
+import numpy as np
+
+from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+from water_column_sonar_processing.model import ZarrManager
+from water_column_sonar_processing.utility import Cleaner
+
+
+# TODO: change name to "CreateLocalEmptyZarrStore"
+class CreateEmptyZarrStore:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.__overwrite = True
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+
+    #######################################################
+    # TODO: moved this to the s3_manager
+    # def upload_zarr_store_to_s3(
+    #     self,
+    #     output_bucket_name: str,
+    #     local_directory: str,
+    #     object_prefix: str,
+    #     cruise_name: str,
+    # ) -> None:
+    #     print("uploading model store to s3")
+    #     s3_manager = S3Manager()
+    #     #
+    #     print("Starting upload with thread pool executor.")
+    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+    #     all_files = []
+    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+    #         for file in files:
+    #             local_path = os.path.join(subdir, file)
+    #             # TODO: find a better method for splitting strings here:
+    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+    #             all_files.append([local_path, s3_key])
+    #     #
+    #     # print(all_files)
+    #     s3_manager.upload_files_with_thread_pool_executor(
+    #         output_bucket_name=output_bucket_name,
+    #         all_files=all_files,
+    #     )
+    #     print("Done uploading with thread pool executor.")
+    #     # TODO: move to common place
+
+    #######################################################
+    def create_cruise_level_zarr_store(
+        self,
+        output_bucket_name: str,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        table_name: str,
+        # override_cruise_min_epsilon=None,
+    ) -> None:
+        """
+        Initialize zarr store. The water_level needs to be integrated.
+        """
+        tempdir = tempfile.TemporaryDirectory()
+        try:
+            # HB0806 - 123, HB0903 - 220
+            dynamo_db_manager = DynamoDBManager()
+            s3_manager = S3Manager()
+
+            df = dynamo_db_manager.get_table_as_df(
+                table_name=table_name,
+                cruise_name=cruise_name,
+            )
+
+            # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
+            # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
+
+            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+
+            print(f"DataFrame shape: {df.shape}")
+            cruise_channels = list(
+                set([i for sublist in df["CHANNELS"].dropna() for i in sublist])
+            )
+            cruise_channels.sort()
+
+            consolidated_zarr_width = np.sum(
+                df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
+            )
+
+            # [3] calculate the max/min measurement resolutions for the whole cruise
+            # cruise_min_echo_range = np.min(
+            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+            # )
+
+            # [4] calculate the np.max(max_echo_range + water_level)
+            cruise_max_echo_range = np.max(
+                (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+            )
+
+            # TODO: set this to either 1 or 0.5 meters
+            cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+
+            print(f"cruise_max_echo_range: {cruise_max_echo_range}")
+
+            # [5] get number of channels
+            cruise_frequencies = [
+                float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
+            ]
+            print(cruise_frequencies)
+
+            new_width = int(consolidated_zarr_width)
+            print(f"new_width: {new_width}")
+            #################################################################
+            store_name = f"{cruise_name}.zarr"
+            print(store_name)
+            ################################################################
+            # Delete existing model store if it exists
+            zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
+            child_objects = s3_manager.get_child_objects(
+                bucket_name=output_bucket_name,
+                sub_prefix=zarr_prefix,
+            )
+            #
+            if len(child_objects) > 0:
+                s3_manager.delete_nodd_objects(
+                    bucket_name=output_bucket_name,
+                    objects=child_objects,
+                )
+            ################################################################
+            # Create new model store
+            zarr_manager = ZarrManager()
+            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
+                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
+                    # min_echo_range=cruise_min_echo_range,
+                    max_echo_range=cruise_max_echo_range,
+                    cruise_min_epsilon=cruise_min_epsilon,
+                )
+            )
+            print(f"new_height: {new_height}")
+
+            zarr_manager.create_zarr_store(
+                path=tempdir.name,  # TODO: need to use .name or problem
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                frequencies=cruise_frequencies,
+                width=new_width,
+                # min_echo_range=cruise_min_echo_range,
+                max_echo_range=cruise_max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
+                calibration_status=True,
+            )
+            #################################################################
+            s3_manager.upload_zarr_store_to_s3(
+                output_bucket_name=output_bucket_name,
+                local_directory=tempdir.name,  # TODO: need to use .name or problem
+                object_prefix=zarr_prefix,
+                cruise_name=cruise_name,
+            )
+            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
+            #################################################################
+            # Verify count of the files uploaded
+            # count = self.__get_file_count(store_name=store_name)
+            # #
+            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
+            #     bucket_name=self.__output_bucket,
+            #     sub_prefix=os.path.join(zarr_prefix, store_name),
+            # )
+            # if len(raw_zarr_files) != count:
+            #     print(f'Problem writing {store_name} with proper count {count}.')
+            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
+            # else:
+            #     print("File counts match.")
+            #################################################################
+            # Success
+            # TODO: update enum in dynamodb
+            print("Done creating cruise level zarr store.")
+            #################################################################
+        except Exception as err:
+            raise RuntimeError(
+                f"Problem trying to create new cruise model store, {err}"
+            )
+        finally:
+            cleaner = Cleaner()
+            cleaner.delete_local_files()
+            # TODO: should delete zarr store in temp directory too?
+            print("Done creating cruise level model store")
+
+
+###########################################################
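A minimal usage sketch for the new CreateEmptyZarrStore, assuming it is re-exported from the cruise subpackage (water_column_sonar_processing/cruise/__init__.py in the file list) and that the DynamoDB table and output bucket already exist; the bucket and table names here are hypothetical, while the ship/cruise/sensor names come from the comments above:

from water_column_sonar_processing.cruise import CreateEmptyZarrStore

# Reads cruise metadata from DynamoDB, deletes any existing level_2 store
# under the prefix, then writes a fresh empty store and uploads it to S3.
CreateEmptyZarrStore().create_cruise_level_zarr_store(
    output_bucket_name="example-output-bucket",  # hypothetical bucket
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0806",
    sensor_name="EK60",
    table_name="example-pipeline-table",  # hypothetical table
)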
@@ -0,0 +1,21 @@
+# ### https://xarray-datatree.readthedocs.io/en/latest/data-structures.html
+# import xarray as xr
+# from datatree import DataTree
+#
+#
+# class DatatreeManager:
+#     #######################################################
+#     def __init__(
+#         self,
+#     ):
+#         self.dtype = "float32"
+#
+#     #################################################################
+#     def create_datatree(
+#         self,
+#         input_ds,
+#     ) -> None:
+#         ds1 = xr.Dataset({"foo": "orange"})
+#         dt = DataTree(name="root", dataset=ds1)  # create root node
+#         # ds2 = xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])})
+#         return dt
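The whole DatatreeManager is commented out. For reference, a sketch of the same root-plus-child idea against the DataTree API that now ships inside xarray (assumes xarray >= 2024.10; the commented code targets the older standalone datatree package):

import xarray as xr

# Create a root node from a dataset, then attach a child node,
# mirroring the commented-out create_datatree() above.
root = xr.DataTree(dataset=xr.Dataset({"foo": "orange"}), name="root")
root["child"] = xr.DataTree(dataset=xr.Dataset({"bar": 0}, coords={"y": ("y", [0, 1, 2])}))
print(root)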
@@ -0,0 +1,339 @@
+import gc
+import warnings
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+
+from water_column_sonar_processing.aws import DynamoDBManager
+from water_column_sonar_processing.geometry import GeometryManager
+from water_column_sonar_processing.model import ZarrManager
+
+warnings.simplefilter("ignore", category=RuntimeWarning)
+
+
+class ResampleRegrid:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.__overwrite = True
+        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.dtype = "float32"
+
+    #################################################################
+    def interpolate_data(
+        self,
+        input_xr,
+        ping_times,
+        all_cruise_depth_values,  # includes water_level offset
+        water_level,  # this is the offset that will be added to each respective file
+    ) -> np.ndarray:
+        """
+        What gets passed into interpolate data
+        """
+        print("Interpolating dataset.")
+        try:
+            data = np.empty(
+                (
+                    len(all_cruise_depth_values),
+                    len(ping_times),
+                    len(input_xr.frequency_nominal),
+                ),
+                dtype=self.dtype,
+            )
+
+            data[:] = np.nan
+
+            regrid_resample = xr.DataArray(  # where data will be written to
+                data=data,
+                dims=("depth", "time", "frequency"),
+                coords={
+                    "depth": all_cruise_depth_values,
+                    "time": ping_times,
+                    "frequency": input_xr.frequency_nominal.values,
+                },
+            )
+
+            # shift the input data by water_level
+            input_xr.echo_range.values = (
+                input_xr.echo_range.values + water_level
+            )  # water_level # TODO: change
+
+            channels = input_xr.channel.values
+            for channel in range(
+                len(channels)
+            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
+                gc.collect()
+                max_depths = np.nanmax(
+                    a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
+                    # + water_level,
+                    axis=1,
+                )
+                superset_of_max_depths = set(
+                    max_depths
+                )  # HB1501, D20150503-T102035.raw, TypeError: unhashable type: 'numpy.ndarray'
+                set_of_max_depths = list(
+                    {x for x in superset_of_max_depths if x == x}
+                )  # removes nan's
+                # iterate through partitions of dataset with similar depths and resample
+                for select_max_depth in set_of_max_depths:
+                    # TODO: for nan just skip and leave all nan's
+                    select_indices = [
+                        i
+                        for i in range(0, len(max_depths))
+                        if max_depths[i] == select_max_depth
+                    ]
+
+                    # now create new DataArray with proper dimension and indices
+                    # data_select = input_xr.Sv.sel(
+                    #     channel=input_xr.channel[channel]
+                    # ).values[select_indices, :].T  # TODO: dont like this transpose
+                    data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                        select_indices, :
+                    ].T.values
+                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"
+
+                    times_select = input_xr.ping_time.values[select_indices]
+                    depths_select = input_xr.echo_range.sel(
+                        channel=input_xr.channel[channel]
+                    ).values[
+                        select_indices[0], :
+                    ]  # '0' because all others in group should be same
+
+                    da_select = xr.DataArray(
+                        data=data_select,
+                        dims=("depth", "time"),
+                        coords={
+                            "depth": depths_select,
+                            "time": times_select,
+                        },
+                    ).dropna(dim="depth")
+                    resampled = da_select.interp(
+                        depth=all_cruise_depth_values, method="nearest"
+                    )
+                    # write to the resample array
+                    regrid_resample.loc[
+                        dict(
+                            time=times_select,
+                            frequency=input_xr.frequency_nominal.values[channel],
+                        )
+                    ] = resampled
+                    print(f"updated {len(times_select)} ping times")
+                gc.collect()
+        except Exception as err:
+            raise RuntimeError(f"Problem finding the dynamodb table, {err}")
+        print("Done interpolating dataset.")
+        return regrid_resample.values.copy()
+
+    #################################################################
+    def resample_regrid(
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        table_name,
+        bucket_name,
+        override_select_files=None,
+        # override_cruise_min_epsilon=None,
+        endpoint_url=None,
+    ) -> None:
+        """
+        The goal here is to interpolate the dataset against the depth values already populated
+        in the existing file level model stores. We open the cruise-level store with model for
+        read/write operations. We open the file-level store with Xarray to leverage tools for
+        resampling and subsetting the dataset.
+        """
+        print("Resample Regrid, Interpolating dataset.")
+        try:
+            zarr_manager = ZarrManager()
+            geo_manager = GeometryManager()
+
+            output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
+                ship_name=ship_name,
+                cruise_name=cruise_name,
+                sensor_name=sensor_name,
+                output_bucket_name=bucket_name,
+                endpoint_url=endpoint_url,
+            )
+
+            # get dynamo stuff
+            dynamo_db_manager = DynamoDBManager()
+            cruise_df = dynamo_db_manager.get_table_as_df(
+                # ship_name=ship_name,
+                cruise_name=cruise_name,
+                # sensor_name=sensor_name,
+                table_name=table_name,
+            )
+
+            #########################################################
+            #########################################################
+            all_file_names = cruise_df["FILE_NAME"]
+
+            if override_select_files is not None:
+                all_file_names = override_select_files
+
+            # Iterate files
+            for file_name in all_file_names:
+                gc.collect()
+                file_name_stem = Path(file_name).stem
+                print(f"Processing file: {file_name_stem}.")
+
+                if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+                    raise Exception("Raw file file_stem not found in dynamodb.")
+
+                # status = PipelineStatus['LEVEL_1_PROCESSING']
+                # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
+                # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
+
+                # Get index from all cruise files. Note: should be based on which are included in cruise.
+                index = int(
+                    cruise_df.index[cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"][
+                        0
+                    ]
+                )
+
+                # Get input store - this is unadjusted for water_level
+                input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
+                    ship_name=ship_name,
+                    cruise_name=cruise_name,
+                    sensor_name=sensor_name,
+                    file_name_stem=file_name_stem,
+                    input_bucket_name=bucket_name,
+                    endpoint_url=endpoint_url,
+                )
+
+                # This is the vertical offset of the sensor relative to the ocean surface
+                # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+                if "water_level" in input_xr_zarr_store.keys():
+                    water_level = input_xr_zarr_store.water_level.values
+                else:
+                    water_level = 0.0
+                #########################################################################
+                # [3] Get needed time indices - along the x-axis
+                # Offset from start index to insert new dataset. Note that missing values are excluded.
+                ping_time_cumsum = np.insert(
+                    np.cumsum(
+                        cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                    ),
+                    obj=0,
+                    values=0,
+                )
+                start_ping_time_index = ping_time_cumsum[index]
+                end_ping_time_index = ping_time_cumsum[index + 1]
+
+                max_echo_range = np.max(
+                    (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
+                    .dropna()
+                    .astype(float)
+                )
+                cruise_min_epsilon = np.min(
+                    cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                )
+
+                # Note: cruise dims (depth, time, frequency)
+                all_cruise_depth_values = zarr_manager.get_depth_values(  # needs to integrate water_level
+                    # min_echo_range=min_echo_range,
+                    max_echo_range=max_echo_range,  # does it here
+                    cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
+                )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
+
+                print(" ".join(list(input_xr_zarr_store.Sv.dims)))
+                if set(input_xr_zarr_store.Sv.dims) != {
+                    "channel",
+                    "ping_time",
+                    "range_sample",
+                }:
+                    raise Exception("Xarray dimensions are not as expected.")
+
+                indices, geospatial = geo_manager.read_s3_geo_json(
+                    ship_name=ship_name,
+                    cruise_name=cruise_name,
+                    sensor_name=sensor_name,
+                    file_name_stem=file_name_stem,
+                    input_xr_zarr_store=input_xr_zarr_store,
+                    endpoint_url=endpoint_url,
+                    output_bucket_name=bucket_name,
+                )
+
+                input_xr = input_xr_zarr_store.isel(
+                    ping_time=indices
+                )  # Problem with HB200802-D20080310-T174959.zarr/
+
+                ping_times = input_xr.ping_time.values
+                # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
+                epoch_seconds = [
+                    (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                    for i in ping_times
+                ]
+                output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
+                    epoch_seconds
+                )
+
+                # --- UPDATING --- #
+                regrid_resample = self.interpolate_data(
+                    input_xr=input_xr,
+                    ping_times=ping_times,
+                    all_cruise_depth_values=all_cruise_depth_values,  # should accommodate the water_level already
+                    water_level=water_level,  # not applied to anything yet
+                )
+
+                print(
+                    f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+                )
+                #########################################################################
+                # write Sv values to cruise-level-model-store
+
+                for fff in range(regrid_resample.shape[-1]):
+                    output_zarr_store["Sv"][
+                        :, start_ping_time_index:end_ping_time_index, fff
+                    ] = regrid_resample[:, :, fff]
+                #########################################################################
+                # TODO: add the "detected_seafloor_depth/" to the
+                # L2 cruise dataarrays
+                # TODO: make bottom optional
+                # TODO: Only checking the first channel for now. Need to average across all channels
+                # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
+                if "detected_seafloor_depth" in input_xr.variables:
+                    print(
+                        "Found detected_seafloor_depth, adding dataset to output store."
+                    )
+                    detected_seafloor_depth = input_xr.detected_seafloor_depth.values
+                    detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
+                    # TODO: problem here: Processing file: D20070711-T210709.
+
+                    # Use the lowest frequencies to determine bottom
+                    detected_seafloor_depths = detected_seafloor_depth[0, :]
+
+                    detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
+                    print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
+                    print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
+                    # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
+                    output_zarr_store["bottom"][
+                        start_ping_time_index:end_ping_time_index
+                    ] = detected_seafloor_depths
+                #
+                #########################################################################
+                # [5] write subset of latitude/longitude
+                output_zarr_store["latitude"][
+                    start_ping_time_index:end_ping_time_index
+                ] = geospatial.dropna()[
+                    "latitude"
+                ].values  # TODO: get from ds_sv directly, dont need geojson anymore
+                output_zarr_store["longitude"][
+                    start_ping_time_index:end_ping_time_index
+                ] = geospatial.dropna()["longitude"].values
+                #########################################################################
+                #########################################################################
+        except Exception as err:
+            raise RuntimeError(f"Problem with resample_regrid, {err}")
+        finally:
+            print("Exiting resample_regrid.")
+            # TODO: read across times and verify dataset was written?

    #######################################################


+###########################################################
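The heart of interpolate_data is a nearest-neighbour regrid of each constant-depth partition onto the shared cruise depth axis via DataArray.interp. A self-contained sketch of just that step, with synthetic values standing in for Sv and for what get_depth_values would return (scipy is required for the "nearest" method):

import numpy as np
import xarray as xr

# File-level Sv samples on their own echo_range/depth axis (synthetic)
file_depths = np.array([0.5, 1.0, 1.5, 2.0])
ping_times = np.array(["2007-07-20T02:10:25", "2007-07-20T02:10:26"], dtype="datetime64[ns]")
sv = xr.DataArray(
    np.random.default_rng(0).normal(size=(4, 2)),
    dims=("depth", "time"),
    coords={"depth": file_depths, "time": ping_times},
)

# Shared cruise-level depth axis (stand-in for get_depth_values output)
cruise_depths = np.arange(0.25, 2.75, 0.25)  # 0.25 m grid, 10 values

# Nearest-neighbour regrid onto the cruise axis; depths outside the file's
# range come back as NaN, matching the NaN-initialized cruise store.
resampled = sv.dropna(dim="depth").interp(depth=cruise_depths, method="nearest")
print(resampled.shape)  # (10, 2)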
@@ -0,0 +1,11 @@
+from .elevation_manager import ElevationManager
+from .geometry_manager import GeometryManager
+from .line_simplification import LineSimplification
+from .pmtile_generation import PMTileGeneration
+
+__all__ = [
+    "ElevationManager",
+    "GeometryManager",
+    "LineSimplification",
+    "PMTileGeneration",
+]
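With these re-exports in place, callers import from the subpackage root rather than from the individual modules, as the new resample_regrid.py above already does:

from water_column_sonar_processing.geometry import ElevationManager, GeometryManager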
@@ -0,0 +1,111 @@
+"""
+https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry=-31.70235%2C13.03332&geometryType=esriGeometryPoint&returnGeometry=false&returnCatalogItems=false&f=json
+
+https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/
+identify?
+geometry=-31.70235%2C13.03332
+&geometryType=esriGeometryPoint
+&returnGeometry=false
+&returnCatalogItems=false
+&f=json
+{"objectId":0,"name":"Pixel","value":"-5733","location":{"x":-31.702349999999999,"y":13.03332,"spatialReference":{"wkid":4326,"latestWkid":4326}},"properties":null,"catalogItems":null,"catalogItemVisibilities":[]}
+-5733
+
+(base) rudy:deleteME rudy$ curl https://api.opentopodata.org/v1/gebco2020?locations=13.03332,-31.70235
+{
+  "results": [
+    {
+      "dataset": "gebco2020",
+      "elevation": -5729.0,
+      "location": {
+        "lat": 13.03332,
+        "lng": -31.70235
+      }
+    }
+  ],
+  "status": "OK"
+}
+"""
+
+import json
+import time
+from collections.abc import Generator
+
+import requests
+
+
+def chunked(ll: list, n: int) -> Generator:
+    # Yields successive n-sized chunks from ll.
+    for i in range(0, len(ll), n):
+        yield ll[i : i + n]
+
+
+class ElevationManager:
+    #######################################################
+    def __init__(
+        self,
+    ):
+        self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
+        self.TIMOUT_SECONDS = 10
+
+    #######################################################
+    def get_arcgis_elevation(
+        self,
+        lngs: list,
+        lats: list,
+        chunk_size: int = 500,  # I think this is the api limit
+    ) -> list:
+        # Reference: https://developers.arcgis.com/rest/services-reference/enterprise/map-to-image/
+        # Info: https://www.arcgis.com/home/item.html?id=c876e3c96a8642ab8557646a3b4fa0ff
+        ### 'https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={"points":[[-31.70235,13.03332],[-32.70235,14.03332]]}&geometryType=esriGeometryMultipoint&returnGeometry=false&returnCatalogItems=false&f=json'
+        if len(lngs) != len(lats):
+            raise ValueError("lngs and lats must have same length")
+
+        geometryType = "esriGeometryMultipoint"  # TODO: allow single point?
+
+        depths = []
+
+        list_of_points = [list(elem) for elem in list(zip(lngs, lats))]
+        for chunk in chunked(list_of_points, chunk_size):
+            time.sleep(0.1)
+            # order: (lng, lat)
+            geometry = f'{{"points":{str(chunk)}}}'
+            url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
+            result = requests.get(url, timeout=self.TIMOUT_SECONDS)
+            res = json.loads(result.content.decode("utf8"))
+            if "results" in res:
+                for element in res["results"]:
+                    depths.append(float(element["value"]))
+            elif "value" in res:
+                depths.append(float(res["value"]))
+
+        return depths
+
+    # def get_gebco_bathymetry_elevation(self) -> int:
+    #     # Documentation: https://www.opentopodata.org/datasets/gebco2020/
+    #     latitude = 13.03332
+    #     longitude = -31.70235
+    #     dataset = "gebco2020"
+    #     url = f"https://api.opentopodata.org/v1/{dataset}?locations={latitude},{longitude}"
+    #     pass
+
+    # def get_elevation(
+    #     self,
+    #     df,
+    #     lat_column,
+    #     lon_column,
+    # ) -> int:
+    #     """Query service using lat, lon. add the elevation values as a new column."""
+    #     url = r'https://epqs.nationalmap.gov/v1/json?'
+    #     elevations = []
+    #     for lat, lon in zip(df[lat_column], df[lon_column]):
+    #         # define rest query params
+    #         params = {
+    #             'output': 'json',
+    #             'x': lon,
+    #             'y': lat,
+    #             'units': 'Meters'
+    #         }
+    #         result = requests.get((url + urllib.parse.urlencode(params)))
+    #         elevations.append(result.json()['value'])
+    #     return elevations
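A usage sketch for the one public method, reusing the coordinates from the module docstring (point order is (lng, lat); the DEM service returns elevations in meters, negative below sea level - the docstring example returned -5733 at this location):

from water_column_sonar_processing.geometry import ElevationManager

# One mid-Atlantic point, taken from the docstring above.
depths = ElevationManager().get_arcgis_elevation(
    lngs=[-31.70235],
    lats=[13.03332],
)
print(depths)  # expected roughly [-5733.0]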