water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of water-column-sonar-processing has been flagged as possibly problematic by the registry scanner.
- water_column_sonar_processing/aws/s3_manager.py +2 -4
- water_column_sonar_processing/aws/s3fs_manager.py +1 -9
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
- water_column_sonar_processing/cruise/resample_regrid.py +88 -104
- water_column_sonar_processing/geometry/__init__.py +2 -0
- water_column_sonar_processing/geometry/elevation_manager.py +2 -2
- water_column_sonar_processing/geometry/geometry_manager.py +11 -13
- water_column_sonar_processing/geometry/line_simplification.py +10 -10
- water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +43 -46
- water_column_sonar_processing/model/zarr_manager.py +533 -514
- water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
- water_column_sonar_processing/utility/cleaner.py +2 -1
- water_column_sonar_processing/utility/constants.py +29 -29
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
- water_column_sonar_processing/process.py +0 -149
- water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py

@@ -31,8 +31,6 @@ class S3Manager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -56,6 +54,7 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            endpoint_url=self.endpoint_url,
         )
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
@@ -76,6 +75,7 @@ class S3Manager:
                 endpoint_url=self.endpoint_url,
             )
         )
+        #
         self.paginator = self.s3_client.get_paginator("list_objects_v2")
         self.paginator_noaa_wcsd_zarr_pds = (
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
@@ -117,7 +117,6 @@ class S3Manager:
         return client.list_buckets()

     #####################################################################
-    # tested
     def upload_nodd_file(
         self,
         file_name: str,
@@ -133,7 +132,6 @@ class S3Manager:
         return key

     #####################################################################
-    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,
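The functional change in s3_manager.py is that endpoint_url is now threaded into the boto3 client itself, which lets the same manager target an S3-compatible test endpoint instead of AWS. A minimal sketch of that pattern, with the endpoint value as a hypothetical stand-in (e.g. a moto or MinIO server started by a test fixture):

import boto3

# Hypothetical endpoint; endpoint_url=None falls back to the real AWS endpoint.
moto_endpoint = "http://127.0.0.1:5000"

s3_client = boto3.client(
    service_name="s3",
    region_name="us-east-1",
    endpoint_url=moto_endpoint,
)
print(s3_client.list_buckets()["Buckets"])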
water_column_sonar_processing/aws/s3fs_manager.py

@@ -3,6 +3,7 @@ from typing import Optional

 import s3fs

+
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 # S3FS_LOGGING_LEVEL=DEBUG

@@ -21,17 +22,8 @@ class S3FSManager:
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
-            # asynchronous=True,
         )
-        # self.s3fs.ls("")

-        # s3_fs = s3fs.S3FileSystem( # TODO: use s3fs_manager?
-        #     anon=True,
-        #     client_kwargs={
-        #         "endpoint_url": moto_server,
-        #         "region_name": "us-east-1",
-        #     },
-        # )
     #####################################################################
     def s3_map(
         self,
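The surviving TODO about S3FS_LOGGING_LEVEL refers to s3fs's environment-variable logging switch; a small sketch, with the bucket name taken from the public noaa-wcsd-zarr-pds bucket this package writes to (note the env var must be set before s3fs is imported):

import os

os.environ["S3FS_LOGGING_LEVEL"] = "DEBUG"  # read by s3fs at import time

import s3fs

fs = s3fs.S3FileSystem(anon=True)
print(fs.ls("noaa-wcsd-zarr-pds")[:5])  # anonymous listing of the public bucket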
water_column_sonar_processing/cruise/create_empty_zarr_store.py

@@ -6,6 +6,7 @@ import numpy as np
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
 from water_column_sonar_processing.model import ZarrManager
 from water_column_sonar_processing.utility import Cleaner
+from water_column_sonar_processing.utility import Constants


 # TODO: change name to "CreateLocalEmptyZarrStore"
@@ -19,52 +20,21 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

     #######################################################
-
-    # def upload_zarr_store_to_s3(
-    #     self,
-    #     output_bucket_name: str,
-    #     local_directory: str,
-    #     object_prefix: str,
-    #     cruise_name: str,
-    # ) -> None:
-    #     print("uploading model store to s3")
-    #     s3_manager = S3Manager()
-    #     #
-    #     print("Starting upload with thread pool executor.")
-    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-    #     all_files = []
-    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-    #         for file in files:
-    #             local_path = os.path.join(subdir, file)
-    #             # TODO: find a better method for splitting strings here:
-    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-    #             all_files.append([local_path, s3_key])
-    #     #
-    #     # print(all_files)
-    #     s3_manager.upload_files_with_thread_pool_executor(
-    #         output_bucket_name=output_bucket_name,
-    #         all_files=all_files,
-    #     )
-    #     print("Done uploading with thread pool executor.")
-    #     # TODO: move to common place
-
-    #######################################################
+    @staticmethod
     def create_cruise_level_zarr_store(
-        self,
         output_bucket_name: str,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         table_name: str,
-        # override_cruise_min_epsilon=None,
     ) -> None:
         """
-        Initialize zarr store
+        Initialize zarr store for the entire cruise which aggregates all the raw data.
+        All cruises will be resampled at 20 cm depth.
+        # tempdir="/tmp", # TODO: create better tmp directory for testing
         """
         tempdir = tempfile.TemporaryDirectory()
         try:
-            # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
             s3_manager = S3Manager()

@@ -76,7 +46,7 @@
             # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan

-            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
+            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!! ...no more geojson needed

             print(f"DataFrame shape: {df.shape}")
             cruise_channels = list(
@@ -88,18 +58,11 @@
                 df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
             )

-            # [
-            #
-            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            # )
-
-            # [4] calculate the np.max(max_echo_range + water_level)
+            # [4] max measurement resolution for the whole cruise
+            # Each max-echo-range is paired with water-level and then find the max of that
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
-
-            # TODO: set this to either 1 or 0.5 meters
-            cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )  # max_echo_range now includes water_level

             print(f"cruise_max_echo_range: {cruise_max_echo_range}")

@@ -107,21 +70,18 @@
             cruise_frequencies = [
                 float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
             ]
-            print(cruise_frequencies)

             new_width = int(consolidated_zarr_width)
-            print(f"new_width: {new_width}")
-            #################################################################
-            store_name = f"{cruise_name}.zarr"
-            print(store_name)
             ################################################################
-            # Delete existing
-            zarr_prefix = os.path.join(
+            # Delete any existing stores
+            zarr_prefix = os.path.join(
+                str(Constants.LEVEL_2.value), ship_name, cruise_name, sensor_name
+            )
             child_objects = s3_manager.get_child_objects(
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
-
+
             if len(child_objects) > 0:
                 s3_manager.delete_nodd_objects(
                     bucket_name=output_bucket_name,
@@ -130,50 +90,28 @@
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
-                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
-                    # min_echo_range=cruise_min_echo_range,
-                    max_echo_range=cruise_max_echo_range,
-                    cruise_min_epsilon=cruise_min_epsilon,
-                )
-            )
-            print(f"new_height: {new_height}")
-
             zarr_manager.create_zarr_store(
-                path=tempdir.name,
+                path=tempdir.name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
-                cruise_min_epsilon=cruise_min_epsilon,
+                # cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
+            # TODO: would be more elegant to create directly into s3 bucket
             s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
-                local_directory=tempdir.name,
+                local_directory=tempdir.name,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
-            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
             #################################################################
-            #
-            # count = self.__get_file_count(store_name=store_name)
-            # #
-            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-            #     bucket_name=self.__output_bucket,
-            #     sub_prefix=os.path.join(zarr_prefix, store_name),
-            # )
-            # if len(raw_zarr_files) != count:
-            #     print(f'Problem writing {store_name} with proper count {count}.')
-            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-            # else:
-            #     print("File counts match.")
+            # TODO: verify count of the files uploaded
             #################################################################
-            # Success
             # TODO: update enum in dynamodb
             print("Done creating cruise level zarr store.")
             #################################################################
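The new docstring pins every cruise to a 20 cm depth grid and drops the per-cruise cruise_min_epsilon argument. Assuming get_depth_values() builds a uniform grid at that resolution (an assumption; the implementation lives in zarr_manager.py, whose diff is not expanded here), the store height follows directly from cruise_max_echo_range:

import numpy as np

# Sketch only: uniform 20 cm bins from the first bin down to
# max_echo_range (which now already includes water_level).
def depth_values_sketch(max_echo_range: float, resolution: float = 0.2) -> np.ndarray:
    return np.arange(resolution, max_echo_range + resolution, resolution)

depths = depth_values_sketch(max_echo_range=1001.744)
print(len(depths))  # ~5009 bins for a ~1000 m cruise, vs the 5272 noted at the old 0.19 m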
water_column_sonar_processing/cruise/resample_regrid.py

@@ -3,11 +3,9 @@ import warnings
 from pathlib import Path

 import numpy as np
-import pandas as pd
 import xarray as xr

 from water_column_sonar_processing.aws import DynamoDBManager
-from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.model import ZarrManager

 warnings.simplefilter("ignore", category=RuntimeWarning)
@@ -19,28 +17,29 @@ class ResampleRegrid:
         self,
     ):
         self.__overwrite = True
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.dtype = "float32"

     #################################################################
     def interpolate_data(
         self,
-        input_xr,
-        ping_times,
-        all_cruise_depth_values,  # includes water_level offset
-        water_level
+        input_xr: xr.Dataset,
+        ping_times: np.ndarray,
+        all_cruise_depth_values: np.ndarray,  # includes water_level offset
+        water_level: float = 0.0,
     ) -> np.ndarray:
         """
-
+        Input dataset is passed in along with times and depth values to regrid to.
         """
         print("Interpolating dataset.")
         try:
+            # add offset for the water level to the whole input xarray
+            input_xr.depth.values = input_xr.depth.values + water_level
+
             data = np.empty(
-                (
+                (  # Depth / Time / Frequency
                     len(all_cruise_depth_values),
                     len(ping_times),
-                    len(input_xr.frequency_nominal),
+                    len(input_xr.frequency_nominal.values),
                 ),
                 dtype=self.dtype,
             )
@@ -49,36 +48,27 @@

             regrid_resample = xr.DataArray(  # where data will be written to
                 data=data,
-                dims=("depth", "time", "frequency"),
                 coords={
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
                 },
+                dims=("depth", "time", "frequency"),
+                name="Sv",
             )

-            # shift the input data by water_level
-            input_xr.echo_range.values = (
-                input_xr.echo_range.values + water_level
-            )  # water_level # TODO: change
-
             channels = input_xr.channel.values
-            for channel in range(
-                len(channels)
-            ):  # ?TODO: leaving off here, need to subset for just indices in time axis
+            for channel in range(len(channels)):
                 gc.collect()
                 max_depths = np.nanmax(
-                    a=input_xr.
+                    a=input_xr.depth.sel(channel=input_xr.channel[channel]).values,
                     # + water_level,
                     axis=1,
                 )
-                superset_of_max_depths = set(
-                    max_depths
-                )  # HB1501, D20150503-T102035.raw, TypeError: unhashable type: 'numpy.ndarray'
+                superset_of_max_depths = set(max_depths)
                 set_of_max_depths = list(
                     {x for x in superset_of_max_depths if x == x}
-                )  #
-                # iterate through partitions of dataset with similar depths and resample
+                )  # To speed things up resample in groups denoted by max_depth -- so samples might no longer be adjacent
                 for select_max_depth in set_of_max_depths:
                     # TODO: for nan just skip and leave all nan's
                     select_indices = [
@@ -87,46 +77,50 @@
                         if max_depths[i] == select_max_depth
                     ]

-                    # now create new DataArray with proper dimension and indices
-                    # data_select = input_xr.Sv.sel(
-                    #     channel=input_xr.channel[channel]
-                    # ).values[select_indices, :].T  # TODO: dont like this transpose
                     data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
                         select_indices, :
                     ].T.values
-                    # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"

                     times_select = input_xr.ping_time.values[select_indices]
-
-
-
-
-
-
+                    # input_xr.depth[0][0] -> [0., 499.9] before
+                    # input_xr.depth.values = input_xr.depth.values + water_level # issue here!! overwritting all the data
+                    # input_xr.depth[0][0] -> [7.5, 507.40] after
+                    depths_all = input_xr.depth.sel(
+                        channel=input_xr.channel[channel],
+                        ping_time=input_xr.ping_time[select_indices[0]],
+                    ).values
+                    depths_select = depths_all[~np.isnan(depths_all)]
+                    #
                     da_select = xr.DataArray(
-                        data=data_select,
+                        data=data_select[: len(depths_select), :],
                         dims=("depth", "time"),
                         coords={
                             "depth": depths_select,
                             "time": times_select,
                         },
-                    ).dropna(dim="depth")
-                    resampled = da_select.interp(
-                        depth=all_cruise_depth_values, method="nearest"
                     )
-                    #
-
+                    # 'resampled' is now the interpolated superset of new dimensions
+                    resampled = da_select.interp(  # need to define the data with water level (domain)
+                        depth=all_cruise_depth_values,  # and need to interpolate over the (range)
+                        method="nearest",
+                        assume_sorted=True,
+                    )  # good through here, @27 is -3.11 which is 5.4 m depth
+
+                    ### write to outptut ###
+                    regrid_resample.loc[  # ~150 MB for 5001x7706x4
                         dict(
                             time=times_select,
                             frequency=input_xr.frequency_nominal.values[channel],
                         )
                     ] = resampled
-                    print(f"updated {len(times_select)} ping times")
+                    # print(f"updated {len(times_select)} ping times")
                     gc.collect()
+            return regrid_resample.values.copy()
         except Exception as err:
             raise RuntimeError(f"Problem finding the dynamodb table, {err}")
-
-
+        finally:
+            gc.collect()
+            print("Done interpolating dataset.")

     #################################################################
     def resample_regrid(
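interpolate_data now groups pings by their maximum depth and regrids each group onto the cruise-wide depth axis with xarray's nearest-neighbour interpolation. A self-contained toy version of that core step (names and values are illustrative only; interp with method="nearest" needs scipy installed):

import numpy as np
import xarray as xr

# Toy Sv slice: 3 depth samples x 2 ping times.
da_select = xr.DataArray(
    data=np.array([[-70.0, -71.0], [-60.0, -61.0], [-50.0, -51.0]]),
    dims=("depth", "time"),
    coords={"depth": [0.5, 1.0, 1.5], "time": [0, 1]},
)

# Cruise-wide target grid; each target depth snaps to the nearest source depth.
all_cruise_depth_values = np.arange(0.25, 2.0, 0.25)
resampled = da_select.interp(
    depth=all_cruise_depth_values, method="nearest", assume_sorted=True
)
print(resampled.shape)  # (7, 2); target depths outside [0.5, 1.5] become NaN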
@@ -137,7 +131,6 @@
         table_name,
         bucket_name,
         override_select_files=None,
-        # override_cruise_min_epsilon=None,
         endpoint_url=None,
     ) -> None:
         """
@@ -149,7 +142,6 @@
         print("Resample Regrid, Interpolating dataset.")
         try:
             zarr_manager = ZarrManager()
-            geo_manager = GeometryManager()

             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                 ship_name=ship_name,
@@ -159,12 +151,9 @@
                 endpoint_url=endpoint_url,
             )

-            # get dynamo stuff
             dynamo_db_manager = DynamoDBManager()
             cruise_df = dynamo_db_manager.get_table_as_df(
-                # ship_name=ship_name,
                 cruise_name=cruise_name,
-                # sensor_name=sensor_name,
                 table_name=table_name,
             )

@@ -182,6 +171,7 @@
             print(f"Processing file: {file_name_stem}.")

             if f"{file_name_stem}.raw" not in list(cruise_df["FILE_NAME"]):
+                print("Raw file file_stem not found in dynamodb.")
                 raise Exception("Raw file file_stem not found in dynamodb.")

             # status = PipelineStatus['LEVEL_1_PROCESSING']
@@ -195,20 +185,21 @@
                 ]
             )

-            # Get input store
+            # Get input store
             input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
                 file_name_stem=file_name_stem,
-
+                bucket_name=bucket_name,
                 endpoint_url=endpoint_url,
             )

+            #########################################################################
             # This is the vertical offset of the sensor related to the ocean surface
             # See https://echopype.readthedocs.io/en/stable/data-proc-additional.html
             if "water_level" in input_xr_zarr_store.keys():
-                water_level = input_xr_zarr_store.water_level.values
+                water_level = float(input_xr_zarr_store.water_level.values)
             else:
                 water_level = 0.0
             #########################################################################
@@ -224,60 +215,52 @@
                 start_ping_time_index = ping_time_cumsum[index]
                 end_ping_time_index = ping_time_cumsum[index + 1]

-                max_echo_range = np.max(
+                max_echo_range = np.max(  # Should water level go in here?
                     (cruise_df["MAX_ECHO_RANGE"] + cruise_df["WATER_LEVEL"])
                     .dropna()
-                    .astype(
-                )
-                cruise_min_epsilon = np.min(
-                    cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                    .astype(np.float32)
                 )
+                # cruise_min_epsilon = np.min(
+                #     cruise_df["MIN_ECHO_RANGE"].dropna().astype(float)
+                # )  # TODO: currently overwriting to 0.25 m

-
-
-                #
-
-                    cruise_min_epsilon=cruise_min_epsilon,  # remove this & integrate into min_echo_range
-                )  # with offset of 7.5 meters, 0 meter measurement should now start at 7.5 meters
+                all_cruise_depth_values = zarr_manager.get_depth_values(
+                    max_echo_range=max_echo_range,
+                    # cruise_min_epsilon=cruise_min_epsilon,
+                )

-
-
+                if set(
+                    input_xr_zarr_store.Sv.dims
+                ) != {  # Cruise dimensions are: (depth, time, frequency)
                     "channel",
                     "ping_time",
                     "range_sample",
                 }:
                     raise Exception("Xarray dimensions are not as expected.")

-                indices, geospatial = geo_manager.read_s3_geo_json(
-
-
-
-
-
-
-                )
+                # indices, geospatial = geo_manager.read_s3_geo_json(  # TODO: remove this!!!!
+                #     ship_name=ship_name,
+                #     cruise_name=cruise_name,
+                #     sensor_name=sensor_name,
+                #     file_name_stem=file_name_stem,
+                #     input_xr_zarr_store=input_xr_zarr_store,
+                #     endpoint_url=endpoint_url,
+                #     output_bucket_name=bucket_name,
+                # )

-                input_xr = input_xr_zarr_store.isel(
-                    ping_time=indices
-                )  # Problem with HB200802-D20080310-T174959.zarr/
+                input_xr = input_xr_zarr_store  # .isel(ping_time=indices)

                 ping_times = input_xr.ping_time.values
-                # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-                epoch_seconds = [
-                    (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
-                    for i in ping_times
-                ]
                 output_zarr_store["time"][start_ping_time_index:end_ping_time_index] = (
-
+                    input_xr.ping_time.data
                 )

-                # --- UPDATING --- #
+                # --- UPDATING --- #  # TODO: problem, this returns dimensionless array
                 regrid_resample = self.interpolate_data(
                     input_xr=input_xr,
                     ping_times=ping_times,
                     all_cruise_depth_values=all_cruise_depth_values,  # should accommodate the water_level already
-                    water_level=water_level,
+                    water_level=water_level,
                 )

                 print(
@@ -288,50 +271,51 @@

                 for fff in range(regrid_resample.shape[-1]):
                     output_zarr_store["Sv"][
-                        :,
+                        : regrid_resample[:, :, fff].shape[0],
+                        start_ping_time_index:end_ping_time_index,
+                        fff,
                     ] = regrid_resample[:, :, fff]
                 #########################################################################
-                # TODO: add the "detected_seafloor_depth/" to the
-                # L2 cruise dataarrays
-                # TODO: make bottom optional
-                # TODO: Only checking the first channel for now. Need to average across all channels
                 # in the future. See https://github.com/CI-CMG/water-column-sonar-processing/issues/11
-                if "detected_seafloor_depth" in input_xr.variables:
-                    print(
-                        "Found detected_seafloor_depth, adding dataset to output store."
-                    )
+                if "detected_seafloor_depth" in list(input_xr.variables):
+                    print("Adding detected_seafloor_depth to output")
                     detected_seafloor_depth = input_xr.detected_seafloor_depth.values
                     detected_seafloor_depth[detected_seafloor_depth == 0.0] = np.nan
-                    # TODO: problem here: Processing file: D20070711-T210709.

-                    #
+                    # As requested, use the lowest frequencies to determine bottom
                    detected_seafloor_depths = detected_seafloor_depth[0, :]

                     detected_seafloor_depths[detected_seafloor_depths == 0.0] = np.nan
                     print(f"min depth measured: {np.nanmin(detected_seafloor_depths)}")
                     print(f"max depth measured: {np.nanmax(detected_seafloor_depths)}")
-                    # available_indices = np.argwhere(np.isnan(geospatial['latitude'].values))
                     output_zarr_store["bottom"][
                         start_ping_time_index:end_ping_time_index
                     ] = detected_seafloor_depths
                 #
                 #########################################################################
                 # [5] write subset of latitude/longitude
+                # output_zarr_store["latitude"][
+                #     start_ping_time_index:end_ping_time_index
+                # ] = geospatial.dropna()[
+                #     "latitude"
+                # ].values  # TODO: get from ds_sv directly, dont need geojson anymore
+                # output_zarr_store["longitude"][
+                #     start_ping_time_index:end_ping_time_index
+                # ] = geospatial.dropna()["longitude"].values
+                #########################################################################
                 output_zarr_store["latitude"][
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()[
-                    "latitude"
-                ].values  # TODO: get from ds_sv directly, dont need geojson anymore
+                ] = input_xr_zarr_store.latitude.dropna(dim="ping_time").values
                 output_zarr_store["longitude"][
                     start_ping_time_index:end_ping_time_index
-                ] = geospatial.dropna()["longitude"].values
-                #########################################################################
+                ] = input_xr_zarr_store.longitude.dropna(dim="ping_time").values
                 #########################################################################
         except Exception as err:
             raise RuntimeError(f"Problem with resample_regrid, {err}")
         finally:
             print("Exiting resample_regrid.")
             # TODO: read across times and verify dataset was written?
+            gc.collect()

     #######################################################

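Each raw file's pings are written into the cruise store at offsets taken from a running total of per-file ping counts, so the files tile the shared time axis without overlap. A toy version of that bookkeeping (array contents are made up):

import numpy as np

num_ping_times = np.array([120, 80, 200])  # hypothetical per-file ping counts

# Prepend 0 so file i owns the half-open slice [cumsum[i], cumsum[i + 1]).
ping_time_cumsum = np.concatenate(([0], np.cumsum(num_ping_times)))
for index in range(len(num_ping_times)):
    start_ping_time_index = ping_time_cumsum[index]
    end_ping_time_index = ping_time_cumsum[index + 1]
    print(f"file {index} -> time[{start_ping_time_index}:{end_ping_time_index}]")
# file 0 -> time[0:120], file 1 -> time[120:200], file 2 -> time[200:400]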
water_column_sonar_processing/geometry/__init__.py

@@ -2,10 +2,12 @@ from .elevation_manager import ElevationManager
 from .geometry_manager import GeometryManager
 from .line_simplification import LineSimplification
 from .pmtile_generation import PMTileGeneration
+from .spatiotemporal import Spatiotemporal

 __all__ = [
     "ElevationManager",
     "GeometryManager",
     "LineSimplification",
     "PMTileGeneration",
+    "Spatiotemporal",
 ]
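The re-export makes the new spatiotemporal module (added wholesale in this release, +106 lines per the file list above) importable from the package's geometry namespace; its methods are not visible in this diff, so only the import is shown:

# The import path follows from the __init__.py change above; the no-arg
# constructor is an assumption based on the package's other manager classes.
from water_column_sonar_processing.geometry import Spatiotemporal

spatiotemporal = Spatiotemporal()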
water_column_sonar_processing/geometry/elevation_manager.py

@@ -46,7 +46,7 @@ class ElevationManager:
         self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-        self.
+        self.TIMEOUT_SECONDS = 10

     #######################################################
     def get_arcgis_elevation(
@@ -71,7 +71,7 @@ class ElevationManager:
         # order: (lng, lat)
         geometry = f'{{"points":{str(chunk)}}}'
         url = f"https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic/ImageServer/identify?geometry={geometry}&geometryType={geometryType}&returnGeometry=false&returnCatalogItems=false&f=json"
-        result = requests.get(url, timeout=self.
+        result = requests.get(url, timeout=self.TIMEOUT_SECONDS)
         res = json.loads(result.content.decode("utf8"))
         if "results" in res:
             for element in res["results"]:
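The identify endpoint hard-coded above can be exercised on its own; a minimal sketch mirroring the request/parse flow, where geometryType=esriGeometryMultipoint (matching the {"points": ...} JSON) and the "value" key are assumptions, since neither appears in the expanded diff:

import json

import requests

geometry = '{"points":[[-70.0,42.0]]}'  # one (lng, lat) pair, arbitrary location
url = (
    "https://gis.ngdc.noaa.gov/arcgis/rest/services/DEM_mosaics/DEM_global_mosaic"
    f"/ImageServer/identify?geometry={geometry}&geometryType=esriGeometryMultipoint"
    "&returnGeometry=false&returnCatalogItems=false&f=json"
)
result = requests.get(url, timeout=10)
res = json.loads(result.content.decode("utf8"))
if "results" in res:
    for element in res["results"]:
        print(element.get("value"))  # assumed key; negative values are below sea level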
|