water-column-sonar-processing 0.0.4-py3-none-any.whl → 0.0.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +16 -0
- water_column_sonar_processing/aws/__init__.py +7 -0
- {aws_manager → water_column_sonar_processing/aws}/dynamodb_manager.py +71 -50
- {aws_manager → water_column_sonar_processing/aws}/s3_manager.py +120 -130
- {aws_manager → water_column_sonar_processing/aws}/s3fs_manager.py +13 -19
- {aws_manager → water_column_sonar_processing/aws}/sns_manager.py +10 -21
- {aws_manager → water_column_sonar_processing/aws}/sqs_manager.py +10 -18
- water_column_sonar_processing/cruise/__init__.py +4 -0
- {cruise → water_column_sonar_processing/cruise}/create_empty_zarr_store.py +62 -44
- {cruise → water_column_sonar_processing/cruise}/resample_regrid.py +117 -66
- water_column_sonar_processing/geometry/__init__.py +5 -0
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_manager.py +80 -49
- {geometry_manager → water_column_sonar_processing/geometry}/geometry_simplification.py +13 -12
- {geometry_manager → water_column_sonar_processing/geometry}/pmtile_generation.py +25 -24
- water_column_sonar_processing/index/__init__.py +3 -0
- {index_manager → water_column_sonar_processing/index}/index_manager.py +106 -82
- water_column_sonar_processing/model/__init__.py +3 -0
- {zarr_manager → water_column_sonar_processing/model}/zarr_manager.py +119 -83
- water_column_sonar_processing/process.py +147 -0
- water_column_sonar_processing/utility/__init__.py +6 -0
- {utility → water_column_sonar_processing/utility}/cleaner.py +6 -7
- water_column_sonar_processing/utility/constants.py +63 -0
- {utility → water_column_sonar_processing/utility}/pipeline_status.py +37 -10
- {utility → water_column_sonar_processing/utility}/timestamp.py +3 -2
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/METADATA +31 -1
- water_column_sonar_processing-0.0.6.dist-info/RECORD +29 -0
- water_column_sonar_processing-0.0.6.dist-info/top_level.txt +1 -0
- __init__.py +0 -0
- aws_manager/__init__.py +0 -4
- cruise/__init__.py +0 -0
- geometry_manager/__init__.py +0 -0
- index_manager/__init__.py +0 -0
- model.py +0 -140
- utility/__init__.py +0 -0
- utility/constants.py +0 -56
- water_column_sonar_processing-0.0.4.dist-info/RECORD +0 -29
- water_column_sonar_processing-0.0.4.dist-info/top_level.txt +0 -8
- zarr_manager/__init__.py +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/LICENSE +0 -0
- {water_column_sonar_processing-0.0.4.dist-info → water_column_sonar_processing-0.0.6.dist-info}/WHEEL +0 -0
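The substantive change in this release is the package layout: the flat top-level modules of 0.0.4 (aws_manager, cruise, geometry_manager, index_manager, zarr_manager, utility) now live under a single water_column_sonar_processing namespace, with zarr_manager.py moving into the new model subpackage. A minimal sketch of the import migration for downstream callers; the 0.0.4 paths are inferred from the deleted top-level modules listed above, while the 0.0.6 paths appear verbatim in the diffs below.

# 0.0.4 (flat top-level modules; these paths are inferred from the deleted files)
# from aws_manager.s3_manager import S3Manager
# from zarr_manager.zarr_manager import ZarrManager

# 0.0.6 (single namespaced package)
from water_column_sonar_processing.aws.s3_manager import S3Manager
from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
from water_column_sonar_processing.model.zarr_manager import ZarrManager
from water_column_sonar_processing.utility.cleaner import Cleaner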
{cruise → water_column_sonar_processing/cruise}/create_empty_zarr_store.py

@@ -1,108 +1,123 @@
 import os
+
 import numcodecs
 import numpy as np
 
-from …
-from …
-from …
-from …
+from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
+from water_column_sonar_processing.aws.s3_manager import S3Manager
+from water_column_sonar_processing.model.zarr_manager import ZarrManager
+from water_column_sonar_processing.utility.cleaner import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
-TEMPDIR = "/tmp"
-
-# TODO: when ready switch to version 3 of zarr spec
+# TEMPDIR = "/tmp"
+# TODO: when ready switch to version 3 of zarr spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+
 class CreateEmptyZarrStore:
     #######################################################
     def __init__(
-        …
+        self,
     ):
         self.__overwrite = True
-        # TODO: create output_bucket and input_bucket variables here?
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
 
     def upload_zarr_store_to_s3(
-        …
-        …
-        …
-        …
+        self,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
     ) -> None:
-        print(…
+        print("uploading zarr store to s3")
        s3_manager = S3Manager()
         #
-        print(…
+        print("Starting upload with thread pool executor.")
         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_files = []
-        for subdir, dirs, files in os.walk(…
+        for subdir, dirs, files in os.walk(
+            f"{local_directory}/{cruise_name}.zarr"
+        ):
             for file in files:
                 local_path = os.path.join(subdir, file)
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.…
-                s3_key = f'{object_prefix}/{cruise_name}.…
+                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
                 all_files.append([local_path, s3_key])
         #
         # print(all_files)
         s3_manager.upload_files_with_thread_pool_executor(
             all_files=all_files,
         )
-        print(…
+        print("Done uploading with thread pool executor.")
         # TODO: move to common place
 
     #######################################################
     def create_cruise_level_zarr_store(
-        …
-        …
-        …
-        …
-        …
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        table_name: str,
+        tempdir: str,
     ) -> None:
         try:
             # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
+            s3_manager = S3Manager()
 
             df = dynamo_db_manager.get_table_as_df(
                 table_name=table_name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
-                sensor_name=sensor_name
+                sensor_name=sensor_name,
             )
 
-            # filter the dataframe just for enums >= LEVEL_1_PROCESSING
+            # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
             # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
 
             print(f"DataFrame shape: {df.shape}")
-            cruise_channels = list(…
+            cruise_channels = list(
+                set([i for sublist in df["CHANNELS"].dropna() for i in sublist])
+            )
             cruise_channels.sort()
 
-            consolidated_zarr_width = np.sum(…
+            consolidated_zarr_width = np.sum(
+                df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
+            )
 
             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = float(…
+            cruise_min_echo_range = float(
+                np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )
 
             # [4] calculate the maximum of the max depth values
-            cruise_max_echo_range = float(
-                …
+            cruise_max_echo_range = float(
+                np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))
+            )
+            print(
+                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
+            )
 
             # [5] get number of channels
-            cruise_frequencies = […
+            cruise_frequencies = [
+                float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
+            ]
             print(cruise_frequencies)
 
             new_width = int(consolidated_zarr_width)
             print(f"new_width: {new_width}")
             #################################################################
-            store_name = f"{cruise_name}.…
+            store_name = f"{cruise_name}.zarr"
             print(store_name)
             ################################################################
-            # Delete existing
-            s3_manager = S3Manager()
+            # Delete existing zarr store if it exists
             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
             child_objects = s3_manager.get_child_objects(
                 bucket_name=self.output_bucket_name,
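In the hunk above, create_cruise_level_zarr_store sizes the cruise-level store from the per-file DynamoDB rows: the time-axis width is the sum of NUM_PING_TIME_DROPNA across files, and the depth axis is later derived from the cruise-wide min/max echo range. A runnable sketch of that sizing arithmetic, using a hypothetical three-file cruise table (column names from the diff; the numbers are invented):

import numpy as np
import pandas as pd

# Hypothetical per-file cruise table with the column names used above.
df = pd.DataFrame(
    {
        "NUM_PING_TIME_DROPNA": [9778, 9778, 4000],
        "MIN_ECHO_RANGE": [0.25, 0.25, 0.5],
        "MAX_ECHO_RANGE": [250.0, 500.0, 100.0],
    }
)

# Width of the consolidated store is the total ping count: 23556.
new_width = int(np.sum(df["NUM_PING_TIME_DROPNA"].dropna().astype(int)))

# Depth extremes over the whole cruise: 0.25 and 500.0.
cruise_min_echo_range = float(np.min(df["MIN_ECHO_RANGE"].dropna().astype(float)))
cruise_max_echo_range = float(np.max(df["MAX_ECHO_RANGE"].dropna().astype(float)))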
@@ -113,16 +128,18 @@ class CreateEmptyZarrStore:
                 objects=child_objects,
             )
             ################################################################
-            # Create new
+            # Create new zarr store
             zarr_manager = ZarrManager()
-            new_height = len(
-                …
-                …
-                …
+            new_height = len(
+                zarr_manager.get_depth_values(
+                    min_echo_range=cruise_min_echo_range,
+                    max_echo_range=cruise_max_echo_range,
+                )
+            )
             print(f"new_height: {new_height}")
 
             zarr_manager.create_zarr_store(
-                path=…
+                path=tempdir,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
@@ -134,7 +151,7 @@ class CreateEmptyZarrStore:
             )
             #################################################################
             self.upload_zarr_store_to_s3(
-                local_directory=…
+                local_directory=tempdir,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
@@ -157,11 +174,12 @@ class CreateEmptyZarrStore:
             # TODO: update enum in dynamodb
             #################################################################
         except Exception as err:
-            print(f"Problem trying to create new cruise…
+            print(f"Problem trying to create new cruise zarr store: {err}")
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()
-            …
+            # TODO: should delete zarr store in temp directory too?
+            print("Done creating cruise level zarr store")
 
 
     ###########################################################
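upload_zarr_store_to_s3 (first hunk of the diff above) mirrors a local Zarr store into S3 by walking the store directory and splitting each local path on the store name, so every file keeps its position relative to the store root. A standalone sketch of the key construction; the paths here are hypothetical, and the prefix mirrors the 'level_2/...' comment in the diff:

cruise_name = "HB0806"
object_prefix = "level_2/Henry_B._Bigelow/HB0806/EK60"

# One file inside the local store, as os.walk would yield it.
local_path = "/tmp/HB0806.zarr/Sv/0.0.0"

# Everything after the store name becomes the key suffix.
s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
# -> "level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/Sv/0.0.0"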
{cruise → water_column_sonar_processing/cruise}/resample_regrid.py

@@ -1,48 +1,52 @@
 import gc
 import os
 from pathlib import Path
+
 import numcodecs
 import numpy as np
-import xarray as xr
 import pandas as pd
+import xarray as xr
 
-from …
-from …
-from …
-
+from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
+from water_column_sonar_processing.geometry.geometry_manager import GeometryManager
+from water_column_sonar_processing.model.zarr_manager import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
 
-# TODO: when ready switch to version 3 of…
+# TODO: when ready switch to version 3 of zarr spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+
 class ResampleRegrid:
     #######################################################
     def __init__(
-        …
+        self,
     ):
         self.__overwrite = True
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-        self.dtype = …
+        self.dtype = "float32"
 
     #################################################################
     def interpolate_data(
-        …
-        …
-        …
-        …
+        self,
+        input_xr,
+        ping_times,
+        all_cruise_depth_values,
     ) -> np.ndarray:
         print("Interpolating data.")
         try:
-            data = np.empty(
-                …
-                …
-                …
-                …
+            data = np.empty(
+                (
+                    len(all_cruise_depth_values),
+                    len(ping_times),
+                    len(input_xr.frequency_nominal),
+                ),
+                dtype=self.dtype,
+            )
 
             data[:] = np.nan
 
@@ -53,37 +57,60 @@ class ResampleRegrid:
                     "depth": all_cruise_depth_values,
                     "time": ping_times,
                     "frequency": input_xr.frequency_nominal.values,
-                }
+                },
             )
 
             channels = input_xr.channel.values
-            for channel in range(…
-                …
+            for channel in range(
+                len(channels)
+            ):  # TODO: leaving off here, need to subset for just indices in time axis
+                print(
+                    np.nanmax(
+                        input_xr.echo_range.sel(
+                            channel=input_xr.channel[channel]
+                        ).values
+                    )
+                )
             #
             max_depths = np.nanmax(
                 a=input_xr.echo_range.sel(channel=input_xr.channel[channel]).values,
-                axis=1
+                axis=1,
             )
             superset_of_max_depths = set(
-                np.nanmax(…
+                np.nanmax(
+                    input_xr.echo_range.sel(
+                        channel=input_xr.channel[channel]
+                    ).values,
+                    1,
+                )
             )
-            set_of_max_depths = list(…
+            set_of_max_depths = list(
+                {x for x in superset_of_max_depths if x == x}
+            )  # removes nan's
             # iterate through partitions of data with similar depths and resample
             for select_max_depth in set_of_max_depths:
                 # TODO: for nan just skip and leave all nan's
-                select_indices = […
+                select_indices = [
+                    i
+                    for i in range(0, len(max_depths))
+                    if max_depths[i] == select_max_depth
+                ]
 
                 # now create new DataArray with proper dimension and indices
                 # data_select = input_xr.Sv.sel(
                 #     channel=input_xr.channel[channel]
                 # ).values[select_indices, :].T  # TODO: dont like this transpose
-                data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[…
+                data_select = input_xr.Sv.sel(channel=input_xr.channel[channel])[
+                    select_indices, :
+                ].T.values
                 # change from ".values[select_indices, :].T" to "[select_indices, :].values.T"
 
                 times_select = input_xr.ping_time.values[select_indices]
                 depths_select = input_xr.echo_range.sel(
                     channel=input_xr.channel[channel]
-                ).values[…
+                ).values[
+                    select_indices[0], :
+                ]  # '0' because all others in group should be same
 
                 da_select = xr.DataArray(
                     data=data_select,
@@ -91,31 +118,36 @@ class ResampleRegrid:
                     coords={
                         "depth": depths_select,
                         "time": times_select,
-                    }
-                ).dropna(dim=…
-                resampled = da_select.interp(…
+                    },
+                ).dropna(dim="depth")
+                resampled = da_select.interp(
+                    depth=all_cruise_depth_values, method="nearest"
+                )
                 # write to the resample array
                 regrid_resample.loc[
-                    dict(…
+                    dict(
+                        time=times_select,
+                        frequency=input_xr.frequency_nominal.values[channel],
+                    )
                 ] = resampled
                 print(f"updated {len(times_select)} ping times")
         except Exception as err:
-            print(f…
+            print(f"Problem finding the dynamodb table: {err}")
             raise err
         print("Done interpolating data.")
         return regrid_resample
 
     #################################################################
     def resample_regrid(
-        …
-        …
-        …
-        …
-        …
+        self,
+        ship_name,
+        cruise_name,
+        sensor_name,
+        table_name,
     ) -> None:
         """
         The goal here is to interpolate the data against the depth values already populated
-        in the existing file level…
+        in the existing file level zarr stores. We open the cruise-level store with zarr for
         read/write operations. We open the file-level store with Xarray to leverage tools for
         resampling and subsetting the data.
         """
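interpolate_data (hunks above) partitions pings by their maximum echo range, builds a (depth, time) DataArray per partition, and snaps each partition onto the shared cruise depth grid with nearest-neighbor interpolation, so a partition shallower than the grid stays NaN below its own maximum depth. A toy sketch of that interp step, with invented values (xarray's method="nearest" path requires scipy):

import numpy as np
import xarray as xr

# Toy (depth, time) partition, shaped like da_select in the hunk above.
da_select = xr.DataArray(
    data=np.arange(8.0).reshape(4, 2),
    dims=("depth", "time"),
    coords={"depth": [0.5, 1.0, 1.5, 2.0], "time": [0, 1]},
)

# Cruise-wide depth grid reaches deeper than this partition.
all_cruise_depth_values = np.arange(0.5, 3.0, 0.5)  # [0.5, 1.0, 1.5, 2.0, 2.5]
resampled = da_select.interp(depth=all_cruise_depth_values, method="nearest")
# Depths covered by the partition are copied through; depth 2.5 comes back
# NaN, matching the NaN-prefilled cruise array that regrid_resample writes into.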
@@ -124,7 +156,7 @@ class ResampleRegrid:
             zarr_manager = ZarrManager()
             # s3_manager = S3Manager()
             geo_manager = GeometryManager()
-            # get…
+            # get zarr store
             output_zarr_store = zarr_manager.open_s3_zarr_store_with_zarr(
                 ship_name=ship_name,
                 cruise_name=cruise_name,
@@ -144,7 +176,7 @@ class ResampleRegrid:
             #########################################################
             #########################################################
             # TODO: iterate files here
-            all_file_names = cruise_df[…
+            all_file_names = cruise_df["FILE_NAME"]
             for file_name in all_file_names:
                 gc.collect()
                 file_name_stem = Path(file_name).stem
@@ -157,8 +189,10 @@ class ResampleRegrid:
                 # TODO: filter rows by enum success, filter the dataframe just for enums >= LEVEL_1_PROCESSING
                 # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
-                # Get…
-                index = cruise_df.index[…
+                # Get index from all cruise files. Note: should be based on which are included in cruise.
+                index = cruise_df.index[
+                    cruise_df["FILE_NAME"] == f"{file_name_stem}.raw"
+                ][0]
 
                 # get input store
                 input_xr_zarr_store = zarr_manager.open_s3_zarr_store_with_xarray(
@@ -169,31 +203,40 @@ class ResampleRegrid:
                 )
                 #########################################################################
                 # [3] Get needed indices
-                # Offset from start…
+                # Offset from start index to insert new data. Note that missing values are excluded.
                 ping_time_cumsum = np.insert(
-                    np.cumsum(…
+                    np.cumsum(
+                        cruise_df["NUM_PING_TIME_DROPNA"].dropna().to_numpy(dtype=int)
+                    ),
                     obj=0,
-                    values=0
+                    values=0,
                 )
                 start_ping_time_index = ping_time_cumsum[index]
                 end_ping_time_index = ping_time_cumsum[index + 1]
 
-                min_echo_range = np.nanmin(np.float32(cruise_df[…
-                max_echo_range = np.nanmax(np.float32(cruise_df[…
+                min_echo_range = np.nanmin(np.float32(cruise_df["MIN_ECHO_RANGE"]))
+                max_echo_range = np.nanmax(np.float32(cruise_df["MAX_ECHO_RANGE"]))
 
-                print(…
+                print(
+                    "Creating empty ndarray for Sv data."
+                )  # Note: cruise_zarr dimensions are (depth, time, frequency)
                 cruise_sv_subset = np.empty(
-                    shape=output_zarr_store.Sv[…
+                    shape=output_zarr_store.Sv[
+                        :, start_ping_time_index:end_ping_time_index, :
+                    ].shape
                 )
                 cruise_sv_subset[:, :, :] = np.nan  # (5208, 9778, 4)
 
                 all_cruise_depth_values = zarr_manager.get_depth_values(
-                    min_echo_range=min_echo_range,
-                    max_echo_range=max_echo_range
+                    min_echo_range=min_echo_range, max_echo_range=max_echo_range
                 )
 
                 print(" ".join(list(input_xr_zarr_store.Sv.dims)))
-                if set(input_xr_zarr_store.Sv.dims) != {…
+                if set(input_xr_zarr_store.Sv.dims) != {
+                    "channel",
+                    "ping_time",
+                    "range_sample",
+                }:
                     raise Exception("Xarray dimensions are not as expected.")
 
                 # get geojson
@@ -209,8 +252,13 @@ class ResampleRegrid:
 
                 ping_times = input_xr.ping_time.values
                 # Date format: numpy.datetime64('2007-07-20T02:10:25.845073920') converts to "1184897425.845074"
-                epoch_seconds = […
-                …
+                epoch_seconds = [
+                    (pd.Timestamp(i) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
+                    for i in ping_times
+                ]
+                output_zarr_store.time[start_ping_time_index:end_ping_time_index] = (
+                    epoch_seconds
+                )
 
                 # --- UPDATING --- #
@@ -220,30 +268,33 @@ class ResampleRegrid:
                     all_cruise_depth_values=all_cruise_depth_values,
                 )
 
-                print(…
+                print(
+                    f"start_ping_time_index: {start_ping_time_index}, end_ping_time_index: {end_ping_time_index}"
+                )
 
                 #########################################################################
-                # write Sv values to cruise-level-…
-                for channel in range(…
+                # write Sv values to cruise-level-zarr-store
+                for channel in range(
+                    len(input_xr.channel.values)
+                ):  # doesn't like being written in one fell swoop :(
                     output_zarr_store.Sv[
-                        :,
-                        start_ping_time_index:end_ping_time_index,
-                        channel
+                        :, start_ping_time_index:end_ping_time_index, channel
                     ] = regrid_resample[:, :, channel]
 
                 #########################################################################
                 # [5] write subset of latitude/longitude
-                output_zarr_store.latitude[…
-                    …
-                ].values
-                output_zarr_store.longitude[…
-                    …
-                ].values
+                output_zarr_store.latitude[
+                    start_ping_time_index:end_ping_time_index
+                ] = geospatial.dropna()["latitude"].values
+                output_zarr_store.longitude[
+                    start_ping_time_index:end_ping_time_index
+                ] = geospatial.dropna()["longitude"].values
             except Exception as err:
-                print(f…
+                print(f"Problem interpolating the data: {err}")
                 raise err
             print("Done interpolating data.")
 
     #######################################################
 
+
 ###########################################################
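resample_regrid places each file's pings into the cruise store at a column offset taken from a zero-prefixed cumulative sum of per-file ping counts, and stores ping times as float seconds since the Unix epoch. A sketch of both calculations; the counts are invented, and the timestamp reproduces the conversion noted in the diff's own comment:

import numpy as np
import pandas as pd

# Per-file ping counts -> start/end column offsets in the cruise store.
num_ping_time_dropna = np.array([100, 250, 75])
ping_time_cumsum = np.insert(np.cumsum(num_ping_time_dropna), obj=0, values=0)
# ping_time_cumsum == [0, 100, 350, 425]; file index 1 occupies columns 100:350.
start, end = ping_time_cumsum[1], ping_time_cumsum[2]

# numpy.datetime64 ping time -> float seconds since the Unix epoch.
t = np.datetime64("2007-07-20T02:10:25.845073920")
epoch_seconds = (pd.Timestamp(t) - pd.Timestamp("1970-01-01")) / pd.Timedelta("1s")
# -> 1184897425.845074, matching the comment in the diff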