water-column-sonar-processing 0.0.6__py3-none-any.whl → 26.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- water_column_sonar_processing/__init__.py +2 -5
- water_column_sonar_processing/aws/__init__.py +2 -2
- water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
- water_column_sonar_processing/aws/s3_manager.py +184 -112
- water_column_sonar_processing/aws/s3fs_manager.py +29 -33
- water_column_sonar_processing/aws/sqs_manager.py +1 -1
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
- water_column_sonar_processing/cruise/datatree_manager.py +21 -0
- water_column_sonar_processing/cruise/resample_regrid.py +144 -129
- water_column_sonar_processing/geometry/__init__.py +10 -2
- water_column_sonar_processing/geometry/elevation_manager.py +111 -0
- water_column_sonar_processing/geometry/geometry_manager.py +60 -44
- water_column_sonar_processing/geometry/line_simplification.py +176 -0
- water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +157 -27
- water_column_sonar_processing/model/zarr_manager.py +663 -258
- water_column_sonar_processing/processing/__init__.py +4 -0
- water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
- water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
- water_column_sonar_processing/utility/__init__.py +9 -2
- water_column_sonar_processing/utility/cleaner.py +1 -0
- water_column_sonar_processing/utility/constants.py +69 -14
- water_column_sonar_processing/utility/pipeline_status.py +11 -15
- water_column_sonar_processing/utility/timestamp.py +3 -4
- water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
- water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
- water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
- water_column_sonar_processing/process.py +0 -147
- water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
- water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
- {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
--- water_column_sonar_processing/model/zarr_manager.py (0.0.6)
+++ water_column_sonar_processing/model/zarr_manager.py (26.1.9)
@@ -1,274 +1,633 @@
 import os
+from importlib import metadata
+from typing import Optional
 
-import numcodecs
 import numpy as np
 import xarray as xr
 import zarr
-from
+from zarr.codecs import BloscCodec, BloscShuffle
+from zarr.core.group import Group
 
-from water_column_sonar_processing.
-from water_column_sonar_processing.utility.constants import Constants, Coordinates
-from water_column_sonar_processing.utility.timestamp import Timestamp
+from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
 
-
-
+# https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/index.html
+compressors = BloscCodec(
+    cname="zstd",
+    clevel=9,
+    shuffle=BloscShuffle.bitshuffle,
+)
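The per-instance `numcodecs.Blosc` compressor (zstd, level 2) is replaced by a module-level Zarr v3 `BloscCodec` with level-9 zstd and bit-shuffling. A standalone sketch of how such a codec attaches to an array under zarr-python 3 (the store path and array parameters here are illustrative, not from the package):

```python
import numpy as np
import zarr
from zarr.codecs import BloscCodec, BloscShuffle

# Same codec configuration as the module-level `compressors` above.
codec = BloscCodec(cname="zstd", clevel=9, shuffle=BloscShuffle.bitshuffle)

# Hypothetical local store; the package passes its codec to each create_array call.
root = zarr.create_group(store="/tmp/example.zarr", zarr_format=3, overwrite=True)
sv = root.create_array(
    name="Sv",
    shape=(512, 512, 4),
    chunks=(512, 512, 1),  # one chunk per frequency channel
    dtype=np.float32,
    compressors=codec,  # zarr-python 3 keyword; the diff below passes `compressor=`
    fill_value=np.nan,
)
print(sv.info)
```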
 
 
-#
-# ZARR_V3_EXPERIMENTAL_API = 1
-
-
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
 class ZarrManager:
     #######################################################
     def __init__(
         self,
+        # endpoint_url: Optional[str] = None,
     ):
-        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
-        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
         self.__overwrite = True
-        self.
-        self.
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.key = os.environ.get("OUTPUT_BUCKET_ACCESS_KEY")
+        self.secret = os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY")
 
     #######################################################
     @staticmethod
     def get_depth_values(
-
-
-    ):
+        max_echo_range: float,  # maximum depth measured from whole cruise
+        cruise_min_epsilon: float = 0.20,  # delta subsequent measurements
+    ) -> np.ndarray[tuple]:
         # Gets the set of depth values that will be used when resampling and
-        # regridding the
-        # Note: returned values
-
-        all_cruise_depth_values = np.linspace(
-            start=
-            stop=max_echo_range,
-            num=int(max_echo_range /
+        # regridding the dataset to a cruise level model store.
+        # Note: returned values start at zero!
+        # For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+        all_cruise_depth_values = np.linspace(  # TODO: PROBLEM HERE
+            start=0,  # start it at zero
+            stop=np.ceil(max_echo_range),  # round up
+            num=int(np.ceil(max_echo_range) / cruise_min_epsilon) + 1,
            endpoint=True,
         )
 
-
+        if np.any(np.isnan(all_cruise_depth_values)):
+            raise Exception("Problem depth values returned were NaN.")
+
         return all_cruise_depth_values.round(decimals=2)
 
     #######################################################
     def create_zarr_store(
         self,
-        path: str,
+        path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
-        frequencies: list,  # units in Hz
-        width: int,
-        min_echo_range: float,  # smallest resolution in meters
+        frequencies: list,  # units in Hz, type(frequencies) == np.ndarray
+        width: int,
         max_echo_range: float,
         calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
-        print(
-            f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
-        )
-
-        # There should be no repeated frequencies
-        assert len(frequencies) == len(set(frequencies))
-        # TODO: eventually switch coordinate to "channel"
-
-        print(f"Debugging number of threads: {self.__num_threads}")
-
-        zarr_path = f"{path}/{cruise_name}.zarr"
-        store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
-        root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
-
-        #####################################################################
-        # --- Coordinate: Time --- #
-        # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
-        root.create_dataset(
-            name=Coordinates.TIME.value,
-            data=np.repeat(0.0, width),
-            shape=width,
-            chunks=(
-                Constants.TILE_SIZE.value,
-            ),  # TODO: the chunking scheme doesn't seem to be working here
-            dtype=np.dtype(Coordinates.TIME_DTYPE.value),
-            compressor=self.__compressor,
-            # fill_value=0.,
-            fill_value=np.nan,  # TODO: do i want nan's?
-            overwrite=self.__overwrite,
-        )
-
-        root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
-        root.time.attrs["units"] = Coordinates.TIME_UNITS.value
-        root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
-        root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
-
-        #####################################################################
-        # --- Coordinate: Depth --- #
-        depth_values = self.get_depth_values(
-            min_echo_range=min_echo_range, max_echo_range=max_echo_range
-        )
-
-        root.create_dataset(
-            name=Coordinates.DEPTH.value,
-            # TODO: verify that these values are correct
-            data=depth_values,
-            shape=len(depth_values),
-            chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(
-                Coordinates.DEPTH_DTYPE.value
-            ),  # float16 == 2 significant digits would be ideal
-            compressor=self.__compressor,
-            # fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-        # TODO: change to exception
-        assert not np.any(np.isnan(depth_values))
-
-        root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
-
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
-        root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
-
-        #####################################################################
-        # --- Coordinate: Latitude --- #
-        root.create_dataset(
-            name=Coordinates.LATITUDE.value,
-            data=np.repeat(0.0, width),
-            shape=width,
-            chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=0.0,
-            overwrite=self.__overwrite,
-        )
-
-        root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
-        root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
-
-        #####################################################################
-        # --- Coordinate: Longitude --- #
-        root.create_dataset(
-            name=Coordinates.LONGITUDE.value,
-            data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
-            shape=width,
-            chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=0.0,
-            overwrite=self.__overwrite,
-        )
-
-        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
-        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
-
-        #####################################################################
-        # TODO: verify adding this variable for where the bottom was detected
-        # --- Coordinate: Bottom --- #
-        root.create_dataset(
-            name=Coordinates.BOTTOM.value,
-            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
-            shape=width,
-            chunks=Constants.TILE_SIZE.value,
-            dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
-        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
-
-        #####################################################################
-        # --- Coordinate: Frequency --- #
-        root.create_dataset(
-            name=Coordinates.FREQUENCY.value,
-            data=frequencies,
-            shape=len(frequencies),
-            chunks=1,
-            dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=0.0,
-            overwrite=self.__overwrite,
-        )
-
-        # TODO: best coordinate would be channel with str type
-        root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
-            Coordinates.FREQUENCY.value
-        ]  # TODO: is this correct
-
-        root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
-        root.frequency.attrs["standard_name"] = (
-            Coordinates.FREQUENCY_STANDARD_NAME.value
-        )
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
-
-        #####################################################################
-        # --- Sv Data --- #
-        root.create_dataset(
-            name=Coordinates.SV.value,
-            shape=(len(depth_values), width, len(frequencies)),
-            chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1),
-            dtype=np.dtype(
-                Coordinates.SV_DTYPE.value
-            ),  # TODO: try to experiment with 'float16'
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
-            Coordinates.DEPTH.value,
-            Coordinates.TIME.value,
-            Coordinates.FREQUENCY.value,
-        ]
-
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
-        root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
-        root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
-
-        #####################################################################
-        # --- Metadata --- #
-        root.attrs["ship_name"] = ship_name
-        root.attrs["cruise_name"] = cruise_name
-        root.attrs["sensor_name"] = sensor_name
-        #
-        root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
-        root.attrs["processing_software_version"] = (
-            "0.0.6"  # TODO: get programmatically
-        )
-        root.attrs["processing_software_time"] = Timestamp.get_timestamp()
-        #
-        root.attrs["calibration_status"] = calibration_status
-
-        zarr.consolidate_metadata(store)
-        #####################################################################
         """
-
-        # zzz.time[0] = 1274979445.423
-        # Initialize all to origin time, will be overwritten late
+        Creates a new zarr store in a local temporary directory(?)
         """
-
+        try:
+            print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception(
+                    "Number of frequencies does not match number of channels"
+                )
+
+            zarr_path = f"{path}/{cruise_name}.zarr"
+            #####################################################################
+            frequencies = np.array(
+                frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
+            )
+            #####################################################################
+            # Define the chunk sizes and the encoding
+            depth_chunk_shape = (Constants.TILE_SIZE.value,)
+            time_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            frequency_chunk_shape = (len(frequencies),)
+            latitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            longitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            bottom_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            speed_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            distance_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            sv_chunk_shape = (Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1)
+            #####################################################################
+            root = zarr.create_group(store=zarr_path, zarr_format=3, overwrite=True)
+            #####################################################################
+            # --- Coordinate: Time --- #
+            # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+            # "data_type": "int64", "fill_value": 0, "units": "nanoseconds since 1970-01-01", "calendar": "proleptic_gregorian"
+            #
+            time_values = np.repeat(0.0, width)
+            time_values.astype(np.dtype(Coordinates.TIME_DTYPE.value))
+            root.create_array(
+                name=Coordinates.TIME.value,
+                # shape=width_indices,
+                # dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                data=time_values,
+                chunks=time_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    calendar=Coordinates.TIME_CALENDAR.value,
+                    units=Coordinates.TIME_UNITS.value,
+                    long_name=Coordinates.TIME_LONG_NAME.value,
+                    standard_name=Coordinates.TIME_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],
+                overwrite=True,
+            )
+            #####################################################################
+            #####################################################################
+            # # --- Coordinate: Depth --- #
+            depth_data_values = self.get_depth_values(
+                max_echo_range=max_echo_range,
+            )
+            depth_data = np.array(
+                depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
+            )
+            root.create_array(
+                name=Coordinates.DEPTH.value,
+                # shape=depth_indices,
+                # dtype=np.dtype(Coordinates.DEPTH_DTYPE.value),
+                data=depth_data,
+                chunks=depth_chunk_shape,
+                compressor=compressors,
+                # fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.DEPTH_UNITS.value,
+                    long_name=Coordinates.DEPTH_LONG_NAME.value,
+                    standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.DEPTH.value],  # TODO: is this right
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Latitude --- #
+            # latitude_values = np.rep(np.nan, width_indices)
+            # latitude_values.astype(np.dtype(Coordinates.LATITUDE_DTYPE.value))
+            root.create_array(
+                name=Coordinates.LATITUDE.value,
+                shape=width,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                # data=latitude_values,
+                chunks=latitude_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.LATITUDE_UNITS.value,
+                    long_name=Coordinates.LATITUDE_LONG_NAME.value,
+                    standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Longitude --- #
+            # longitude_values = np.arange(0, width_indices)
+            # longitude_values.astype(np.dtype(Coordinates.LONGITUDE_DTYPE.value))
+            root.create_array(
+                name=Coordinates.LONGITUDE.value,
+                shape=width,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                # data=longitude_values,
+                chunks=longitude_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.LONGITUDE_UNITS.value,
+                    long_name=Coordinates.LONGITUDE_LONG_NAME.value,
+                    standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
+                ),
+                dimension_names=[
+                    Coordinates.TIME.value
+                ],  # Note: LONGITUDE is indexed by TIME
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Bottom --- #
+            # bottom_values = np.repeat(12.34, width_indices)
+            # bottom_values.astype(np.dtype(Coordinates.BOTTOM_DTYPE.value))
+            root.create_array(
+                name=Coordinates.BOTTOM.value,
+                shape=width,
+                dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+                # data=bottom_values,
+                chunks=bottom_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.BOTTOM_UNITS.value,
+                    long_name=Coordinates.BOTTOM_LONG_NAME.value,
+                    standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Speed --- #
+            # speed_values = np.repeat(5.67, width_indices)
+            # speed_values.astype(np.dtype(Coordinates.SPEED_DTYPE.value))
+            root.create_array(
+                name=Coordinates.SPEED.value,
+                shape=width,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
+                # data=speed_values,
+                chunks=speed_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.SPEED_UNITS.value,
+                    long_name=Coordinates.SPEED_LONG_NAME.value,
+                    standard_name=Coordinates.SPEED_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Distance --- #
+            # distance_values = np.repeat(8.90, width_indices)
+            # distance_values.astype(np.dtype(Coordinates.DISTANCE_DTYPE.value))
+            root.create_array(
+                name=Coordinates.DISTANCE.value,
+                shape=width,
+                dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
+                # data=distance_values,
+                chunks=distance_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.DISTANCE_UNITS.value,
+                    long_name=Coordinates.DISTANCE_LONG_NAME.value,
+                    standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Coordinate: Frequency --- #
+            root.create_array(
+                name=Coordinates.FREQUENCY.value,
+                # shape=frequency_indices,
+                # dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                data=frequencies,
+                # chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                chunks=frequency_chunk_shape,
+                compressor=compressors,
+                # fill_value=0,
+                attributes=dict(
+                    units=Coordinates.FREQUENCY_UNITS.value,
+                    long_name=Coordinates.FREQUENCY_LONG_NAME.value,
+                    standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.FREQUENCY.value],
+                overwrite=True,
+            )
+            # #####################################################################
+            # # --- Sv Data --- #
+            root.create_array(
+                name=Coordinates.SV.value,
+                shape=(len(depth_data), width, len(frequencies)),
+                dtype=np.dtype(Coordinates.SV_DTYPE.value),
+                # data=,
+                chunks=sv_chunk_shape,
+                compressor=compressors,
+                fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.SV_UNITS.value,
+                    long_name=Coordinates.SV_LONG_NAME.value,
+                    standard_name=Coordinates.SV_STANDARD_NAME.value,
+                ),
+                dimension_names=[
+                    Coordinates.DEPTH.value,
+                    Coordinates.TIME.value,
+                    Coordinates.FREQUENCY.value,
+                ],
+                overwrite=True,
+            )
+            #####################################################################
+            # # --- Metadata --- #
+            root.attrs["ship_name"] = ship_name
+            root.attrs["cruise_name"] = cruise_name
+            root.attrs["sensor_name"] = sensor_name
+            #
+            root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+            # NOTE: for the version to be parsable you need to build the python package locally first.
+            root.attrs["processing_software_version"] = metadata.version(
+                "water-column-sonar-processing"
+            )
+            root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+            #
+            root.attrs["calibration_status"] = calibration_status
+            root.attrs["tile_size"] = Constants.TILE_SIZE.value
+            #
+            return zarr_path
+        except Exception as err:
+            raise RuntimeError(f"Problem trying to create zarr store, {err}")
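A hypothetical invocation of the new `create_zarr_store` (import path inferred from the file listing above; every argument value is illustrative):

```python
from water_column_sonar_processing.model.zarr_manager import ZarrManager

zarr_manager = ZarrManager()
zarr_path = zarr_manager.create_zarr_store(
    path="/tmp",                   # local scratch directory
    ship_name="Henry_B._Bigelow",  # example ship from the path comment above
    cruise_name="HB0707",
    sensor_name="EK60",
    frequencies=[18_000, 38_000, 120_000, 200_000],  # Hz, must be unique
    width=4096,                    # number of time (ping) indices
    max_echo_range=250.0,          # deepest measurement, meters
)
# -> "/tmp/HB0707.zarr": a Zarr v3 store with time/depth/latitude/longitude/
#    bottom/speed/distance/frequency arrays and a NaN-filled Sv cube.
```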
 
-
-    # def
+    # #######################################################
+    # def create_zarr_store_old(
     #     self,
-    #     path: str,
+    #     path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
     #     ship_name: str,
-    #     cruise_name,
+    #     cruise_name,
     #     sensor_name: str,
-    #
+    #     frequencies: list,  # units in Hz
+    #     width: int,
+    #     max_echo_range: float,
+    #     # cruise_min_epsilon: float,  # smallest resolution in meters
+    #     calibration_status: bool = False,  # Assume uncalibrated
+    # ) -> str:
     #     """
-    #
-    #     of updating just a subset of the cruise-level Zarr store associated
-    #     with a file-level Zarr store.
+    #     Creates a new zarr store in a local temporary directory(?)
     #     """
-    #
+    #     try:
+    #         print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
+    #         if len(frequencies) != len(set(frequencies)):
+    #             raise Exception(
+    #                 "Number of frequencies does not match number of channels"
+    #             )
+    #
+    #         zarr_path = f"{path}/{cruise_name}.zarr"
+    #         #####################################################################
+    #         # Define the chunk sizes and the encoding
+    #         # 1_000_000 data points for quickest download
+    #         spatiotemporal_chunk_size = int(1e6)
+    #         depth_chunk_shape = (512,)
+    #         time_chunk_shape = (spatiotemporal_chunk_size,)
+    #         frequency_chunk_shape = (len(frequencies),)
+    #         latitude_chunk_shape = (spatiotemporal_chunk_size,)
+    #         longitude_chunk_shape = (spatiotemporal_chunk_size,)
+    #         bottom_chunk_shape = (spatiotemporal_chunk_size,)
+    #         speed_chunk_shape = (spatiotemporal_chunk_size,)
+    #         distance_chunk_shape = (spatiotemporal_chunk_size,)
+    #         sv_chunk_shape = (512, 512, 1)  # TODO: move to constants
+    #
+    #         #####################################################################
+    #         ##### Depth #####
+    #         depth_data_values = self.get_depth_values(
+    #             max_echo_range=max_echo_range,
+    #         )
+    #
+    #         depth_data = np.array(
+    #             depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
+    #         )
+    #         depth_da = xr.DataArray(
+    #             data=depth_data,
+    #             dims=Coordinates.DEPTH.value,
+    #             name=Coordinates.DEPTH.value,
+    #             attrs=dict(
+    #                 units=Coordinates.DEPTH_UNITS.value,
+    #                 long_name=Coordinates.DEPTH_LONG_NAME.value,
+    #                 standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Time #####
+    #         # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+    #         time_data = np.array(
+    #             np.repeat(np.datetime64(0, "ns"), width),
+    #             dtype="datetime64[ns]",
+    #         )
+    #         time_da = xr.DataArray(
+    #             data=time_data,
+    #             dims=Coordinates.TIME.value,
+    #             name=Coordinates.TIME.value,
+    #             attrs=dict(
+    #                 # Note: cal & units are written automatically by xarray
+    #                 # calendar="proleptic_gregorian",
+    #                 # units="seconds since 1970-01-01 00:00:00",
+    #                 long_name=Coordinates.TIME_LONG_NAME.value,
+    #                 standard_name=Coordinates.TIME_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Frequency #####
+    #         frequency_data = np.array(
+    #             frequencies,
+    #             dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+    #         )
+    #         frequency_da = xr.DataArray(
+    #             data=frequency_data,
+    #             dims=Coordinates.FREQUENCY.value,
+    #             name=Coordinates.FREQUENCY.value,
+    #             attrs=dict(
+    #                 units=Coordinates.FREQUENCY_UNITS.value,
+    #                 long_name=Coordinates.FREQUENCY_LONG_NAME.value,
+    #                 standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Latitude #####
+    #         gps_data = np.array(
+    #             np.repeat(np.nan, width),
+    #             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+    #         )
+    #         latitude_da = xr.DataArray(
+    #             data=gps_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.LATITUDE.value,
+    #             attrs=dict(
+    #                 units=Coordinates.LATITUDE_UNITS.value,
+    #                 long_name=Coordinates.LATITUDE_LONG_NAME.value,
+    #                 standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
+    #             ),
+    #         )  # Note: LATITUDE is indexed by TIME
+    #
+    #         ##### Longitude #####
+    #         longitude_da = xr.DataArray(
+    #             data=gps_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.LONGITUDE.value,
+    #             attrs=dict(
+    #                 units=Coordinates.LONGITUDE_UNITS.value,
+    #                 long_name=Coordinates.LONGITUDE_LONG_NAME.value,
+    #                 standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
+    #             ),
+    #         )  # Note: LONGITUDE is indexed by TIME
+    #
+    #         ##### Bottom #####
+    #         bottom_data = np.array(
+    #             np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
+    #         )
+    #         bottom_da = xr.DataArray(
+    #             data=bottom_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.BOTTOM.value,
+    #             attrs=dict(
+    #                 units=Coordinates.BOTTOM_UNITS.value,
+    #                 long_name=Coordinates.BOTTOM_LONG_NAME.value,
+    #                 standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Speed #####
+    #         speed_data = np.array(
+    #             np.repeat(np.nan, width), dtype=np.dtype(Coordinates.SPEED_DTYPE.value)
+    #         )
+    #         speed_da = xr.DataArray(
+    #             data=speed_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.SPEED.value,
+    #             attrs=dict(
+    #                 units=Coordinates.SPEED_UNITS.value,
+    #                 long_name=Coordinates.SPEED_LONG_NAME.value,
+    #                 standard_name=Coordinates.SPEED_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Distance #####
+    #         distance_data = np.array(
+    #             np.repeat(np.nan, width),
+    #             dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
+    #         )
+    #         distance_da = xr.DataArray(
+    #             data=distance_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.DISTANCE.value,
+    #             attrs=dict(
+    #                 units=Coordinates.DISTANCE_UNITS.value,
+    #                 long_name=Coordinates.DISTANCE_LONG_NAME.value,
+    #                 standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
+    #             ),
+    #         )
+    #
+    #         ##### Sv #####
+    #         gc.collect()
+    #         # sv_data = np.empty(
+    #         #     (len(depth_data), width, len(frequencies)),
+    #         #     # (2501, 4_100_782, 4),  # large cruise used for testing
+    #         #     dtype=np.dtype(Coordinates.SV_DTYPE.value),
+    #         # )
+    #         sv_data = np.full(
+    #             (len(depth_data), width, len(frequencies)),
+    #             np.nan,
+    #             dtype=np.dtype(Coordinates.SV_DTYPE.value),
+    #         )
+    #         print(f"one: {sys.getsizeof(sv_data)}")
+    #         # sv_data[:] = np.nan  # initialize all
+    #
+    #         sv_da = xr.DataArray(
+    #             data=sv_data,
+    #             coords=dict(
+    #                 depth=depth_da,
+    #                 time=time_da,
+    #                 frequency=frequency_da,
+    #                 #
+    #                 latitude=latitude_da,
+    #                 longitude=longitude_da,
+    #                 bottom=bottom_da,
+    #                 speed=speed_da,
+    #                 distance=distance_da,
+    #             ),
+    #             dims=(  # Depth * Time * Frequency
+    #                 Coordinates.DEPTH.value,
+    #                 Coordinates.TIME.value,
+    #                 Coordinates.FREQUENCY.value,
+    #             ),
+    #             name=Coordinates.SV.value,
+    #             attrs=dict(
+    #                 units=Coordinates.SV_UNITS.value,
+    #                 long_name=Coordinates.SV_LONG_NAME.value,
+    #                 standard_name=Coordinates.SV_STANDARD_NAME.value,
+    #                 tiles_size=Constants.TILE_SIZE.value,
+    #                 _FillValue=np.nan,
+    #             ),
+    #         )
+    #         print(f"two: {sys.getsizeof(sv_data)}")  # getting to at least here
+    #         del sv_data
+    #         sv_da.encoding = {"compressors": [compressor], "chunks": sv_chunk_shape}
+    #         # sv_da = sv_da.astype(np.float32)  # was crashing here
+    #         gc.collect()
+    #         #####################################################################
+    #         ### Now create the xarray.Dataset
+    #         ds = xr.Dataset(
+    #             data_vars=dict(
+    #                 Sv=sv_da,
+    #                 #
+    #                 bottom=bottom_da,
+    #                 speed=speed_da,
+    #                 distance=distance_da,
+    #             ),
+    #             coords=dict(
+    #                 depth=depth_da,
+    #                 time=time_da,
+    #                 frequency=frequency_da,
+    #                 #
+    #                 latitude=latitude_da,
+    #                 longitude=longitude_da,
+    #             ),
+    #             attrs=dict(
+    #                 # --- Metadata --- #
+    #                 ship_name=ship_name,
+    #                 cruise_name=cruise_name,
+    #                 sensor_name=sensor_name,
+    #                 processing_software_name=Coordinates.PROJECT_NAME.value,
+    #                 # NOTE: for the version to be parsable you need to build the python package
+    #                 # locally first.
+    #                 processing_software_version=importlib.metadata.version(
+    #                     "water-column-sonar-processing"
+    #                 ),
+    #                 processing_software_time=Timestamp.get_timestamp(),
+    #                 calibration_status=calibration_status,
+    #                 tile_size=Constants.TILE_SIZE.value,
+    #             ),
+    #         )
+    #         del sv_da
+    #         gc.collect()
+    #         print(f"three: {sys.getsizeof(ds)}")
+    #         #####################################################################
+    #         encodings = dict(
+    #             depth={
+    #                 "compressors": [compressor],
+    #                 "chunks": depth_chunk_shape,
+    #             },
+    #             time={
+    #                 "compressors": [compressor],
+    #                 "chunks": time_chunk_shape,
+    #                 "units": Coordinates.TIME_UNITS.value,
+    #             },
+    #             frequency={
+    #                 "compressors": [compressor],
+    #                 "chunks": frequency_chunk_shape,
+    #             },
+    #             latitude={
+    #                 "compressors": [compressor],
+    #                 "chunks": latitude_chunk_shape,
+    #             },
+    #             longitude={
+    #                 "compressors": [compressor],
+    #                 "chunks": longitude_chunk_shape,
+    #             },
+    #             bottom={
+    #                 "compressors": [compressor],
+    #                 "chunks": bottom_chunk_shape,
+    #             },
+    #             speed={
+    #                 "compressors": [compressor],
+    #                 "chunks": speed_chunk_shape,
+    #             },
+    #             distance={
+    #                 "compressors": [compressor],
+    #                 "chunks": distance_chunk_shape,
+    #             },
+    #             Sv={
+    #                 "compressors": [compressor],
+    #                 "chunks": sv_chunk_shape,
+    #             },
+    #         )
+    #         gc.collect()
+    #         ds.to_zarr(
+    #             store=zarr_path,
+    #             mode="w",  # "w" means create (overwrite if exists)
+    #             encoding=encodings,
+    #             consolidated=False,
+    #             safe_chunks=False,
+    #             align_chunks=True,
+    #             zarr_format=3,
+    #             write_empty_chunks=False,  # Might need to change this
+    #         )
+    #         #####################################################################
+    #         return zarr_path
+    #     except Exception as err:
+    #         raise RuntimeError(f"Problem trying to create zarr store, {err}")
+    #     # finally:
+    #     #     cleaner = Cleaner()
+    #     #     cleaner.delete_local_files()
+    #     # TODO: should delete zarr store in temp directory too?
 
     ############################################################################
     def open_s3_zarr_store_with_zarr(
@@ -276,60 +635,106 @@ class ZarrManager:
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
-
-
+        output_bucket_name: str,
+        endpoint_url: Optional[str] = None,
+    ) -> Group:
         # Mounts a Zarr store using pythons Zarr implementation. The mounted store
         # will have read/write privileges so that store can be updated.
-        print("Opening Zarr store with Zarr.")
+        print("Opening L2 Zarr store with Zarr for writing.")
         try:
-
-
-
-
-
+            level = str(Constants.LEVEL_2.value)
+            store = f"s3://{output_bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+            print(f"endpoint url: {endpoint_url}")
+            cruise_zarr = zarr.open(
+                store=store,
+                mode="r+",
+                zarr_format=3,
+                storage_options={
+                    "endpoint_url": endpoint_url,
+                    "key": self.key,
+                    "secret": self.secret,
+                },
+            )
+            print("Done opening store with Zarr.")
+            return cruise_zarr
         except Exception as err:  # Failure
-
-            raise
-        print("Done opening Zarr store with Zarr.")
-        return cruise_zarr
+            raise RuntimeError(f"Exception encountered opening store with Zarr, {err}")
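A hypothetical write-path usage of `open_s3_zarr_store_with_zarr` (bucket name is illustrative; credentials come from the `OUTPUT_BUCKET_*` environment variables read in `__init__`):

```python
import os

# Credentials are read by ZarrManager.__init__ from these variables.
os.environ.setdefault("OUTPUT_BUCKET_ACCESS_KEY", "...")
os.environ.setdefault("OUTPUT_BUCKET_SECRET_ACCESS_KEY", "...")

zarr_manager = ZarrManager()
cruise_group = zarr_manager.open_s3_zarr_store_with_zarr(
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0707",
    sensor_name="EK60",
    output_bucket_name="example-output-bucket",  # illustrative
)
# Mode "r+" means existing arrays can be updated in place, e.g.
# cruise_group["latitude"][start:end] = new_latitudes
```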
 
-
+    ###########################################################################
+    @staticmethod
     def open_s3_zarr_store_with_xarray(
-        self,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         file_name_stem: str,
+        bucket_name: str,
+        # level: str,  # TODO: add level
+        endpoint_url: Optional[str] = None,  # needed for moto testing
     ) -> xr.Dataset:
-        print("Opening Zarr store in S3
+        print("Opening L1 Zarr store in S3 with Xarray.")
         try:
-            zarr_path = f"s3://{
-
-
-
-
+            zarr_path = f"s3://{bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
+            kwargs = {"consolidated": False}
+            ds = xr.open_dataset(
+                filename_or_obj=zarr_path,
+                engine="zarr",
+                backend_kwargs={
+                    "storage_options": {
+                        "endpoint_url": endpoint_url,
+                        "anon": True,
+                    },
+                },
+                **kwargs,
+            )
+            return ds
         except Exception as err:
-
-            raise err
-        print("Done opening Zarr store in S3 as Xarray.")
-        return ds
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
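The read path is anonymous (`"anon": True`), so a matching sketch needs no credentials; the file stem and bucket below are illustrative:

```python
# L1 stores hold one raw file each; the stem names that file (hypothetical values).
ds = ZarrManager.open_s3_zarr_store_with_xarray(
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0707",
    sensor_name="EK60",
    file_name_stem="D20070711-T182032",
    bucket_name="example-input-bucket",
)
print(ds)  # a lazily loaded xarray.Dataset backed by the Zarr store
```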
 
-
+    ###########################################################################
+    # TODO: can this be consolidated with above
+    @staticmethod
+    def open_l2_zarr_store_with_xarray(
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        bucket_name: str,
+        endpoint_url: Optional[str] = None,  # needed for moto testing
+    ) -> xr.Dataset:
+        print("Opening L2 Zarr store in S3 with Xarray.")
+        try:
+            level = str(Constants.LEVEL_2.value)
+            zarr_path = f"s3://{bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+            kwargs = {"consolidated": False}
+            ds = xr.open_dataset(
+                filename_or_obj=zarr_path,
+                engine="zarr",
+                backend_kwargs={
+                    "storage_options": {
+                        "endpoint_url": endpoint_url,
+                        "anon": True,
+                    }
+                },
+                **kwargs,
+            )
+            return ds
+        except Exception as err:
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
 
-
+    ###########################################################################
+
+    ###########################################################################
     # def create_process_synchronizer(self):
     # # TODO: explore aws redis options
     # pass
 
-
+    ###########################################################################
     # def verify_cruise_store_data(self):
     # # TODO: run a check on a finished model store to ensure that
     # # none of the time, latitude, longitude, or depth values
     # # are NaN.
     # pass
 
-
+    ###########################################################################
 
 
     ###########################################################
|