water-column-sonar-processing: 25.11.1-py3-none-any.whl → 26.1.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of water-column-sonar-processing might be problematic.
- water_column_sonar_processing/aws/s3_manager.py +2 -4
- water_column_sonar_processing/aws/s3fs_manager.py +1 -9
- water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
- water_column_sonar_processing/cruise/resample_regrid.py +88 -104
- water_column_sonar_processing/geometry/__init__.py +2 -0
- water_column_sonar_processing/geometry/elevation_manager.py +2 -2
- water_column_sonar_processing/geometry/geometry_manager.py +11 -13
- water_column_sonar_processing/geometry/line_simplification.py +10 -10
- water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
- water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
- water_column_sonar_processing/index/index_manager.py +43 -46
- water_column_sonar_processing/model/zarr_manager.py +533 -514
- water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
- water_column_sonar_processing/utility/cleaner.py +2 -1
- water_column_sonar_processing/utility/constants.py +29 -29
- water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
- water_column_sonar_processing/process.py +0 -149
- water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
- {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,21 @@
-import importlib.metadata
+import os
+from importlib import metadata
+from typing import Optional
 
 import numpy as np
 import xarray as xr
 import zarr
 from zarr.codecs import BloscCodec, BloscShuffle
-from zarr.storage import LocalStore
+from zarr.core.group import Group
 
-from water_column_sonar_processing.aws import S3FSManager
 from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
 
-#
-
-
-
+# https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/index.html
+compressors = BloscCodec(
+    cname="zstd",
+    clevel=9,
+    shuffle=BloscShuffle.bitshuffle,
+)
 
 
 # creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
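The 26.x module defines one module-level Blosc codec and reuses it for every array, instead of wiring a compressor per call as in 25.11.1. A minimal sketch of how a codec object like this plugs into zarr-python 3; the store path, array name, and shape below are illustrative assumptions, not values from the package:

import numpy as np
import zarr
from zarr.codecs import BloscCodec, BloscShuffle

# Same configuration the diff introduces: zstd, max level, bit-shuffle.
compressors = BloscCodec(cname="zstd", clevel=9, shuffle=BloscShuffle.bitshuffle)

arr = zarr.create_array(
    store="demo.zarr",      # hypothetical local store path
    name="Sv_demo",         # hypothetical array name
    shape=(512, 512),
    chunks=(256, 256),
    dtype="float32",
    fill_value=np.nan,
    compressors=compressors,
    zarr_format=3,
    overwrite=True,
)
arr[:256, :256] = 1.0        # write one chunk; untouched chunks stay at fill_value
print(arr.info_complete())   # reports stored size vs. uncompressed size

Bit-shuffled zstd at level 9 trades write speed for ratio, a reasonable choice for write-once cruise archives that are mostly read back over HTTP.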
@@ -20,33 +23,32 @@ class ZarrManager:
     #######################################################
     def __init__(
         self,
+        # endpoint_url: Optional[str] = None,
     ):
         self.__overwrite = True
+        self.key = os.environ.get("OUTPUT_BUCKET_ACCESS_KEY")
+        self.secret = os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY")
 
     #######################################################
+    @staticmethod
     def get_depth_values(
-        self,
-        # min_echo_range: float, # minimum depth measured (zero non-inclusive) from whole cruise
         max_echo_range: float,  # maximum depth measured from whole cruise
-        cruise_min_epsilon: float = 0.
-    )
+        cruise_min_epsilon: float = 0.20,  # delta subsequent measurements
+    ) -> np.ndarray[tuple]:
         # Gets the set of depth values that will be used when resampling and
         # regridding the dataset to a cruise level model store.
         # Note: returned values start at zero!
         # For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
-        print("Computing depth values.")
         all_cruise_depth_values = np.linspace(  # TODO: PROBLEM HERE
-            start=0,  #
-            stop=max_echo_range,
-            num=int(max_echo_range / cruise_min_epsilon)
-            + 1,  # int(np.ceil(max_echo_range / cruise_min_epsilon))?
+            start=0,  # start it at zero
+            stop=np.ceil(max_echo_range),  # round up
+            num=int(np.ceil(max_echo_range) / cruise_min_epsilon) + 1,
             endpoint=True,
-        )
+        )
 
         if np.any(np.isnan(all_cruise_depth_values)):
             raise Exception("Problem depth values returned were NaN.")
 
-        print("Done computing depth values.")
         return all_cruise_depth_values.round(decimals=2)
 
     #######################################################
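The rewritten linspace call rounds the cruise maximum up to a whole meter before computing the sample count, so the grid spacing works out to exactly cruise_min_epsilon. A worked example with assumed inputs:

import numpy as np

max_echo_range = 249.7    # hypothetical cruise-wide maximum depth, meters
cruise_min_epsilon = 0.20

stop = np.ceil(max_echo_range)                                # 250.0
num = int(np.ceil(max_echo_range) / cruise_min_epsilon) + 1   # 1251
depths = np.linspace(start=0, stop=stop, num=num, endpoint=True).round(decimals=2)
print(depths[:4], depths[-1], depths.size)  # [0.  0.2 0.4 0.6] 250.0 1251

With the old formula, num=int(249.7 / 0.20) + 1 = 1249 points over 0..249.7 gives an irregular step of about 0.20008 m; rounding the stop up first keeps every bin at exactly 0.20 m.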
@@ -56,667 +58,684 @@ class ZarrManager:
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
-        frequencies: list,  # units in Hz
-        width: int,
-        # min_echo_range: float,
+        frequencies: list,  # units in Hz, type(frequencies) == np.ndarray
+        width: int,
         max_echo_range: float,
-        cruise_min_epsilon: float,  # smallest resolution in meters
         calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
+        """
+        Creates a new zarr store in a local temporary directory(?)
+        This includes the water_level on top of the max_echo_range already, nothing extra needs to be done.
+        """
         try:
-
-            print(
-                f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
-            )
-            # There can not currently be repeated frequencies
-            # TODO: eventually switch coordinate to "channel" because frequencies can repeat
+            print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
             if len(frequencies) != len(set(frequencies)):
                 raise Exception(
                     "Number of frequencies does not match number of channels"
                 )
 
             zarr_path = f"{path}/{cruise_name}.zarr"
-
-
-
-            store = LocalStore(root=zarr_path)
-            root = zarr.group(
-                store=store,  # zarr_path,
-                overwrite=self.__overwrite,  # cache_attrs=True
-                zarr_format=3,
+            #####################################################################
+            frequencies = np.array(
+                frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
             )
-
+            #####################################################################
+            # Define the chunk sizes and the encoding
+            depth_chunk_shape = (Constants.TILE_SIZE.value,)
+            time_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            frequency_chunk_shape = (len(frequencies),)
+            latitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            longitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            bottom_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            speed_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            distance_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
+            sv_chunk_shape = (Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1)
+            #####################################################################
+            root = zarr.create_group(store=zarr_path, zarr_format=3, overwrite=True)
             #####################################################################
             # --- Coordinate: Time --- #
             # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
-
-
-
-
+            # "data_type": "int64", "fill_value": 0, "units": "nanoseconds since 1970-01-01", "calendar": "proleptic_gregorian"
+            #
+            time_values = np.repeat(0.0, width)
+            time_values.astype(np.dtype(Coordinates.TIME_DTYPE.value))
+            root.create_array(
                 name=Coordinates.TIME.value,
-
-                # shape=width,
-                chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                # shape=width_indices,
                 # dtype=np.dtype(Coordinates.TIME_DTYPE.value),
-
+                data=time_values,
+                chunks=time_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
+                attributes=dict(
+                    calendar=Coordinates.TIME_CALENDAR.value,
+                    units=Coordinates.TIME_UNITS.value,
+                    long_name=Coordinates.TIME_LONG_NAME.value,
+                    standard_name=Coordinates.TIME_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],
+                overwrite=True,
             )
-
-            # time.metadata.dimension_names = (Coordinates.TIME.value,)
-
-            time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
-            time.attrs["units"] = Coordinates.TIME_UNITS.value
-            time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
-            time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
-
             #####################################################################
-
-
-
+            #####################################################################
+            # # --- Coordinate: Depth --- #
+            depth_data_values = self.get_depth_values(
                 max_echo_range=max_echo_range,
-                cruise_min_epsilon=cruise_min_epsilon,
             )
             depth_data = np.array(
-
+                depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
             )
-
-            depth = root.create_array(
+            root.create_array(
                 name=Coordinates.DEPTH.value,
-                #
+                # shape=depth_indices,
+                # dtype=np.dtype(Coordinates.DEPTH_DTYPE.value),
                 data=depth_data,
-
-
-                #
-
-
-
-
-                dimension_names=
-
-
-            if np.any(np.isnan(depth_data)):
-                raise Exception("Some depth values returned were NaN.")
-
-            # depth.metadata.dimension_names = (Coordinates.DEPTH.value,)
-
-            depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
-            depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
-            depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
-
-            #####################################################################
-            # --- Coordinate: Latitude --- #
-            gps_data = np.array(
-                np.repeat(np.nan, width),
-                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                chunks=depth_chunk_shape,
+                compressors=compressors,
+                # fill_value=np.nan,
+                attributes=dict(
+                    units=Coordinates.DEPTH_UNITS.value,
+                    long_name=Coordinates.DEPTH_LONG_NAME.value,
+                    standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.DEPTH.value],  # TODO: is this right
+                overwrite=True,
             )
-
-
+            # #####################################################################
+            # # --- Coordinate: Latitude --- #
+            # latitude_values = np.rep(np.nan, width_indices)
+            # latitude_values.astype(np.dtype(Coordinates.LATITUDE_DTYPE.value))
+            root.create_array(
                 name=Coordinates.LATITUDE.value,
-
-
-                #
-                chunks=
-
-                compressors=compressor,
+                shape=width,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                # data=latitude_values,
+                chunks=latitude_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
+                attributes=dict(
+                    units=Coordinates.LATITUDE_UNITS.value,
+                    long_name=Coordinates.LATITUDE_LONG_NAME.value,
+                    standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],
+                overwrite=True,
             )
-
-            #
-            #
-
-
-            latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
-            latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
-
-            #####################################################################
-            # --- Coordinate: Longitude --- #
-            longitude = root.create_array(
+            # #####################################################################
+            # # --- Coordinate: Longitude --- #
+            # longitude_values = np.arange(0, width_indices)
+            # longitude_values.astype(np.dtype(Coordinates.LONGITUDE_DTYPE.value))
+            root.create_array(
                 name=Coordinates.LONGITUDE.value,
-
-
-                #
-                chunks=
-
-                compressors=compressor,
+                shape=width,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                # data=longitude_values,
+                chunks=longitude_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
-
-
-
-
-
-
-
-            longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
-
-            #####################################################################
-            # TODO: verify adding this variable for where the bottom was detected
-            # --- Coordinate: Bottom --- #
-            bottom_data = np.array(
-                np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
+                attributes=dict(
+                    units=Coordinates.LONGITUDE_UNITS.value,
+                    long_name=Coordinates.LONGITUDE_LONG_NAME.value,
+                    standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
+                ),
+                dimension_names=[
+                    Coordinates.TIME.value
+                ],  # Note: LONGITUDE is indexed by TIME
+                overwrite=True,
             )
-
-
+            # #####################################################################
+            # # --- Coordinate: Bottom --- #
+            # bottom_values = np.repeat(12.34, width_indices)
+            # bottom_values.astype(np.dtype(Coordinates.BOTTOM_DTYPE.value))
+            root.create_array(
                 name=Coordinates.BOTTOM.value,
-
-
-
-
-                compressors=
+                shape=width,
+                dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+                # data=bottom_values,
+                chunks=bottom_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
+                attributes=dict(
+                    units=Coordinates.BOTTOM_UNITS.value,
+                    long_name=Coordinates.BOTTOM_LONG_NAME.value,
+                    standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
             )
-
-            #
-            #
-
-
-            bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
-            bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
-
-            #####################################################################
-            # TODO: verify adding this variable with test
-            # --- Coordinate: Speed --- #
-            speed_data = np.repeat(np.nan, width)
-            speed_data.astype(np.dtype(Coordinates.SPEED_DTYPE.value), copy=False)
-
-            speed = root.create_array(
+            # #####################################################################
+            # # --- Coordinate: Speed --- #
+            # speed_values = np.repeat(5.67, width_indices)
+            # speed_values.astype(np.dtype(Coordinates.SPEED_DTYPE.value))
+            root.create_array(
                 name=Coordinates.SPEED.value,
-
-
-
-
-                compressors=
+                shape=width,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
+                # data=speed_values,
+                chunks=speed_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
+                attributes=dict(
+                    units=Coordinates.SPEED_UNITS.value,
+                    long_name=Coordinates.SPEED_LONG_NAME.value,
+                    standard_name=Coordinates.SPEED_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
             )
-
-            #
-            #
-
-
-            speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
-            speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
-
-            #####################################################################
-            # TODO: verify adding this variable with test
-            # --- Coordinate: Speed --- #
-            distance_data = np.repeat(np.nan, width)
-            distance_data.astype(np.dtype(Coordinates.DISTANCE_DTYPE.value), copy=False)
-
-            distance = root.create_array(
+            # #####################################################################
+            # # --- Coordinate: Distance --- #
+            # distance_values = np.repeat(8.90, width_indices)
+            # distance_values.astype(np.dtype(Coordinates.DISTANCE_DTYPE.value))
+            root.create_array(
                 name=Coordinates.DISTANCE.value,
-
-
-
-
-                compressors=
+                shape=width,
+                dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
+                # data=distance_values,
+                chunks=distance_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
-
-
-
-
-
-            distance.attrs["units"] = Coordinates.DISTANCE_UNITS.value
-            distance.attrs["long_name"] = Coordinates.DISTANCE_LONG_NAME.value
-            distance.attrs["standard_name"] = Coordinates.DISTANCE_STANDARD_NAME.value
-
-            #####################################################################
-            # --- Coordinate: Frequency --- #
-            frequency_data = np.array(
-                frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
+                attributes=dict(
+                    units=Coordinates.DISTANCE_UNITS.value,
+                    long_name=Coordinates.DISTANCE_LONG_NAME.value,
+                    standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.TIME.value],  # Note: _ is indexed by TIME
+                overwrite=True,
            )
-            #
-
-
+            # #####################################################################
+            # # --- Coordinate: Frequency --- #
+            root.create_array(
                 name=Coordinates.FREQUENCY.value,
-
-                # shape=len(frequencies),
-                chunks=(len(frequencies),),
+                # shape=frequency_indices,
                 # dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
-
-
-
-
+                data=frequencies,
+                # chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                chunks=frequency_chunk_shape,
+                compressors=compressors,
+                # fill_value=0,
+                attributes=dict(
+                    units=Coordinates.FREQUENCY_UNITS.value,
+                    long_name=Coordinates.FREQUENCY_LONG_NAME.value,
+                    standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
+                ),
+                dimension_names=[Coordinates.FREQUENCY.value],
+                overwrite=True,
             )
-
-            #
-
-
-            frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
-            frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
-            frequency.attrs["standard_name"] = Coordinates.FREQUENCY_STANDARD_NAME.value
-
-            #####################################################################
-            # --- Sv Data --- #
-            sv = root.create_array(
+            # #####################################################################
+            # # --- Sv Data --- #
+            root.create_array(
                 name=Coordinates.SV.value,
                 shape=(len(depth_data), width, len(frequencies)),
-                chunks=(
-                    Constants.TILE_SIZE.value,
-                    Constants.TILE_SIZE.value,
-                    1,
-                ),
                 dtype=np.dtype(Coordinates.SV_DTYPE.value),
-
+                # data=,
+                chunks=sv_chunk_shape,
+                compressors=compressors,
                 fill_value=np.nan,
-
-
+                attributes=dict(
+                    units=Coordinates.SV_UNITS.value,
+                    long_name=Coordinates.SV_LONG_NAME.value,
+                    standard_name=Coordinates.SV_STANDARD_NAME.value,
+                ),
+                dimension_names=[
                     Coordinates.DEPTH.value,
                     Coordinates.TIME.value,
                     Coordinates.FREQUENCY.value,
-
+                ],
+                overwrite=True,
             )
-            # sv.metadata.dimension_names = (
-            #     Coordinates.DEPTH.value,
-            #     Coordinates.TIME.value,
-            #     Coordinates.FREQUENCY.value,
-            # )
-            # sv.attrs["_ARRAY_DIMENSIONS"] = [
-            #     Coordinates.DEPTH.value,
-            #     Coordinates.TIME.value,
-            #     Coordinates.FREQUENCY.value,
-            # ]
-
-            sv.attrs["units"] = Coordinates.SV_UNITS.value
-            sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
-            sv.attrs["tile_size"] = Constants.TILE_SIZE.value
-
             #####################################################################
-            # --- Metadata --- #
+            # # --- Metadata --- #
             root.attrs["ship_name"] = ship_name
             root.attrs["cruise_name"] = cruise_name
             root.attrs["sensor_name"] = sensor_name
             #
             root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
-
-
-            # locally first.
-            current_project_version = importlib.metadata.version(
+            # NOTE: for the version to be parsable you need to build the python package locally first.
+            root.attrs["processing_software_version"] = metadata.version(
                 "water-column-sonar-processing"
             )
-            root.attrs["processing_software_version"] = current_project_version
             root.attrs["processing_software_time"] = Timestamp.get_timestamp()
             #
             root.attrs["calibration_status"] = calibration_status
             root.attrs["tile_size"] = Constants.TILE_SIZE.value
-
-            # TODO: ZarrUserWarning: Consolidated metadata is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.
-            # zarr.consolidate_metadata(zarr_path)
-            #####################################################################
-            """
-            # zzz = zarr.open('https://echofish-dev-master-118234403147-echofish-zarr-store.s3.us-west-2.amazonaws.com/GU1002_resample.zarr')
-            # zzz.time[0] = 1274979445.423
-            # Initialize all to origin time, will be overwritten late
-            """
+            #
             return zarr_path
         except Exception as err:
             raise RuntimeError(f"Problem trying to create zarr store, {err}")
-        # finally:
-        #     cleaner = Cleaner()
-        #     cleaner.delete_local_files()
-        # TODO: should delete zarr store in temp directory too?
 
-    #######################################################
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    # min_echo_range: float, # smallest resolution in meters --> 1.0 meters
-    # max_echo_range: float,
-    # cruise_min_epsilon: float,
-    # calibration_status: bool = False, # Assume uncalibrated
+    # #######################################################
+    # def create_zarr_store_old(
+    #     self,
+    #     path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
+    #     ship_name: str,
+    #     cruise_name: str,
+    #     sensor_name: str,
+    #     frequencies: list,  # units in Hz
+    #     width: int,
+    #     max_echo_range: float,
+    #     # cruise_min_epsilon: float,  # smallest resolution in meters
+    #     calibration_status: bool = False,  # Assume uncalibrated
     # ) -> str:
-    #
-    #
+    #     """
+    #     Creates a new zarr store in a local temporary directory(?)
+    #     """
     #     try:
-    #
-    #         print(
-    #             f"Creating level 3 local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
-    #         )
+    #         print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
     #         if len(frequencies) != len(set(frequencies)):
     #             raise Exception(
     #                 "Number of frequencies does not match number of channels"
     #             )
     #
-    #         # print(f"Debugging number of threads: {self.__num_threads}")
-    #
     #         zarr_path = f"{path}/{cruise_name}.zarr"
-    #         store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
-    #         root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
-    #
     #         #####################################################################
-    #         #
-    #         #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #         )
-    #
-    #         time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-    #         time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
-    #         time.attrs["units"] = Coordinates.TIME_UNITS.value
-    #         time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
-    #         time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+    #         # Define the chunk sizes and the encoding
+    #         # 1_000_000 data points for quickest download
+    #         spatiotemporal_chunk_size = int(1e6)
+    #         depth_chunk_shape = (512,)
+    #         time_chunk_shape = (spatiotemporal_chunk_size,)
+    #         frequency_chunk_shape = (len(frequencies),)
+    #         latitude_chunk_shape = (spatiotemporal_chunk_size,)
+    #         longitude_chunk_shape = (spatiotemporal_chunk_size,)
+    #         bottom_chunk_shape = (spatiotemporal_chunk_size,)
+    #         speed_chunk_shape = (spatiotemporal_chunk_size,)
+    #         distance_chunk_shape = (spatiotemporal_chunk_size,)
+    #         sv_chunk_shape = (512, 512, 1)  # TODO: move to constants
    #
     #         #####################################################################
-    #
-    #
-    #         # min_echo_range=min_echo_range,
+    #         ##### Depth #####
+    #         depth_data_values = self.get_depth_values(
     #             max_echo_range=max_echo_range,
-    #             cruise_min_epsilon=cruise_min_epsilon,
     #         )
     #
-    #
+    #         depth_data = np.array(
+    #             depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
+    #         )
+    #         depth_da = xr.DataArray(
+    #             data=depth_data,
+    #             dims=Coordinates.DEPTH.value,
     #             name=Coordinates.DEPTH.value,
-    #
-    #
-    #
-    #
-    #
-    #                 Coordinates.DEPTH_DTYPE.value  # TODO: convert to integers and only get whole number depths
-    #             ),  # float16 == 2 significant digits would be ideal
-    #             compressor=compressor,
-    #             # fill_value=np.nan,
-    #             overwrite=self.__overwrite,
+    #             attrs=dict(
+    #                 units=Coordinates.DEPTH_UNITS.value,
+    #                 long_name=Coordinates.DEPTH_LONG_NAME.value,
+    #                 standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
+    #             ),
     #         )
     #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ##### Time #####
+    #         # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+    #         time_data = np.array(
+    #             np.repeat(np.datetime64(0, "ns"), width),
+    #             dtype="datetime64[ns]",
+    #         )
+    #         time_da = xr.DataArray(
+    #             data=time_data,
+    #             dims=Coordinates.TIME.value,
+    #             name=Coordinates.TIME.value,
+    #             attrs=dict(
+    #                 # Note: cal & units are written automatically by xarray
+    #                 # calendar="proleptic_gregorian",
+    #                 # units="seconds since 1970-01-01 00:00:00",
+    #                 long_name=Coordinates.TIME_LONG_NAME.value,
+    #                 standard_name=Coordinates.TIME_STANDARD_NAME.value,
+    #             ),
+    #         )
     #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ##### Frequency #####
+    #         frequency_data = np.array(
+    #             frequencies,
+    #             dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+    #         )
+    #         frequency_da = xr.DataArray(
+    #             data=frequency_data,
+    #             dims=Coordinates.FREQUENCY.value,
+    #             name=Coordinates.FREQUENCY.value,
+    #             attrs=dict(
+    #                 units=Coordinates.FREQUENCY_UNITS.value,
+    #                 long_name=Coordinates.FREQUENCY_LONG_NAME.value,
+    #                 standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
+    #             ),
     #         )
     #
-    #
-    #
-    #
-    #
-    #         root.latitude.attrs["standard_name"] = (
-    #             Coordinates.LATITUDE_STANDARD_NAME.value
+    #         ##### Latitude #####
+    #         gps_data = np.array(
+    #             np.repeat(np.nan, width),
+    #             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
     #         )
+    #         latitude_da = xr.DataArray(
+    #             data=gps_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.LATITUDE.value,
+    #             attrs=dict(
+    #                 units=Coordinates.LATITUDE_UNITS.value,
+    #                 long_name=Coordinates.LATITUDE_LONG_NAME.value,
+    #                 standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
+    #             ),
+    #         )  # Note: LATITUDE is indexed by TIME
     #
-    #
-    #
-    #
+    #         ##### Longitude #####
+    #         longitude_da = xr.DataArray(
+    #             data=gps_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
     #             name=Coordinates.LONGITUDE.value,
-    #
-    #
-    #
-    #
-    #
-    #
-    #             fill_value=np.nan,
-    #             overwrite=self.__overwrite,
-    #         )
+    #             attrs=dict(
+    #                 units=Coordinates.LONGITUDE_UNITS.value,
+    #                 long_name=Coordinates.LONGITUDE_LONG_NAME.value,
+    #                 standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
+    #             ),
+    #         )  # Note: LONGITUDE is indexed by TIME
     #
-    #
-    #
-    #
-    #         root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
-    #         root.longitude.attrs["standard_name"] = (
-    #             Coordinates.LONGITUDE_STANDARD_NAME.value
+    #         ##### Bottom #####
+    #         bottom_data = np.array(
+    #             np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
     #         )
-    #
-    #
-    #
-    #
-    #
+    #         bottom_da = xr.DataArray(
+    #             data=bottom_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
     #             name=Coordinates.BOTTOM.value,
-    #
-    #
-    #
-    #
-    #
-    #             ),  # TODO: should also only be integers
-    #             compressor=compressor,
-    #             fill_value=0.0,
-    #             overwrite=self.__overwrite,
+    #             attrs=dict(
+    #                 units=Coordinates.BOTTOM_UNITS.value,
+    #                 long_name=Coordinates.BOTTOM_LONG_NAME.value,
+    #                 standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
+    #             ),
     #         )
     #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ##### Speed #####
+    #         speed_data = np.array(
+    #             np.repeat(np.nan, width), dtype=np.dtype(Coordinates.SPEED_DTYPE.value)
+    #         )
+    #         speed_da = xr.DataArray(
+    #             data=speed_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
     #             name=Coordinates.SPEED.value,
-    #
-    #
-    #
-    #
-    #
-    #             fill_value=np.nan,
-    #             overwrite=self.__overwrite,
+    #             attrs=dict(
+    #                 units=Coordinates.SPEED_UNITS.value,
+    #                 long_name=Coordinates.SPEED_LONG_NAME.value,
+    #                 standard_name=Coordinates.SPEED_STANDARD_NAME.value,
+    #             ),
     #         )
     #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ##### Distance #####
+    #         distance_data = np.array(
+    #             np.repeat(np.nan, width),
+    #             dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
+    #         )
+    #         distance_da = xr.DataArray(
+    #             data=distance_data,
+    #             coords=dict(
+    #                 time=time_da,
+    #             ),
+    #             dims=Coordinates.TIME.value,  # Note: "TIME"
+    #             name=Coordinates.DISTANCE.value,
+    #             attrs=dict(
+    #                 units=Coordinates.DISTANCE_UNITS.value,
+    #                 long_name=Coordinates.DISTANCE_LONG_NAME.value,
+    #                 standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
+    #             ),
     #         )
     #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ##### Sv #####
+    #         gc.collect()
+    #         # sv_data = np.empty(
+    #         #     (len(depth_data), width, len(frequencies)),
+    #         #     # (2501, 4_100_782, 4),  # large cruise used for testing
+    #         #     dtype=np.dtype(Coordinates.SV_DTYPE.value),
+    #         # )
+    #         sv_data = np.full(
+    #             (len(depth_data), width, len(frequencies)),
+    #             np.nan,
+    #             dtype=np.dtype(Coordinates.SV_DTYPE.value),
    #         )
+    #         print(f"one: {sys.getsizeof(sv_data)}")
+    #         # sv_data[:] = np.nan  # initialize all
     #
-    #
-    #
-    #
+    #         sv_da = xr.DataArray(
+    #             data=sv_data,
+    #             coords=dict(
+    #                 depth=depth_da,
+    #                 time=time_da,
+    #                 frequency=frequency_da,
+    #                 #
+    #                 latitude=latitude_da,
+    #                 longitude=longitude_da,
+    #                 bottom=bottom_da,
+    #                 speed=speed_da,
+    #                 distance=distance_da,
+    #             ),
+    #             dims=(  # Depth * Time * Frequency
+    #                 Coordinates.DEPTH.value,
+    #                 Coordinates.TIME.value,
+    #                 Coordinates.FREQUENCY.value,
+    #             ),
     #             name=Coordinates.SV.value,
-    #
-    #
-    #
-    #
-    #
+    #             attrs=dict(
+    #                 units=Coordinates.SV_UNITS.value,
+    #                 long_name=Coordinates.SV_LONG_NAME.value,
+    #                 standard_name=Coordinates.SV_STANDARD_NAME.value,
+    #                 tiles_size=Constants.TILE_SIZE.value,
+    #                 _FillValue=np.nan,
     #             ),
-    #             dtype=np.dtype("int8"),  # Coordinates.SV_DTYPE.value
-    #             compressor=compressor,  # TODO: get compression working?!
-    #             # fill_value=np.nan,
-    #             overwrite=self.__overwrite,
     #         )
-    #
-    #
-    #
-    #
-    #
-    #         ]
-    #         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
-    #         root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
-    #         root.Sv.attrs["tile_size"] = TILE_SIZE
-    #
+    #         print(f"two: {sys.getsizeof(sv_data)}")  # getting to at least here
+    #         del sv_data
+    #         sv_da.encoding = {"compressors": [compressor], "chunks": sv_chunk_shape}
+    #         # sv_da = sv_da.astype(np.float32)  # was crashing here
+    #         gc.collect()
     #         #####################################################################
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
-    #
+    #         ### Now create the xarray.Dataset
+    #         ds = xr.Dataset(
+    #             data_vars=dict(
+    #                 Sv=sv_da,
+    #                 #
+    #                 bottom=bottom_da,
+    #                 speed=speed_da,
+    #                 distance=distance_da,
+    #             ),
+    #             coords=dict(
+    #                 depth=depth_da,
+    #                 time=time_da,
+    #                 frequency=frequency_da,
+    #                 #
+    #                 latitude=latitude_da,
+    #                 longitude=longitude_da,
+    #             ),
+    #             attrs=dict(
+    #                 # --- Metadata --- #
+    #                 ship_name=ship_name,
+    #                 cruise_name=cruise_name,
+    #                 sensor_name=sensor_name,
+    #                 processing_software_name=Coordinates.PROJECT_NAME.value,
+    #                 # NOTE: for the version to be parsable you need to build the python package
+    #                 # locally first.
+    #                 processing_software_version=importlib.metadata.version(
+    #                     "water-column-sonar-processing"
+    #                 ),
+    #                 processing_software_time=Timestamp.get_timestamp(),
+    #                 calibration_status=calibration_status,
+    #                 tile_size=Constants.TILE_SIZE.value,
+    #             ),
+    #         )
+    #         del sv_da
+    #         gc.collect()
+    #         print(f"three: {sys.getsizeof(ds)}")
+    #         #####################################################################
+    #         encodings = dict(
+    #             depth={
+    #                 "compressors": [compressor],
+    #                 "chunks": depth_chunk_shape,
+    #             },
+    #             time={
+    #                 "compressors": [compressor],
+    #                 "chunks": time_chunk_shape,
+    #                 "units": Coordinates.TIME_UNITS.value,
+    #             },
+    #             frequency={
+    #                 "compressors": [compressor],
+    #                 "chunks": frequency_chunk_shape,
+    #             },
+    #             latitude={
+    #                 "compressors": [compressor],
+    #                 "chunks": latitude_chunk_shape,
+    #             },
+    #             longitude={
+    #                 "compressors": [compressor],
+    #                 "chunks": longitude_chunk_shape,
+    #             },
+    #             bottom={
+    #                 "compressors": [compressor],
+    #                 "chunks": bottom_chunk_shape,
+    #             },
+    #             speed={
+    #                 "compressors": [compressor],
+    #                 "chunks": speed_chunk_shape,
+    #             },
+    #             distance={
+    #                 "compressors": [compressor],
+    #                 "chunks": distance_chunk_shape,
+    #             },
+    #             Sv={
+    #                 "compressors": [compressor],
+    #                 "chunks": sv_chunk_shape,
+    #             },
+    #         )
+    #         gc.collect()
+    #         ds.to_zarr(
+    #             store=zarr_path,
+    #             mode="w",  # "w" means create (overwrite if exists)
+    #             encoding=encodings,
+    #             consolidated=False,
+    #             safe_chunks=False,
+    #             align_chunks=True,
+    #             zarr_format=3,
+    #             write_empty_chunks=False,  # Might need to change this
     #         )
-    #         root.attrs["processing_software_version"] = current_project_version
-    #         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
-    #         #
-    #         # TODO: add level somewhere?
-    #         #
-    #         root.attrs["calibration_status"] = calibration_status
-    #         root.attrs["tile_size"] = TILE_SIZE
-    #
-    #         zarr.consolidate_metadata(store)
     #         #####################################################################
     #         return zarr_path
     #     except Exception as err:
-    #         raise RuntimeError(f"Problem trying to create
+    #         raise RuntimeError(f"Problem trying to create zarr store, {err}")
     #     # finally:
     #     #     cleaner = Cleaner()
     #     #     cleaner.delete_local_files()
     #     # TODO: should delete zarr store in temp directory too?
 
-    ############################################################################
-    # def update_zarr_store(
-    #     self,
-    #     path: str,
-    #     ship_name: str,
-    #     cruise_name: str,  # TODO: just pass stem
-    #     sensor_name: str,
-    # ) -> None:
-    #     """
-    #     Opens an existing Zarr store living in a s3 bucket for the purpose
-    #     of updating just a subset of the cruise-level Zarr store associated
-    #     with a file-level Zarr store.
-    #     """
-    #     pass
-
     ############################################################################
     def open_s3_zarr_store_with_zarr(
         self,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
-        # zarr_synchronizer: Union[str, None] = None,  # TODO:
         output_bucket_name: str,
-        endpoint_url=None,
-    )
+        endpoint_url: Optional[str] = None,
+    ) -> Group:
         # Mounts a Zarr store using pythons Zarr implementation. The mounted store
         # will have read/write privileges so that store can be updated.
         print("Opening L2 Zarr store with Zarr for writing.")
         try:
-
-
-
-
-
-
-
-
+            level = str(Constants.LEVEL_2.value)
+            store = f"s3://{output_bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+            print(f"endpoint url: {endpoint_url}")
+            cruise_zarr = zarr.open(
+                store=store,
+                mode="r+",
+                zarr_format=3,
+                storage_options={
+                    "endpoint_url": endpoint_url,
+                    "key": self.key,
+                    "secret": self.secret,
+                },
             )
-
-
+            print("Done opening store with Zarr.")
+            return cruise_zarr
+        except Exception as err:  # Failure
+            raise RuntimeError(f"Exception encountered opening store with Zarr, {err}")
 
-
+    ###########################################################################
+    @staticmethod
     def open_s3_zarr_store_with_xarray(
-        self,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         file_name_stem: str,
-
-
+        bucket_name: str,
+        # level: str,  # TODO: add level
+        endpoint_url: Optional[str] = None,  # needed for moto testing
     ) -> xr.Dataset:
-        print(
-            "Opening L1 Zarr store in S3 with Xarray."
-        )  # TODO: Is this only used for reading from?
+        print("Opening L1 Zarr store in S3 with Xarray.")
         try:
-            zarr_path = f"s3://{
-
-
-
+            zarr_path = f"s3://{bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
+            kwargs = {"consolidated": False}
+            ds = xr.open_dataset(
+                filename_or_obj=zarr_path,
+                engine="zarr",
+                backend_kwargs={
+                    "storage_options": {
+                        "endpoint_url": endpoint_url,
+                        "anon": True,
+                    },
+                },
+                **kwargs,
+            )
             return ds
         except Exception as err:
             raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
-        finally:
-            print("Exiting opening Zarr store in S3 as Xarray.")
 
+    ###########################################################################
+    # TODO: can this be consolidated with above
+    @staticmethod
     def open_l2_zarr_store_with_xarray(
-        self,
         ship_name: str,
         cruise_name: str,
         sensor_name: str,
         bucket_name: str,
-        endpoint_url=None,
+        endpoint_url: Optional[str] = None,  # needed for moto testing
     ) -> xr.Dataset:
         print("Opening L2 Zarr store in S3 with Xarray.")
         try:
-
-
-
+            level = str(Constants.LEVEL_2.value)
+            zarr_path = f"s3://{bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+            kwargs = {"consolidated": False}
             ds = xr.open_dataset(
-                filename_or_obj=
+                filename_or_obj=zarr_path,
                 engine="zarr",
+                backend_kwargs={
+                    "storage_options": {
+                        "endpoint_url": endpoint_url,
+                        "anon": True,
+                    }
+                },
+                **kwargs,
             )
+            return ds
         except Exception as err:
             raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
-            print("Done opening Zarr store in S3 as Xarray.")
-            return ds
 
-
+    ###########################################################################
 
-
+    ###########################################################################
     # def create_process_synchronizer(self):
     #     # TODO: explore aws redis options
     #     pass
 
-
+    ###########################################################################
     # def verify_cruise_store_data(self):
     #     # TODO: run a check on a finished model store to ensure that
     #     #     none of the time, latitude, longitude, or depth values
     #     #     are NaN.
     #     pass
 
-
+    ###########################################################################
 
 
 ###########################################################