water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
  2. water_column_sonar_processing/aws/s3_manager.py +95 -90
  3. water_column_sonar_processing/aws/s3fs_manager.py +5 -3
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/__init__.py +2 -1
  6. water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
  7. water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -21
  9. water_column_sonar_processing/cruise/resample_regrid.py +57 -47
  10. water_column_sonar_processing/dataset/__init__.py +3 -0
  11. water_column_sonar_processing/dataset/dataset_manager.py +205 -0
  12. water_column_sonar_processing/dataset/feature_manager.py +32 -0
  13. water_column_sonar_processing/geometry/geometry_manager.py +11 -12
  14. water_column_sonar_processing/geometry/line_simplification.py +26 -1
  15. water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
  16. water_column_sonar_processing/index/index_manager.py +18 -17
  17. water_column_sonar_processing/model/zarr_manager.py +504 -256
  18. water_column_sonar_processing/processing/__init__.py +3 -2
  19. water_column_sonar_processing/processing/batch_downloader.py +11 -11
  20. water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
  21. water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
  22. water_column_sonar_processing/utility/__init__.py +9 -2
  23. water_column_sonar_processing/utility/cleaner.py +1 -2
  24. water_column_sonar_processing/utility/constants.py +26 -7
  25. water_column_sonar_processing/utility/timestamp.py +1 -0
  26. water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
  27. water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
  28. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
  29. water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
  30. water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
  31. water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
  32. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
 import importlib.metadata

-import numcodecs
 import numpy as np
 import xarray as xr
 import zarr
@@ -9,50 +8,45 @@ from numcodecs import Blosc
 from water_column_sonar_processing.aws import S3FSManager
 from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp

-# numcodecs.blosc.use_threads = False
-# numcodecs.blosc.set_nthreads(1)
-
+Blosc.use_threads = True
+compressor = Blosc(cname="zstd", clevel=9)

 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1


-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
 class ZarrManager:
     #######################################################
     def __init__(
         self,
     ):
-        # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
-        self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
         self.__overwrite = True
-        self.__num_threads = numcodecs.blosc.get_nthreads()
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")

     #######################################################
     def get_depth_values(
         self,
-        min_echo_range: float = 1.0,  # minimum depth measured (zero non-inclusive) from whole cruise
-        max_echo_range: float = 100.0,  # maximum depth measured from whole cruise
+        # min_echo_range: float,  # minimum depth measured (zero non-inclusive) from whole cruise
+        max_echo_range: float,  # maximum depth measured from whole cruise
         cruise_min_epsilon: float = 0.25,  # resolution between subsequent measurements
     ):
         # Gets the set of depth values that will be used when resampling and
-        # regridding the data to a cruise level model store.
-        # Note: returned values do not start at zero.
+        # regridding the dataset to a cruise level model store.
+        # Note: returned values start at zero!
         # For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
-        print("Getting depth values.")
+        print("Computing depth values.")
         all_cruise_depth_values = np.linspace(  # TODO: PROBLEM HERE
-            start=min_echo_range,
+            start=0,  # just start it at zero
             stop=max_echo_range,
-            num=int((max_echo_range - min_echo_range) / cruise_min_epsilon) + 1,
+            num=int(max_echo_range / cruise_min_epsilon)
+            + 1,  # int(np.ceil(max_echo_range / cruise_min_epsilon))?
             endpoint=True,
         )  # np.arange(min_echo_range, max_echo_range, step=min_echo_range) # this is worse

         if np.any(np.isnan(all_cruise_depth_values)):
             raise Exception("Problem depth values returned were NaN.")

-        print("Done getting depth values.")
+        print("Done computing depth values.")
         return all_cruise_depth_values.round(decimals=2)

     #######################################################
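
Two things stand out in the hunk above: the compressor is now a module-level Blosc(cname="zstd", clevel=9) shared by every array, and the depth grid is anchored at zero rather than at min_echo_range. The new grid can be reproduced with plain NumPy; here is a minimal sketch with assumed example inputs (max_echo_range=100.0 and cruise_min_epsilon=0.25 are illustrative values, not taken from the diff):

    import numpy as np

    # Depths now start at zero and step by cruise_min_epsilon up to
    # max_echo_range, endpoint included (example values assumed).
    max_echo_range = 100.0
    cruise_min_epsilon = 0.25
    depths = np.linspace(
        start=0,
        stop=max_echo_range,
        num=int(max_echo_range / cruise_min_epsilon) + 1,
        endpoint=True,
    ).round(decimals=2)
    # depths -> [0.0, 0.25, 0.5, ..., 100.0], 401 values in this example;
    # the old code started the grid at min_echo_range instead of zero.

Note that the spacing equals cruise_min_epsilon exactly only when max_echo_range is a whole multiple of it; the inline "np.ceil" TODO hints at the non-multiple case, where linspace silently stretches the step.
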
@@ -64,241 +58,496 @@ class ZarrManager:
         sensor_name: str,
         frequencies: list,  # units in Hz
         width: int,  # TODO: needs better name... "ping_time"
-        min_echo_range: float,  # smallest resolution in meters
+        # min_echo_range: float,
+        max_echo_range: float,
+        cruise_min_epsilon: float,  # smallest resolution in meters
+        calibration_status: bool = False,  # Assume uncalibrated
+    ) -> str:
+        try:
+            # TODO: problem throwing exceptions here
+            print(
+                f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+            )
+            # There can not currently be repeated frequencies
+            # TODO: eventually switch coordinate to "channel" because frequencies can repeat
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception(
+                    "Number of frequencies does not match number of channels"
+                )
+
+            zarr_path = f"{path}/{cruise_name}.zarr"
+            store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+            root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
+
+            #####################################################################
+            # --- Coordinate: Time --- #
+            # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+            root.create_dataset(
+                name=Coordinates.TIME.value,
+                data=np.repeat(0.0, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+            root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+            root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+            root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Depth --- #
+            depth_values = self.get_depth_values(
+                # min_echo_range=min_echo_range,
+                max_echo_range=max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
+            )
+
+            root.create_dataset(
+                name=Coordinates.DEPTH.value,
+                # TODO: verify that these values are correct
+                data=depth_values,
+                shape=len(depth_values),
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.DEPTH_DTYPE.value
+                ),  # float16 == 2 significant digits would be ideal
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            if np.any(np.isnan(depth_values)):
+                raise Exception("Some depth values returned were NaN.")
+
+            root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
+
+            root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+            root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+            root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Latitude --- #
+            root.create_dataset(
+                name=Coordinates.LATITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LATITUDE is indexed by TIME
+            root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+            root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+            root.latitude.attrs["standard_name"] = (
+                Coordinates.LATITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Coordinate: Longitude --- #
+            root.create_dataset(
+                name=Coordinates.LONGITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LONGITUDE is indexed by TIME
+            root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+            root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+            root.longitude.attrs["standard_name"] = (
+                Coordinates.LONGITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # TODO: verify adding this variable for where the bottom was detected
+            # --- Coordinate: Bottom --- #
+            root.create_dataset(
+                name=Coordinates.BOTTOM.value,
+                data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # BOTTOM is indexed by TIME
+            root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+            root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+            root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+
+            #####################################################################
+            # TODO: verify adding this variable with test
+            # --- Coordinate: Speed --- #
+            root.create_dataset(
+                name=Coordinates.SPEED.value,
+                data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # SPEED is indexed by TIME
+            root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+
+            root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+            root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+            root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Frequency --- #
+            root.create_dataset(
+                name=Coordinates.FREQUENCY.value,
+                data=frequencies,
+                shape=len(frequencies),
+                chunks=len(frequencies),
+                dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # TODO: best coordinate would be channel with str type
+            root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.FREQUENCY.value
+            ]  # TODO: is this correct
+
+            root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+            root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+            root.frequency.attrs["standard_name"] = (
+                Coordinates.FREQUENCY_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Sv Data --- #
+            root.create_dataset(
+                name=Coordinates.SV.value,
+                shape=(len(depth_values), width, len(frequencies)),
+                chunks=(
+                    Constants.TILE_SIZE.value,
+                    Constants.TILE_SIZE.value,
+                    1,
+                ),
+                dtype=np.dtype(Coordinates.SV_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.DEPTH.value,
+                Coordinates.TIME.value,
+                Coordinates.FREQUENCY.value,
+            ]
+
+            root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+            root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+            root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+            #####################################################################
+            # --- Metadata --- #
+            root.attrs["ship_name"] = ship_name
+            root.attrs["cruise_name"] = cruise_name
+            root.attrs["sensor_name"] = sensor_name
+            #
+            root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+
+            current_project_version = importlib.metadata.version(
+                "water-column-sonar-processing"
+            )
+            root.attrs["processing_software_version"] = current_project_version
+            root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+            #
+            root.attrs["calibration_status"] = calibration_status
+            root.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+            zarr.consolidate_metadata(store)
+            #####################################################################
+            """
+            # zzz = zarr.open('https://echofish-dev-master-118234403147-echofish-zarr-store.s3.us-west-2.amazonaws.com/GU1002_resample.zarr')
+            # zzz.time[0] = 1274979445.423
+            # Initialize all to origin time, will be overwritten late
+            """
+            return zarr_path
+        except Exception as err:
+            raise RuntimeError(f"Problem trying to create zarr store, {err}")
+        # finally:
+        #     cleaner = Cleaner()
+        #     cleaner.delete_local_files()
+        # TODO: should delete zarr store in temp directory too?
+
+    #######################################################
+    #
+    # LEVEL 3 - LEVEL 3 - LEVEL 3 - LEVEL 3  # TODO: move to separate project for zarr 3?
+    #
+    def create_zarr_store_level_3(
+        self,
+        path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        frequencies: list,  # units in Hz
+        width: int,  # TODO: needs better name... "ping_time"
+        min_echo_range: float,  # smallest resolution in meters --> 1.0 meters
         max_echo_range: float,
         cruise_min_epsilon: float,
         calibration_status: bool = False,  # Assume uncalibrated
     ) -> str:
-        print(
-            f"Creating local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
-        )
-        # There can not currently be repeated frequencies
-        # TODO: eventually switch coordinate to "channel" because frequencies can repeat
-        if len(frequencies) != len(set(frequencies)):
-            raise Exception("Number of frequencies does not match number of channels")
-
-        print(f"Debugging number of threads: {self.__num_threads}")
-
-        zarr_path = f"{path}/{cruise_name}.zarr"
-        store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
-        root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
-
-        #####################################################################
-        # --- Coordinate: Time --- #
-        # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
-        root.create_dataset(
-            name=Coordinates.TIME.value,
-            data=np.repeat(0.0, width),
-            shape=width,
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(Coordinates.TIME_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
-        root.time.attrs["units"] = Coordinates.TIME_UNITS.value
-        root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
-        root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
-
-        #####################################################################
-        # --- Coordinate: Depth --- #
-        depth_values = self.get_depth_values(
-            min_echo_range=min_echo_range,
-            max_echo_range=max_echo_range,
-            cruise_min_epsilon=cruise_min_epsilon,
-        )
-
-        root.create_dataset(
-            name=Coordinates.DEPTH.value,
-            # TODO: verify that these values are correct
-            data=depth_values,
-            shape=len(depth_values),
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(
-                Coordinates.DEPTH_DTYPE.value
-            ),  # float16 == 2 significant digits would be ideal
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        if np.any(np.isnan(depth_values)):
-            raise Exception("Some depth values returned were NaN.")
-
-        root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
-
-        root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
-        root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
-        root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
-
-        #####################################################################
-        # --- Coordinate: Latitude --- #
-        root.create_dataset(
-            name=Coordinates.LATITUDE.value,
-            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
-            data=np.repeat(np.nan, width),
-            shape=width,
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        # Note: LATITUDE is indexed by TIME
-        root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
-        root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
-        root.latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
-
-        #####################################################################
-        # --- Coordinate: Longitude --- #
-        root.create_dataset(
-            name=Coordinates.LONGITUDE.value,
-            # data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
-            data=np.repeat(np.nan, width),
-            shape=width,
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        # Note: LONGITUDE is indexed by TIME
-        root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
-        root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
-        root.longitude.attrs["standard_name"] = (
-            Coordinates.LONGITUDE_STANDARD_NAME.value
-        )
-
-        #####################################################################
-        # TODO: verify adding this variable for where the bottom was detected
-        # --- Coordinate: Bottom --- #
-        root.create_dataset(
-            name=Coordinates.BOTTOM.value,
-            data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
-            shape=width,
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=0.0,
-            overwrite=self.__overwrite,
-        )
-
-        # BOTTOM is indexed by TIME
-        root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
-        root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
-        root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
-
-        #####################################################################
-        # TODO: verify adding this variable with test
-        # --- Coordinate: Speed --- #
-        root.create_dataset(
-            name=Coordinates.SPEED.value,
-            data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
-            shape=width,
-            chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
-            dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        # SPEED is indexed by TIME
-        root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
-
-        root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
-        root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
-        root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
-
-        #####################################################################
-        # --- Coordinate: Frequency --- #
-        root.create_dataset(
-            name=Coordinates.FREQUENCY.value,
-            data=frequencies,
-            shape=len(frequencies),
-            chunks=len(frequencies),
-            dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
-            compressor=self.__compressor,
-            fill_value=0.0,
-            overwrite=self.__overwrite,
-        )
-
-        # TODO: best coordinate would be channel with str type
-        root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
-            Coordinates.FREQUENCY.value
-        ]  # TODO: is this correct
-
-        root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
-        root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
-        root.frequency.attrs["standard_name"] = (
-            Coordinates.FREQUENCY_STANDARD_NAME.value
-        )
-
-        #####################################################################
-        # --- Sv Data --- #
-        root.create_dataset(
-            name=Coordinates.SV.value,
-            shape=(len(depth_values), width, len(frequencies)),
-            # chunks=(Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, len(frequencies)),
-            chunks=(
-                Constants.TILE_SIZE.value,
-                Constants.TILE_SIZE.value,
-                1,
-            ),  # 256x256x1 <- speed up for alex
-            dtype=np.dtype(
-                Coordinates.SV_DTYPE.value
-            ),  # TODO: try to experiment with 'float16'
-            compressor=self.__compressor,
-            fill_value=np.nan,
-            overwrite=self.__overwrite,
-        )
-
-        root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
-            Coordinates.DEPTH.value,
-            Coordinates.TIME.value,
-            Coordinates.FREQUENCY.value,
-        ]
-
-        root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
-        root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
-        root.Sv.attrs["tile_size"] = Constants.TILE_SIZE.value
-
-        #####################################################################
-        # --- Metadata --- #
-        root.attrs["ship_name"] = ship_name
-        root.attrs["cruise_name"] = cruise_name
-        root.attrs["sensor_name"] = sensor_name
-        #
-        root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
-
-        current_project_version = importlib.metadata.version(
-            "water_column_sonar_processing"
-        )
-        root.attrs["processing_software_version"] = current_project_version
-        root.attrs["processing_software_time"] = Timestamp.get_timestamp()
-        #
-        root.attrs["calibration_status"] = calibration_status
-        root.attrs["tile_size"] = Constants.TILE_SIZE.value
-
-        zarr.consolidate_metadata(store)
-        #####################################################################
-        """
-        # zzz = zarr.open('https://echofish-dev-master-118234403147-echofish-zarr-store.s3.us-west-2.amazonaws.com/GU1002_resample.zarr')
-        # zzz.time[0] = 1274979445.423
-        # Initialize all to origin time, will be overwritten late
-        """
-        return zarr_path
+        compressor = Blosc(cname="zstd", clevel=9, shuffle=1)
+        TILE_SIZE = 1024
+        try:
+            # TODO: problem throwing exceptions here
+            print(
+                f"Creating level 3 local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+            )
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception(
+                    "Number of frequencies does not match number of channels"
+                )
+
+            # print(f"Debugging number of threads: {self.__num_threads}")
+
+            zarr_path = f"{path}/{cruise_name}.zarr"
+            store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+            root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
+
+            #####################################################################
+            # --- Coordinate: Time --- #
+            # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+            root.create_dataset(
+                name=Coordinates.TIME.value,
+                data=np.repeat(0.0, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                compressor=compressor,
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+            root.time.attrs["units"] = Coordinates.TIME_UNITS.value
+            root.time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+            root.time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Depth --- #
+            depth_values = self.get_depth_values(
+                # min_echo_range=min_echo_range,
+                max_echo_range=max_echo_range,
+                cruise_min_epsilon=cruise_min_epsilon,
+            )
+
+            root.create_dataset(
+                name=Coordinates.DEPTH.value,
+                # TODO: verify that these values are correct
+                data=depth_values,
+                shape=len(depth_values),
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.DEPTH_DTYPE.value  # TODO: convert to integers and only get whole number depths
+                ),  # float16 == 2 significant digits would be ideal
+                compressor=compressor,
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            if np.any(np.isnan(depth_values)):
+                raise Exception("Some depth values returned were NaN.")
+
+            root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
+            root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+            root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+            root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Latitude --- #
+            root.create_dataset(
+                name=Coordinates.LATITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,  # needs to be nan to validate if any missing
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LATITUDE is indexed by TIME
+            root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+            root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+            root.latitude.attrs["standard_name"] = (
+                Coordinates.LATITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Coordinate: Longitude --- #
+            root.create_dataset(
+                name=Coordinates.LONGITUDE.value,
+                # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                data=np.repeat(np.nan, width),
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # Note: LONGITUDE is indexed by TIME
+            root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+            root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+            root.longitude.attrs["standard_name"] = (
+                Coordinates.LONGITUDE_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # TODO: verify adding this variable for where the bottom was detected
+            # --- Coordinate: Bottom --- #
+            root.create_dataset(
+                name=Coordinates.BOTTOM.value,
+                data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(
+                    Coordinates.BOTTOM_DTYPE.value
+                ),  # TODO: should also only be integers
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # BOTTOM is indexed by TIME
+            root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+            root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+            root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+
+            #####################################################################
+            # TODO: verify adding this variable with test
+            # --- Coordinate: Speed --- #
+            root.create_dataset(
+                name=Coordinates.SPEED.value,
+                data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
+                shape=width,
+                chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+                dtype=np.dtype(Coordinates.SPEED_DTYPE.value),  # TODO: also round?
+                compressor=compressor,
+                fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            # SPEED is indexed by TIME
+            root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+            root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+            root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+            root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+
+            #####################################################################
+            # --- Coordinate: Frequency --- #
+            root.create_dataset(
+                name=Coordinates.FREQUENCY.value,
+                data=frequencies,
+                shape=len(frequencies),
+                chunks=len(frequencies),
+                dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                compressor=compressor,
+                fill_value=0.0,
+                overwrite=self.__overwrite,
+            )
+
+            # TODO: best coordinate would be channel with str type
+            root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.FREQUENCY.value
+            ]  # TODO: is this correct
+            root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+            root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+            root.frequency.attrs["standard_name"] = (
+                Coordinates.FREQUENCY_STANDARD_NAME.value
+            )
+
+            #####################################################################
+            # --- Sv Data --- #
+            root.create_dataset(
+                name=Coordinates.SV.value,
+                shape=(len(depth_values), width, len(frequencies)),
+                chunks=(
+                    TILE_SIZE,
+                    TILE_SIZE,
+                    len(frequencies),
+                ),
+                dtype=np.dtype("int8"),  # Coordinates.SV_DTYPE.value
+                compressor=compressor,  # TODO: get compression working?!
+                # fill_value=np.nan,
+                overwrite=self.__overwrite,
+            )
+
+            root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
+                Coordinates.DEPTH.value,
+                Coordinates.TIME.value,
+                Coordinates.FREQUENCY.value,
+            ]
+            root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+            root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+            root.Sv.attrs["tile_size"] = TILE_SIZE
+
+            #####################################################################
+            # --- Metadata --- #
+            root.attrs["ship_name"] = ship_name
+            root.attrs["cruise_name"] = cruise_name
+            root.attrs["sensor_name"] = sensor_name
+            #
+            root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+
+            current_project_version = importlib.metadata.version(
+                "water_column_sonar_processing"
+            )
+            root.attrs["processing_software_version"] = current_project_version
+            root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+            #
+            # TODO: add level somewhere?
+            #
+            root.attrs["calibration_status"] = calibration_status
+            root.attrs["tile_size"] = TILE_SIZE
+
+            zarr.consolidate_metadata(store)
+            #####################################################################
+            return zarr_path
+        except Exception as err:
+            raise RuntimeError(f"Problem trying to create level 3 zarr store, {err}")
+        # finally:
+        #     cleaner = Cleaner()
+        #     cleaner.delete_local_files()
+        # TODO: should delete zarr store in temp directory too?

     ############################################################################
     # def update_zarr_store(
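
The new create_zarr_store_level_3 mostly mirrors the level-2 method; the substantive differences are in the Sv array: int8 samples instead of the level-2 float dtype, 1024x1024 depth-time chunks spanning all frequencies, and a bit-shuffled zstd compressor. A standalone sketch of those zarr v2 calls (the dimensions below are assumed for illustration; the real method derives them from the cruise):

    import numpy as np
    import zarr
    from numcodecs import Blosc

    # Assumed example dimensions; the package computes these per cruise.
    n_depth, n_time, n_freq = 4096, 100_000, 4

    store = zarr.DirectoryStore("HB0707.zarr")  # hypothetical local path
    root = zarr.group(store=store, overwrite=True)
    root.create_dataset(
        name="Sv",
        shape=(n_depth, n_time, n_freq),
        chunks=(1024, 1024, n_freq),  # TILE_SIZE = 1024 in the new method
        dtype=np.dtype("int8"),  # level 2 keeps Sv as floats
        compressor=Blosc(cname="zstd", clevel=9, shuffle=1),
    )
    zarr.consolidate_metadata(store)

One consequence of the int8 dtype is visible in the diff itself: fill_value=np.nan is commented out on the Sv array, since an integer array cannot represent NaN.
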
@@ -335,8 +584,9 @@
             # synchronizer = model.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
             cruise_zarr = zarr.open(store=store, mode="r+")
         except Exception as err:  # Failure
-            print(f"Exception encountered opening Zarr store with Zarr.: {err}")
-            raise
+            raise RuntimeError(
+                f"Exception encountered opening Zarr store with Zarr, {err}"
+            )
         print("Done opening Zarr store with Zarr.")
         return cruise_zarr

@@ -358,12 +608,11 @@
             s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
             ds = xr.open_dataset(filename_or_obj=store_s3_map, engine="zarr", chunks={})
+            return ds
         except Exception as err:
-            print("Problem opening Zarr store in S3 as Xarray.")
-            raise err
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
         finally:
             print("Exiting opening Zarr store in S3 as Xarray.")
-            return ds

     def open_l2_zarr_store_with_xarray(
         self,
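
The return ds removed from the finally block above fixes a genuine bug: a return inside finally overrides any exception still propagating. A small sketch of the semantics, using a hypothetical load() that fails:

    def old_style():
        try:
            ds = load()  # hypothetical loader that raises
        except Exception as err:
            raise RuntimeError(f"Problem opening store, {err}")
        finally:
            # The return cancels the in-flight RuntimeError; worse, if
            # load() raised, ds was never bound, so this line itself
            # raises UnboundLocalError instead.
            return ds

    def new_style():
        try:
            ds = load()
            return ds  # return from try; finally still runs afterwards
        except Exception as err:
            raise RuntimeError(f"Problem opening store, {err}")
        finally:
            print("Exiting opening Zarr store in S3 as Xarray.")

Moving the return into the try body, as the new code does, lets the RuntimeError propagate while the finally-side print still executes.
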
@@ -380,8 +629,7 @@
             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
             ds = xr.open_zarr(store=store_s3_map, consolidated=None)
         except Exception as err:
-            print("Problem opening Zarr store in S3 as Xarray.")
-            raise err
+            raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
         print("Done opening Zarr store in S3 as Xarray.")
         return ds
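
For downstream readers, stores written this way can be opened much as open_l2_zarr_store_with_xarray does it. A hedged usage sketch with plain s3fs and xarray (the bucket and key are placeholders; the package normally builds this path through its S3FSManager helper):

    import s3fs
    import xarray as xr

    # Hypothetical bucket/key; substitute the real level-2 store path.
    s3 = s3fs.S3FileSystem(anon=True)
    store = s3fs.S3Map(root="example-bucket/level_2/HB0707.zarr", s3=s3)
    ds = xr.open_zarr(store=store, consolidated=None)
    print(ds["Sv"].dims)  # expected ('depth', 'time', 'frequency') per the attrs above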