water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +420 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +72 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +339 -0
  12. water_column_sonar_processing/geometry/__init__.py +11 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +243 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
  17. water_column_sonar_processing/index/__init__.py +3 -0
  18. water_column_sonar_processing/index/index_manager.py +384 -0
  19. water_column_sonar_processing/model/__init__.py +3 -0
  20. water_column_sonar_processing/model/zarr_manager.py +722 -0
  21. water_column_sonar_processing/process.py +149 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
  31. water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
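
The headline change is the move from the flat `model` package to the `water_column_sonar_processing` namespace, so downstream imports need updating. A rough migration sketch (the old import path is inferred from the `model/zarr/zarr_manager.py` layout above, and the new one assumes the subpackage `__init__` files re-export their classes, as the diff below does with `from water_column_sonar_processing.aws import S3FSManager`):

    # before, against 0.0.1 (old layout: model/zarr/zarr_manager.py)
    # from model.zarr.zarr_manager import ZarrManager

    # after, against 25.11.1 (new layout: water_column_sonar_processing/model/zarr_manager.py)
    from water_column_sonar_processing.model import ZarrManager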
water_column_sonar_processing/model/zarr_manager.py
@@ -0,0 +1,722 @@
+ import importlib.metadata
+
+ import numpy as np
+ import xarray as xr
+ import zarr
+ from zarr.codecs import BloscCodec, BloscShuffle
+ from zarr.storage import LocalStore
+
+ from water_column_sonar_processing.aws import S3FSManager
+ from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
+
+ # NOTE: clevel=9 is already the maximum zstd compression level
+ compressor = BloscCodec(cname="zstd", clevel=9, shuffle=BloscShuffle.shuffle)
+
+ # TODO: when ready, switch fully to version 3 of the Zarr spec
+
+
+ # creates the lat/lon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
+ class ZarrManager:
+     #######################################################
+     def __init__(
+         self,
+     ):
+         self.__overwrite = True
+
+     #######################################################
+     def get_depth_values(
+         self,
+         # min_echo_range: float,  # minimum depth measured (zero non-inclusive) from whole cruise
+         max_echo_range: float,  # maximum depth measured from whole cruise
+         cruise_min_epsilon: float = 0.25,  # resolution between subsequent measurements
+     ):  # TODO: define return type
+         # Gets the set of depth values that will be used when resampling and
+         # regridding the dataset to a cruise-level Zarr store.
+         # Note: returned values start at zero!
+         # For more info see: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
+         print("Computing depth values.")
+         all_cruise_depth_values = np.linspace(  # TODO: PROBLEM HERE
+             start=0,  # just start it at zero
+             stop=max_echo_range,
+             num=int(max_echo_range / cruise_min_epsilon)
+             + 1,  # int(np.ceil(max_echo_range / cruise_min_epsilon))?
+             endpoint=True,
+         )  # np.arange(min_echo_range, max_echo_range, step=min_echo_range) is worse
+
+         if np.any(np.isnan(all_cruise_depth_values)):
+             raise Exception("Depth values returned contained NaN.")
+
+         print("Done computing depth values.")
+         return all_cruise_depth_values.round(decimals=2)
+
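A quick worked example of the grid this method produces (derived only from the code above):

    ZarrManager().get_depth_values(max_echo_range=100.0, cruise_min_epsilon=0.25)
    # array([  0.  ,   0.25,   0.5 , ...,  99.75, 100.  ])  -> 401 values, 0.25 m apart

Because `num=int(max_echo_range / cruise_min_epsilon) + 1` truncates, a `max_echo_range` that is not an exact multiple of `cruise_min_epsilon` yields a slightly coarser spacing than requested, which is presumably what the "PROBLEM HERE" TODO refers to.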
+     #######################################################
+     def create_zarr_store(
+         self,
+         path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
+         ship_name: str,
+         cruise_name: str,
+         sensor_name: str,
+         frequencies: list,  # units in Hz
+         width: int,  # TODO: needs better name... "ping_time"
+         # min_echo_range: float,
+         max_echo_range: float,
+         cruise_min_epsilon: float,  # smallest resolution in meters
+         calibration_status: bool = False,  # Assume uncalibrated
+     ) -> str:
+         try:
+             # TODO: problem throwing exceptions here
+             print(
+                 f"Creating local Zarr store at {cruise_name}.zarr for ship {ship_name}"
+             )
+             # There cannot currently be repeated frequencies
+             # TODO: eventually switch coordinate to "channel" because frequencies can repeat
+             if len(frequencies) != len(set(frequencies)):
+                 raise Exception(
+                     "Frequencies must be unique; repeated frequencies are not yet supported"
+                 )
+
+             zarr_path = f"{path}/{cruise_name}.zarr"
+             # store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+             ### https://zarr.readthedocs.io/en/latest/user-guide/groups/ ###
+             # store = zarr.group(path=zarr_path)
+             store = LocalStore(root=zarr_path)
+             root = zarr.group(
+                 store=store,  # zarr_path,
+                 overwrite=self.__overwrite,  # cache_attrs=True
+                 zarr_format=3,
+             )
+
+             #####################################################################
+             # --- Coordinate: Time --- #
+             # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+             time_data = np.repeat(0.0, width)
+             time_data = time_data.astype(np.dtype(Coordinates.TIME_DTYPE.value), copy=False)
+
+             time = root.create_array(  # Group.create_array replaces the deprecated create_dataset
+                 name=Coordinates.TIME.value,
+                 data=time_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),
+             )
+
+             # time.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+             time.attrs["units"] = Coordinates.TIME_UNITS.value
+             time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+             time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+
+             #####################################################################
+             # --- Coordinate: Depth --- #
+             depth_data = self.get_depth_values(
+                 # min_echo_range=min_echo_range,
+                 max_echo_range=max_echo_range,
+                 cruise_min_epsilon=cruise_min_epsilon,
+             )
+             depth_data = np.array(
+                 depth_data, dtype=np.dtype(Coordinates.DEPTH_DTYPE.value)
+             )
+
+             depth = root.create_array(
+                 name=Coordinates.DEPTH.value,
+                 # TODO: verify that these values are correct
+                 data=depth_data,
+                 # shape=len(depth_values),
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(
+                 #     Coordinates.DEPTH_DTYPE.value
+                 # ),  # float16 == 2 significant digits would be ideal
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.DEPTH.value,),
+             )
+
+             if np.any(np.isnan(depth_data)):
+                 raise Exception("Some depth values returned were NaN.")
+
+             # depth.metadata.dimension_names = (Coordinates.DEPTH.value,)
+
+             depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+             depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+             depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+
+             #####################################################################
+             # --- Coordinate: Latitude --- #
+             gps_data = np.array(
+                 np.repeat(np.nan, width),
+                 dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+             )
+
+             latitude = root.create_array(
+                 name=Coordinates.LATITUDE.value,
+                 # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                 data=gps_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),
+             )
+
+             # Note: LATITUDE is indexed by TIME
+             # latitude.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+             latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+             latitude.attrs["standard_name"] = Coordinates.LATITUDE_STANDARD_NAME.value
+
+             #####################################################################
+             # --- Coordinate: Longitude --- #
+             longitude = root.create_array(
+                 name=Coordinates.LONGITUDE.value,
+                 # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+                 data=gps_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),
+             )
+
+             # Note: LONGITUDE is indexed by TIME
+             # longitude.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+             longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+             longitude.attrs["standard_name"] = Coordinates.LONGITUDE_STANDARD_NAME.value
+
+             #####################################################################
+             # TODO: verify adding this variable for where the bottom was detected
+             # --- Coordinate: Bottom --- #
+             bottom_data = np.array(
+                 np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
+             )
+
+             bottom = root.create_array(
+                 name=Coordinates.BOTTOM.value,
+                 data=bottom_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),
+             )
+
+             # BOTTOM is indexed by TIME
+             # bottom.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+             bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+             bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+
+             #####################################################################
+             # TODO: verify adding this variable with test
+             # --- Coordinate: Speed --- #
+             speed_data = np.repeat(np.nan, width)
+             speed_data = speed_data.astype(np.dtype(Coordinates.SPEED_DTYPE.value), copy=False)
+
+             speed = root.create_array(
+                 name=Coordinates.SPEED.value,
+                 data=speed_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),  # NOTE: 'TIME'
+             )
+
+             # SPEED is indexed by TIME
+             # speed.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+             speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+             speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+
+             #####################################################################
+             # TODO: verify adding this variable with test
+             # --- Coordinate: Distance --- #
+             distance_data = np.repeat(np.nan, width)
+             distance_data = distance_data.astype(np.dtype(Coordinates.DISTANCE_DTYPE.value), copy=False)
+
+             distance = root.create_array(
+                 name=Coordinates.DISTANCE.value,
+                 data=distance_data,
+                 # shape=width,
+                 chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
+                 # dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.TIME.value,),  # NOTE: 'TIME'
+             )
+
+             # DISTANCE is indexed by TIME
+             # distance.metadata.dimension_names = (Coordinates.TIME.value,)
+
+             distance.attrs["units"] = Coordinates.DISTANCE_UNITS.value
+             distance.attrs["long_name"] = Coordinates.DISTANCE_LONG_NAME.value
+             distance.attrs["standard_name"] = Coordinates.DISTANCE_STANDARD_NAME.value
+
+             #####################################################################
+             # --- Coordinate: Frequency --- #
+             frequency_data = np.array(
+                 frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
+             )
+             # frequency_data.astype(np.dtype(Coordinates.FREQUENCY_DTYPE.value), copy=False)
+
+             frequency = root.create_array(
+                 name=Coordinates.FREQUENCY.value,
+                 data=frequency_data,
+                 # shape=len(frequencies),
+                 chunks=(len(frequencies),),
+                 # dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=0.0,
+                 overwrite=self.__overwrite,
+                 dimension_names=(Coordinates.FREQUENCY.value,),
+             )
+
+             # TODO: best coordinate would be channel with str type
+             # frequency.metadata.dimension_names = (Coordinates.FREQUENCY.value,)
+
+             frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+             frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+             frequency.attrs["standard_name"] = Coordinates.FREQUENCY_STANDARD_NAME.value
+
+             #####################################################################
+             # --- Sv Data --- #
+             sv = root.create_array(
+                 name=Coordinates.SV.value,
+                 shape=(len(depth_data), width, len(frequencies)),
+                 chunks=(
+                     Constants.TILE_SIZE.value,
+                     Constants.TILE_SIZE.value,
+                     1,
+                 ),
+                 dtype=np.dtype(Coordinates.SV_DTYPE.value),
+                 compressors=compressor,
+                 fill_value=np.nan,
+                 overwrite=self.__overwrite,
+                 dimension_names=(
+                     Coordinates.DEPTH.value,
+                     Coordinates.TIME.value,
+                     Coordinates.FREQUENCY.value,
+                 ),
+             )
+             # sv.metadata.dimension_names = (
+             #     Coordinates.DEPTH.value,
+             #     Coordinates.TIME.value,
+             #     Coordinates.FREQUENCY.value,
+             # )
+             # sv.attrs["_ARRAY_DIMENSIONS"] = [
+             #     Coordinates.DEPTH.value,
+             #     Coordinates.TIME.value,
+             #     Coordinates.FREQUENCY.value,
+             # ]
+
+             sv.attrs["units"] = Coordinates.SV_UNITS.value
+             sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+             sv.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+             #####################################################################
+             # --- Metadata --- #
+             root.attrs["ship_name"] = ship_name
+             root.attrs["cruise_name"] = cruise_name
+             root.attrs["sensor_name"] = sensor_name
+             #
+             root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+
+             # NOTE: for the version to be parsable, you need to build the Python
+             # package locally first.
+             current_project_version = importlib.metadata.version(
+                 "water-column-sonar-processing"
+             )
+             root.attrs["processing_software_version"] = current_project_version
+             root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+             #
+             root.attrs["calibration_status"] = calibration_status
+             root.attrs["tile_size"] = Constants.TILE_SIZE.value
+
+             # TODO: ZarrUserWarning: Consolidated metadata is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.
+             # zarr.consolidate_metadata(zarr_path)
+             #####################################################################
+             """
+             # zzz = zarr.open('https://echofish-dev-master-118234403147-echofish-zarr-store.s3.us-west-2.amazonaws.com/GU1002_resample.zarr')
+             # zzz.time[0] = 1274979445.423
+             # Initialize all to origin time, will be overwritten later
+             """
+             return zarr_path
+         except Exception as err:
+             raise RuntimeError(f"Problem trying to create zarr store, {err}")
+         # finally:
+         #     cleaner = Cleaner()
+         #     cleaner.delete_local_files()
+         #     # TODO: should delete zarr store in temp directory too?
+
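A minimal local sketch of creating and inspecting a store (the argument values are illustrative, and `"Sv"` assumes that is what `Coordinates.SV.value` resolves to):

    zm = ZarrManager()
    store_path = zm.create_zarr_store(
        path="/tmp/level_2",
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0707",
        sensor_name="EK60",
        frequencies=[18_000, 38_000, 120_000, 200_000],  # Hz
        width=4096,  # ping_time count
        max_echo_range=250.0,
        cruise_min_epsilon=0.5,
    )
    root = zarr.open(store_path, mode="r")
    print(root["Sv"].shape)  # (501, 4096, 4): depth x time x frequency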
+     #######################################################
+     #
+     # LEVEL 3 - LEVEL 3 - LEVEL 3 - LEVEL 3  # TODO: move to separate project for zarr 3?
+     #
+     # def create_zarr_store_level_3(
+     #     self,
+     #     path: str,  # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
+     #     ship_name: str,
+     #     cruise_name: str,
+     #     sensor_name: str,
+     #     frequencies: list,  # units in Hz
+     #     width: int,  # TODO: needs better name... "ping_time"
+     #     min_echo_range: float,  # smallest resolution in meters --> 1.0 meters
+     #     max_echo_range: float,
+     #     cruise_min_epsilon: float,
+     #     calibration_status: bool = False,  # Assume uncalibrated
+     # ) -> str:
+     #     compressor = Blosc(cname="zstd", clevel=9, shuffle=1)
+     #     TILE_SIZE = 1024
+     #     try:
+     #         # TODO: problem throwing exceptions here
+     #         print(
+     #             f"Creating level 3 local zarr_manager store at {cruise_name}.zarr for ship {ship_name}"
+     #         )
+     #         if len(frequencies) != len(set(frequencies)):
+     #             raise Exception(
+     #                 "Number of frequencies does not match number of channels"
+     #             )
+     #
+     #         # print(f"Debugging number of threads: {self.__num_threads}")
+     #
+     #         zarr_path = f"{path}/{cruise_name}.zarr"
+     #         store = zarr.DirectoryStore(path=zarr_path, normalize_keys=False)
+     #         root = zarr.group(store=store, overwrite=self.__overwrite, cache_attrs=True)
+     #
+     #         #####################################################################
+     #         # --- Coordinate: Time --- #
+     #         # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
+     #         time = root.create_array(
+     #             name=Coordinates.TIME.value,
+     #             data=np.repeat(0.0, width),
+     #             shape=width,
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(Coordinates.TIME_DTYPE.value),
+     #             compressor=compressor,
+     #             # fill_value=np.nan,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         time.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+     #         time.attrs["calendar"] = Coordinates.TIME_CALENDAR.value
+     #         time.attrs["units"] = Coordinates.TIME_UNITS.value
+     #         time.attrs["long_name"] = Coordinates.TIME_LONG_NAME.value
+     #         time.attrs["standard_name"] = Coordinates.TIME_STANDARD_NAME.value
+     #
+     #         #####################################################################
+     #         # --- Coordinate: Depth --- #
+     #         depth_values = self.get_depth_values(
+     #             # min_echo_range=min_echo_range,
+     #             max_echo_range=max_echo_range,
+     #             cruise_min_epsilon=cruise_min_epsilon,
+     #         )
+     #
+     #         root.create_dataset(
+     #             name=Coordinates.DEPTH.value,
+     #             # TODO: verify that these values are correct
+     #             data=depth_values,
+     #             shape=len(depth_values),
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(
+     #                 Coordinates.DEPTH_DTYPE.value  # TODO: convert to integers and only get whole number depths
+     #             ),  # float16 == 2 significant digits would be ideal
+     #             compressor=compressor,
+     #             # fill_value=np.nan,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         if np.any(np.isnan(depth_values)):
+     #             raise Exception("Some depth values returned were NaN.")
+     #
+     #         root.depth.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.DEPTH.value]
+     #         root.depth.attrs["units"] = Coordinates.DEPTH_UNITS.value
+     #         root.depth.attrs["long_name"] = Coordinates.DEPTH_LONG_NAME.value
+     #         root.depth.attrs["standard_name"] = Coordinates.DEPTH_STANDARD_NAME.value
+     #
+     #         #####################################################################
+     #         # --- Coordinate: Latitude --- #
+     #         root.create_dataset(
+     #             name=Coordinates.LATITUDE.value,
+     #             # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+     #             data=np.repeat(np.nan, width),
+     #             shape=width,
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
+     #             compressor=compressor,
+     #             fill_value=np.nan,  # needs to be nan to validate if any missing
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         # Note: LATITUDE is indexed by TIME
+     #         root.latitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+     #         root.latitude.attrs["units"] = Coordinates.LATITUDE_UNITS.value
+     #         root.latitude.attrs["long_name"] = Coordinates.LATITUDE_LONG_NAME.value
+     #         root.latitude.attrs["standard_name"] = (
+     #             Coordinates.LATITUDE_STANDARD_NAME.value
+     #         )
+     #
+     #         #####################################################################
+     #         # --- Coordinate: Longitude --- #
+     #         root.create_dataset(
+     #             name=Coordinates.LONGITUDE.value,
+     #             # dataset=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+     #             data=np.repeat(np.nan, width),
+     #             shape=width,
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
+     #             compressor=compressor,
+     #             fill_value=np.nan,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         # Note: LONGITUDE is indexed by TIME
+     #         root.longitude.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+     #         root.longitude.attrs["units"] = Coordinates.LONGITUDE_UNITS.value
+     #         root.longitude.attrs["long_name"] = Coordinates.LONGITUDE_LONG_NAME.value
+     #         root.longitude.attrs["standard_name"] = (
+     #             Coordinates.LONGITUDE_STANDARD_NAME.value
+     #         )
+     #
+     #         #####################################################################
+     #         # TODO: verify adding this variable for where the bottom was detected
+     #         # --- Coordinate: Bottom --- #
+     #         root.create_dataset(
+     #             name=Coordinates.BOTTOM.value,
+     #             data=np.repeat(0.0, width),  # root.longitude[:] = np.nan
+     #             shape=width,
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(
+     #                 Coordinates.BOTTOM_DTYPE.value
+     #             ),  # TODO: should also only be integers
+     #             compressor=compressor,
+     #             fill_value=0.0,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         # BOTTOM is indexed by TIME
+     #         root.bottom.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+     #         root.bottom.attrs["units"] = Coordinates.BOTTOM_UNITS.value
+     #         root.bottom.attrs["long_name"] = Coordinates.BOTTOM_LONG_NAME.value
+     #         root.bottom.attrs["standard_name"] = Coordinates.BOTTOM_STANDARD_NAME.value
+     #
+     #         #####################################################################
+     #         # TODO: verify adding this variable with test
+     #         # --- Coordinate: Speed --- #
+     #         root.create_dataset(
+     #             name=Coordinates.SPEED.value,
+     #             data=np.repeat(np.nan, width),  # root.longitude[:] = np.nan
+     #             shape=width,
+     #             chunks=Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,
+     #             dtype=np.dtype(Coordinates.SPEED_DTYPE.value),  # TODO: also round?
+     #             compressor=compressor,
+     #             fill_value=np.nan,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         # SPEED is indexed by TIME
+     #         root.speed.attrs["_ARRAY_DIMENSIONS"] = [Coordinates.TIME.value]
+     #         root.speed.attrs["units"] = Coordinates.SPEED_UNITS.value
+     #         root.speed.attrs["long_name"] = Coordinates.SPEED_LONG_NAME.value
+     #         root.speed.attrs["standard_name"] = Coordinates.SPEED_STANDARD_NAME.value
+     #
+     #         #####################################################################
+     #         # --- Coordinate: Frequency --- #
+     #         root.create_dataset(
+     #             name=Coordinates.FREQUENCY.value,
+     #             data=frequencies,
+     #             shape=len(frequencies),
+     #             chunks=len(frequencies),
+     #             dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
+     #             compressor=compressor,
+     #             fill_value=0.0,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         # TODO: best coordinate would be channel with str type
+     #         root.frequency.attrs["_ARRAY_DIMENSIONS"] = [
+     #             Coordinates.FREQUENCY.value
+     #         ]  # TODO: is this correct
+     #         root.frequency.attrs["units"] = Coordinates.FREQUENCY_UNITS.value
+     #         root.frequency.attrs["long_name"] = Coordinates.FREQUENCY_LONG_NAME.value
+     #         root.frequency.attrs["standard_name"] = (
+     #             Coordinates.FREQUENCY_STANDARD_NAME.value
+     #         )
+     #
+     #         #####################################################################
+     #         # --- Sv Data --- #
+     #         root.create_dataset(
+     #             name=Coordinates.SV.value,
+     #             shape=(len(depth_values), width, len(frequencies)),
+     #             chunks=(
+     #                 TILE_SIZE,
+     #                 TILE_SIZE,
+     #                 len(frequencies),
+     #             ),
+     #             dtype=np.dtype("int8"),  # Coordinates.SV_DTYPE.value
+     #             compressor=compressor,  # TODO: get compression working?!
+     #             # fill_value=np.nan,
+     #             overwrite=self.__overwrite,
+     #         )
+     #
+     #         root.Sv.attrs["_ARRAY_DIMENSIONS"] = [
+     #             Coordinates.DEPTH.value,
+     #             Coordinates.TIME.value,
+     #             Coordinates.FREQUENCY.value,
+     #         ]
+     #         root.Sv.attrs["units"] = Coordinates.SV_UNITS.value
+     #         root.Sv.attrs["long_name"] = Coordinates.SV_LONG_NAME.value
+     #         root.Sv.attrs["tile_size"] = TILE_SIZE
+     #
+     #         #####################################################################
+     #         # --- Metadata --- #
+     #         root.attrs["ship_name"] = ship_name
+     #         root.attrs["cruise_name"] = cruise_name
+     #         root.attrs["sensor_name"] = sensor_name
+     #         #
+     #         root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
+     #
+     #         current_project_version = importlib.metadata.version(
+     #             "water_column_sonar_processing"
+     #         )
+     #         root.attrs["processing_software_version"] = current_project_version
+     #         root.attrs["processing_software_time"] = Timestamp.get_timestamp()
+     #         #
+     #         # TODO: add level somewhere?
+     #         #
+     #         root.attrs["calibration_status"] = calibration_status
+     #         root.attrs["tile_size"] = TILE_SIZE
+     #
+     #         zarr.consolidate_metadata(store)
+     #         #####################################################################
+     #         return zarr_path
+     #     except Exception as err:
+     #         raise RuntimeError(f"Problem trying to create level 3 zarr store, {err}")
+     #     # finally:
+     #     #     cleaner = Cleaner()
+     #     #     cleaner.delete_local_files()
+     #     #     # TODO: should delete zarr store in temp directory too?
+
+     ############################################################################
+     # def update_zarr_store(
+     #     self,
+     #     path: str,
+     #     ship_name: str,
+     #     cruise_name: str,  # TODO: just pass stem
+     #     sensor_name: str,
+     # ) -> None:
+     #     """
+     #     Opens an existing Zarr store living in an S3 bucket for the purpose
+     #     of updating just a subset of the cruise-level Zarr store associated
+     #     with a file-level Zarr store.
+     #     """
+     #     pass
+
+     ############################################################################
+     def open_s3_zarr_store_with_zarr(
+         self,
+         ship_name: str,
+         cruise_name: str,
+         sensor_name: str,
+         # zarr_synchronizer: Union[str, None] = None,  # TODO:
+         output_bucket_name: str,
+         endpoint_url=None,
+     ):  # -> zarr.hierarchy.Group:
+         # Mounts a Zarr store using Python's Zarr implementation. The mounted store
+         # will have read/write privileges so that the store can be updated.
+         print("Opening L2 Zarr store with Zarr for writing.")
+         try:
+             s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
+             root = f"{output_bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+             store = s3fs_manager.s3_map(s3_zarr_store_path=root)
+             # synchronizer = zarr.ProcessSynchronizer(f"/tmp/{ship_name}_{cruise_name}.sync")
+             cruise_zarr = zarr.open(store=store, mode="r+")
+         except Exception as err:  # Failure
+             raise RuntimeError(
+                 f"Exception encountered opening Zarr store with Zarr, {err}"
+             )
+         print("Done opening Zarr store with Zarr.")
+         return cruise_zarr
+
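A hedged sketch of the write path this enables (the bucket name is made up, and the `"Sv"` key again assumes `Coordinates.SV.value`):

    cruise_zarr = ZarrManager().open_s3_zarr_store_with_zarr(
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0707",
        sensor_name="EK60",
        output_bucket_name="example-output-bucket",
    )
    # write one raw file's pings into its slot of the cruise-level store
    cruise_zarr["Sv"][:, 0:1024, :] = -80.0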
+     ############################################################################
+     def open_s3_zarr_store_with_xarray(
+         self,
+         ship_name: str,
+         cruise_name: str,
+         sensor_name: str,
+         file_name_stem: str,
+         input_bucket_name: str,
+         endpoint_url=None,
+     ) -> xr.Dataset:
+         print(
+             "Opening L1 Zarr store in S3 with Xarray."
+         )  # TODO: is this only used for reading?
+         try:
+             zarr_path = f"s3://{input_bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
+             s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
+             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
+             ds = xr.open_dataset(filename_or_obj=store_s3_map, engine="zarr", chunks={})
+             return ds
+         except Exception as err:
+             raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
+         finally:
+             print("Exiting opening Zarr store in S3 as Xarray.")
+
+     def open_l2_zarr_store_with_xarray(
+         self,
+         ship_name: str,
+         cruise_name: str,
+         sensor_name: str,
+         bucket_name: str,
+         endpoint_url=None,
+     ) -> xr.Dataset:
+         print("Opening L2 Zarr store in S3 with Xarray.")
+         try:
+             zarr_path = f"s3://{bucket_name}/level_2/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
+             s3fs_manager = S3FSManager(endpoint_url=endpoint_url)
+             store_s3_map = s3fs_manager.s3_map(s3_zarr_store_path=zarr_path)
+             ds = xr.open_dataset(
+                 filename_or_obj=store_s3_map,
+                 engine="zarr",
+             )
+         except Exception as err:
+             raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
+         print("Done opening Zarr store in S3 as Xarray.")
+         return ds
+
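And the corresponding lazy read for analysis or plotting (same caveats; the coordinate names assume the Coordinates enum maps to "depth", "time", and "frequency"):

    ds = ZarrManager().open_l2_zarr_store_with_xarray(
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0707",
        sensor_name="EK60",
        bucket_name="example-output-bucket",
    )
    subset = ds["Sv"].sel(frequency=38_000.0, depth=slice(0.0, 100.0))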
+     ############################################################################
+
+     #######################################################
+     # def create_process_synchronizer(self):
+     #     # TODO: explore aws redis options
+     #     pass
+
+     #######################################################
+     # def verify_cruise_store_data(self):
+     #     # TODO: run a check on a finished cruise-level Zarr store to ensure
+     #     # that none of the time, latitude, longitude, or depth values
+     #     # are NaN.
+     #     pass
+
+     #######################################################
+
+
+ ###########################################################