water-column-sonar-processing 0.0.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of water-column-sonar-processing might be problematic; see the advisory details on the package registry page.

Files changed (60) hide show
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +418 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +64 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +129 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +323 -0
  12. water_column_sonar_processing/geometry/__init__.py +13 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +241 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +266 -0
  17. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  18. water_column_sonar_processing/index/__init__.py +3 -0
  19. water_column_sonar_processing/index/index_manager.py +381 -0
  20. water_column_sonar_processing/model/__init__.py +3 -0
  21. water_column_sonar_processing/model/zarr_manager.py +741 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +331 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -7
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  31. water_column_sonar_processing-26.1.14.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-26.1.14.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-26.1.14.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
@@ -0,0 +1,741 @@
1
+ import os
2
+ from importlib import metadata
3
+ from typing import Optional
4
+
5
+ import numpy as np
6
+ import xarray as xr
7
+ import zarr
8
+ from zarr.codecs import BloscCodec, BloscShuffle
9
+ from zarr.core.group import Group
10
+
11
+ from water_column_sonar_processing.utility import Constants, Coordinates, Timestamp
12
+
13
# Blosc codec applied to every array written to the store: zstd at the
# maximum compression level, with bit-level shuffling to improve ratios
# on numeric data.
# https://zarr-specs.readthedocs.io/en/latest/v3/codecs/blosc/index.html
compressors = BloscCodec(cname="zstd", clevel=9, shuffle=BloscShuffle.bitshuffle)
19
+
20
+
21
+ # creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
22
+ class ZarrManager:
23
+ #######################################################
24
+ def __init__(
25
+ self,
26
+ # endpoint_url: Optional[str] = None,
27
+ ):
28
+ self.__overwrite = True
29
+ self.key = os.environ.get("OUTPUT_BUCKET_ACCESS_KEY")
30
+ self.secret = os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY")
31
+
32
+ #######################################################
33
+ @staticmethod
34
+ def get_depth_values(
35
+ max_echo_range: float, # maximum depth measured from whole cruise
36
+ cruise_min_epsilon: float = 0.20, # delta subsequent measurements
37
+ ) -> np.ndarray[tuple]:
38
+ # Gets the set of depth values that will be used when resampling and
39
+ # regridding the dataset to a cruise level model store.
40
+ # Note: returned values start at zero!
41
+ # For more info see here: https://echopype.readthedocs.io/en/stable/data-proc-additional.html
42
+ all_cruise_depth_values = np.linspace( # TODO: PROBLEM HERE
43
+ start=0, # start it at zero
44
+ stop=np.ceil(max_echo_range), # round up
45
+ num=int(np.ceil(max_echo_range) / cruise_min_epsilon) + 1,
46
+ endpoint=True,
47
+ )
48
+
49
+ if np.any(np.isnan(all_cruise_depth_values)):
50
+ raise Exception("Problem depth values returned were NaN.")
51
+
52
+ return all_cruise_depth_values.round(decimals=2)
53
+
54
+ #######################################################
55
+ def create_zarr_store(
56
+ self,
57
+ path: str, # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
58
+ ship_name: str,
59
+ cruise_name: str,
60
+ sensor_name: str,
61
+ frequencies: list, # units in Hz, type(frequencies) == np.ndarray
62
+ width: int,
63
+ max_echo_range: float,
64
+ calibration_status: bool = False, # Assume uncalibrated
65
+ ) -> str:
66
+ """
67
+ Creates a new zarr store in a local temporary directory(?)
68
+ This includes the water_level on top of the max_echo_range already, nothing extra needs to be done.
69
+ """
70
+ try:
71
+ print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
72
+ if len(frequencies) != len(set(frequencies)):
73
+ raise Exception(
74
+ "Number of frequencies does not match number of channels"
75
+ )
76
+
77
+ zarr_path = f"{path}/{cruise_name}.zarr"
78
+ #####################################################################
79
+ frequencies = np.array(
80
+ frequencies, dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value)
81
+ )
82
+ #####################################################################
83
+ # Define the chunk sizes and the encoding
84
+ depth_chunk_shape = (Constants.TILE_SIZE.value,)
85
+ time_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
86
+ frequency_chunk_shape = (len(frequencies),)
87
+ latitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
88
+ longitude_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
89
+ bottom_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
90
+ speed_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
91
+ distance_chunk_shape = (Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,)
92
+ sv_chunk_shape = (Constants.TILE_SIZE.value, Constants.TILE_SIZE.value, 1)
93
+ #####################################################################
94
+ root = zarr.create_group(store=zarr_path, zarr_format=3, overwrite=True)
95
+ #####################################################################
96
+ # --- Coordinate: Time --- #
97
+ # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
98
+ # "data_type": "int64", "fill_value": 0, "units": "nanoseconds since 1970-01-01", "calendar": "proleptic_gregorian"
99
+ #
100
+ time_values = np.repeat(0.0, width)
101
+ time_values.astype(np.dtype(Coordinates.TIME_DTYPE.value))
102
+ root.create_array(
103
+ name=Coordinates.TIME.value,
104
+ # shape=width_indices,
105
+ # dtype=np.dtype(Coordinates.TIME_DTYPE.value),
106
+ data=time_values,
107
+ chunks=time_chunk_shape,
108
+ compressors=compressors,
109
+ fill_value=np.nan,
110
+ attributes=dict(
111
+ calendar=Coordinates.TIME_CALENDAR.value,
112
+ units=Coordinates.TIME_UNITS.value,
113
+ long_name=Coordinates.TIME_LONG_NAME.value,
114
+ standard_name=Coordinates.TIME_STANDARD_NAME.value,
115
+ ),
116
+ dimension_names=[Coordinates.TIME.value],
117
+ overwrite=True,
118
+ )
119
+ #####################################################################
120
+ #####################################################################
121
+ # # --- Coordinate: Depth --- #
122
+ depth_data_values = self.get_depth_values(
123
+ max_echo_range=max_echo_range,
124
+ )
125
+ depth_data = np.array(
126
+ depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
127
+ )
128
+ root.create_array(
129
+ name=Coordinates.DEPTH.value,
130
+ # shape=depth_indices,
131
+ # dtype=np.dtype(Coordinates.DEPTH_DTYPE.value),
132
+ data=depth_data,
133
+ chunks=depth_chunk_shape,
134
+ compressors=compressors,
135
+ # fill_value=np.nan,
136
+ attributes=dict(
137
+ units=Coordinates.DEPTH_UNITS.value,
138
+ long_name=Coordinates.DEPTH_LONG_NAME.value,
139
+ standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
140
+ ),
141
+ dimension_names=[Coordinates.DEPTH.value], # TODO: is this right
142
+ overwrite=True,
143
+ )
144
+ # #####################################################################
145
+ # # --- Coordinate: Latitude --- #
146
+ # latitude_values = np.rep(np.nan, width_indices)
147
+ # latitude_values.astype(np.dtype(Coordinates.LATITUDE_DTYPE.value))
148
+ root.create_array(
149
+ name=Coordinates.LATITUDE.value,
150
+ shape=width,
151
+ dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
152
+ # data=latitude_values,
153
+ chunks=latitude_chunk_shape,
154
+ compressors=compressors,
155
+ fill_value=np.nan,
156
+ attributes=dict(
157
+ units=Coordinates.LATITUDE_UNITS.value,
158
+ long_name=Coordinates.LATITUDE_LONG_NAME.value,
159
+ standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
160
+ ),
161
+ dimension_names=[Coordinates.TIME.value],
162
+ overwrite=True,
163
+ )
164
+ # #####################################################################
165
+ # # --- Coordinate: Longitude --- #
166
+ # longitude_values = np.arange(0, width_indices)
167
+ # longitude_values.astype(np.dtype(Coordinates.LONGITUDE_DTYPE.value))
168
+ root.create_array(
169
+ name=Coordinates.LONGITUDE.value,
170
+ shape=width,
171
+ dtype=np.dtype(Coordinates.LONGITUDE_DTYPE.value),
172
+ # data=longitude_values,
173
+ chunks=longitude_chunk_shape,
174
+ compressors=compressors,
175
+ fill_value=np.nan,
176
+ attributes=dict(
177
+ units=Coordinates.LONGITUDE_UNITS.value,
178
+ long_name=Coordinates.LONGITUDE_LONG_NAME.value,
179
+ standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
180
+ ),
181
+ dimension_names=[
182
+ Coordinates.TIME.value
183
+ ], # Note: LONGITUDE is indexed by TIME
184
+ overwrite=True,
185
+ )
186
+ # #####################################################################
187
+ # # --- Coordinate: Bottom --- #
188
+ # bottom_values = np.repeat(12.34, width_indices)
189
+ # bottom_values.astype(np.dtype(Coordinates.BOTTOM_DTYPE.value))
190
+ root.create_array(
191
+ name=Coordinates.BOTTOM.value,
192
+ shape=width,
193
+ dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value),
194
+ # data=bottom_values,
195
+ chunks=bottom_chunk_shape,
196
+ compressors=compressors,
197
+ fill_value=np.nan,
198
+ attributes=dict(
199
+ units=Coordinates.BOTTOM_UNITS.value,
200
+ long_name=Coordinates.BOTTOM_LONG_NAME.value,
201
+ standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
202
+ ),
203
+ dimension_names=[Coordinates.TIME.value], # Note: _ is indexed by TIME
204
+ overwrite=True,
205
+ )
206
+ # #####################################################################
207
+ # # --- Coordinate: Speed --- #
208
+ # speed_values = np.repeat(5.67, width_indices)
209
+ # speed_values.astype(np.dtype(Coordinates.SPEED_DTYPE.value))
210
+ root.create_array(
211
+ name=Coordinates.SPEED.value,
212
+ shape=width,
213
+ dtype=np.dtype(Coordinates.SPEED_DTYPE.value),
214
+ # data=speed_values,
215
+ chunks=speed_chunk_shape,
216
+ compressors=compressors,
217
+ fill_value=np.nan,
218
+ attributes=dict(
219
+ units=Coordinates.SPEED_UNITS.value,
220
+ long_name=Coordinates.SPEED_LONG_NAME.value,
221
+ standard_name=Coordinates.SPEED_STANDARD_NAME.value,
222
+ ),
223
+ dimension_names=[Coordinates.TIME.value], # Note: _ is indexed by TIME
224
+ overwrite=True,
225
+ )
226
+ # #####################################################################
227
+ # # --- Coordinate: Distance --- #
228
+ # distance_values = np.repeat(8.90, width_indices)
229
+ # distance_values.astype(np.dtype(Coordinates.DISTANCE_DTYPE.value))
230
+ root.create_array(
231
+ name=Coordinates.DISTANCE.value,
232
+ shape=width,
233
+ dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
234
+ # data=distance_values,
235
+ chunks=distance_chunk_shape,
236
+ compressors=compressors,
237
+ fill_value=np.nan,
238
+ attributes=dict(
239
+ units=Coordinates.DISTANCE_UNITS.value,
240
+ long_name=Coordinates.DISTANCE_LONG_NAME.value,
241
+ standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
242
+ ),
243
+ dimension_names=[Coordinates.TIME.value], # Note: _ is indexed by TIME
244
+ overwrite=True,
245
+ )
246
+ # #####################################################################
247
+ # # --- Coordinate: Frequency --- #
248
+ root.create_array(
249
+ name=Coordinates.FREQUENCY.value,
250
+ # shape=frequency_indices,
251
+ # dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
252
+ data=frequencies,
253
+ # chunks=(Constants.SPATIOTEMPORAL_CHUNK_SIZE.value,),
254
+ chunks=frequency_chunk_shape,
255
+ compressors=compressors,
256
+ # fill_value=0,
257
+ attributes=dict(
258
+ units=Coordinates.FREQUENCY_UNITS.value,
259
+ long_name=Coordinates.FREQUENCY_LONG_NAME.value,
260
+ standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
261
+ ),
262
+ dimension_names=[Coordinates.FREQUENCY.value],
263
+ overwrite=True,
264
+ )
265
+ # #####################################################################
266
+ # # --- Sv Data --- #
267
+ root.create_array(
268
+ name=Coordinates.SV.value,
269
+ shape=(len(depth_data), width, len(frequencies)),
270
+ dtype=np.dtype(Coordinates.SV_DTYPE.value),
271
+ # data=,
272
+ chunks=sv_chunk_shape,
273
+ compressors=compressors,
274
+ fill_value=np.nan,
275
+ attributes=dict(
276
+ units=Coordinates.SV_UNITS.value,
277
+ long_name=Coordinates.SV_LONG_NAME.value,
278
+ standard_name=Coordinates.SV_STANDARD_NAME.value,
279
+ ),
280
+ dimension_names=[
281
+ Coordinates.DEPTH.value,
282
+ Coordinates.TIME.value,
283
+ Coordinates.FREQUENCY.value,
284
+ ],
285
+ overwrite=True,
286
+ )
287
+ #####################################################################
288
+ # # --- Metadata --- #
289
+ root.attrs["ship_name"] = ship_name
290
+ root.attrs["cruise_name"] = cruise_name
291
+ root.attrs["sensor_name"] = sensor_name
292
+ #
293
+ root.attrs["processing_software_name"] = Coordinates.PROJECT_NAME.value
294
+ # NOTE: for the version to be parsable you need to build the python package locally first.
295
+ root.attrs["processing_software_version"] = metadata.version(
296
+ "water-column-sonar-processing"
297
+ )
298
+ root.attrs["processing_software_time"] = Timestamp.get_timestamp()
299
+ #
300
+ root.attrs["calibration_status"] = calibration_status
301
+ root.attrs["tile_size"] = Constants.TILE_SIZE.value
302
+ #
303
+ return zarr_path
304
+ except Exception as err:
305
+ raise RuntimeError(f"Problem trying to create zarr store, {err}")
306
+
307
+ # #######################################################
308
+ # def create_zarr_store_old(
309
+ # self,
310
+ # path: str, # 'level_2/Henry_B._Bigelow/HB0707/EK60/HB0707.model/tmp/HB0707.zarr/.zattrs'
311
+ # ship_name: str,
312
+ # cruise_name: str,
313
+ # sensor_name: str,
314
+ # frequencies: list, # units in Hz
315
+ # width: int,
316
+ # max_echo_range: float,
317
+ # # cruise_min_epsilon: float, # smallest resolution in meters
318
+ # calibration_status: bool = False, # Assume uncalibrated
319
+ # ) -> str:
320
+ # """
321
+ # Creates a new zarr store in a local temporary directory(?)
322
+ # """
323
+ # try:
324
+ # print(f"Creating local zarr store, {cruise_name}.zarr for ship {ship_name}")
325
+ # if len(frequencies) != len(set(frequencies)):
326
+ # raise Exception(
327
+ # "Number of frequencies does not match number of channels"
328
+ # )
329
+ #
330
+ # zarr_path = f"{path}/{cruise_name}.zarr"
331
+ # #####################################################################
332
+ # # Define the chunk sizes and the encoding
333
+ # # 1_000_000 data points for quickest download
334
+ # spatiotemporal_chunk_size = int(1e6)
335
+ # depth_chunk_shape = (512,)
336
+ # time_chunk_shape = (spatiotemporal_chunk_size,)
337
+ # frequency_chunk_shape = (len(frequencies),)
338
+ # latitude_chunk_shape = (spatiotemporal_chunk_size,)
339
+ # longitude_chunk_shape = (spatiotemporal_chunk_size,)
340
+ # bottom_chunk_shape = (spatiotemporal_chunk_size,)
341
+ # speed_chunk_shape = (spatiotemporal_chunk_size,)
342
+ # distance_chunk_shape = (spatiotemporal_chunk_size,)
343
+ # sv_chunk_shape = (512, 512, 1) # TODO: move to constants
344
+ #
345
+ # #####################################################################
346
+ # ##### Depth #####
347
+ # depth_data_values = self.get_depth_values(
348
+ # max_echo_range=max_echo_range,
349
+ # )
350
+ #
351
+ # depth_data = np.array(
352
+ # depth_data_values, dtype=Coordinates.DEPTH_DTYPE.value
353
+ # )
354
+ # depth_da = xr.DataArray(
355
+ # data=depth_data,
356
+ # dims=Coordinates.DEPTH.value,
357
+ # name=Coordinates.DEPTH.value,
358
+ # attrs=dict(
359
+ # units=Coordinates.DEPTH_UNITS.value,
360
+ # long_name=Coordinates.DEPTH_LONG_NAME.value,
361
+ # standard_name=Coordinates.DEPTH_STANDARD_NAME.value,
362
+ # ),
363
+ # )
364
+ #
365
+ # ##### Time #####
366
+ # # https://zarr.readthedocs.io/en/stable/spec/v2.html#data-type-encoding
367
+ # time_data = np.array(
368
+ # np.repeat(np.datetime64(0, "ns"), width),
369
+ # dtype="datetime64[ns]",
370
+ # )
371
+ # time_da = xr.DataArray(
372
+ # data=time_data,
373
+ # dims=Coordinates.TIME.value,
374
+ # name=Coordinates.TIME.value,
375
+ # attrs=dict(
376
+ # # Note: cal & units are written automatically by xarray
377
+ # # calendar="proleptic_gregorian",
378
+ # # units="seconds since 1970-01-01 00:00:00",
379
+ # long_name=Coordinates.TIME_LONG_NAME.value,
380
+ # standard_name=Coordinates.TIME_STANDARD_NAME.value,
381
+ # ),
382
+ # )
383
+ #
384
+ # ##### Frequency #####
385
+ # frequency_data = np.array(
386
+ # frequencies,
387
+ # dtype=np.dtype(Coordinates.FREQUENCY_DTYPE.value),
388
+ # )
389
+ # frequency_da = xr.DataArray(
390
+ # data=frequency_data,
391
+ # dims=Coordinates.FREQUENCY.value,
392
+ # name=Coordinates.FREQUENCY.value,
393
+ # attrs=dict(
394
+ # units=Coordinates.FREQUENCY_UNITS.value,
395
+ # long_name=Coordinates.FREQUENCY_LONG_NAME.value,
396
+ # standard_name=Coordinates.FREQUENCY_STANDARD_NAME.value,
397
+ # ),
398
+ # )
399
+ #
400
+ # ##### Latitude #####
401
+ # gps_data = np.array(
402
+ # np.repeat(np.nan, width),
403
+ # dtype=np.dtype(Coordinates.LATITUDE_DTYPE.value),
404
+ # )
405
+ # latitude_da = xr.DataArray(
406
+ # data=gps_data,
407
+ # coords=dict(
408
+ # time=time_da,
409
+ # ),
410
+ # dims=Coordinates.TIME.value, # Note: "TIME"
411
+ # name=Coordinates.LATITUDE.value,
412
+ # attrs=dict(
413
+ # units=Coordinates.LATITUDE_UNITS.value,
414
+ # long_name=Coordinates.LATITUDE_LONG_NAME.value,
415
+ # standard_name=Coordinates.LATITUDE_STANDARD_NAME.value,
416
+ # ),
417
+ # ) # Note: LATITUDE is indexed by TIME
418
+ #
419
+ # ##### Longitude #####
420
+ # longitude_da = xr.DataArray(
421
+ # data=gps_data,
422
+ # coords=dict(
423
+ # time=time_da,
424
+ # ),
425
+ # dims=Coordinates.TIME.value, # Note: "TIME"
426
+ # name=Coordinates.LONGITUDE.value,
427
+ # attrs=dict(
428
+ # units=Coordinates.LONGITUDE_UNITS.value,
429
+ # long_name=Coordinates.LONGITUDE_LONG_NAME.value,
430
+ # standard_name=Coordinates.LONGITUDE_STANDARD_NAME.value,
431
+ # ),
432
+ # ) # Note: LONGITUDE is indexed by TIME
433
+ #
434
+ # ##### Bottom #####
435
+ # bottom_data = np.array(
436
+ # np.repeat(np.nan, width), dtype=np.dtype(Coordinates.BOTTOM_DTYPE.value)
437
+ # )
438
+ # bottom_da = xr.DataArray(
439
+ # data=bottom_data,
440
+ # coords=dict(
441
+ # time=time_da,
442
+ # ),
443
+ # dims=Coordinates.TIME.value, # Note: "TIME"
444
+ # name=Coordinates.BOTTOM.value,
445
+ # attrs=dict(
446
+ # units=Coordinates.BOTTOM_UNITS.value,
447
+ # long_name=Coordinates.BOTTOM_LONG_NAME.value,
448
+ # standard_name=Coordinates.BOTTOM_STANDARD_NAME.value,
449
+ # ),
450
+ # )
451
+ #
452
+ # ##### Speed #####
453
+ # speed_data = np.array(
454
+ # np.repeat(np.nan, width), dtype=np.dtype(Coordinates.SPEED_DTYPE.value)
455
+ # )
456
+ # speed_da = xr.DataArray(
457
+ # data=speed_data,
458
+ # coords=dict(
459
+ # time=time_da,
460
+ # ),
461
+ # dims=Coordinates.TIME.value, # Note: "TIME"
462
+ # name=Coordinates.SPEED.value,
463
+ # attrs=dict(
464
+ # units=Coordinates.SPEED_UNITS.value,
465
+ # long_name=Coordinates.SPEED_LONG_NAME.value,
466
+ # standard_name=Coordinates.SPEED_STANDARD_NAME.value,
467
+ # ),
468
+ # )
469
+ #
470
+ # ##### Distance #####
471
+ # distance_data = np.array(
472
+ # np.repeat(np.nan, width),
473
+ # dtype=np.dtype(Coordinates.DISTANCE_DTYPE.value),
474
+ # )
475
+ # distance_da = xr.DataArray(
476
+ # data=distance_data,
477
+ # coords=dict(
478
+ # time=time_da,
479
+ # ),
480
+ # dims=Coordinates.TIME.value, # Note: "TIME"
481
+ # name=Coordinates.DISTANCE.value,
482
+ # attrs=dict(
483
+ # units=Coordinates.DISTANCE_UNITS.value,
484
+ # long_name=Coordinates.DISTANCE_LONG_NAME.value,
485
+ # standard_name=Coordinates.DISTANCE_STANDARD_NAME.value,
486
+ # ),
487
+ # )
488
+ #
489
+ # ##### Sv #####
490
+ # gc.collect()
491
+ # # sv_data = np.empty(
492
+ # # (len(depth_data), width, len(frequencies)),
493
+ # # # (2501, 4_100_782, 4), # large cruise used for testing
494
+ # # dtype=np.dtype(Coordinates.SV_DTYPE.value),
495
+ # # )
496
+ # sv_data = np.full(
497
+ # (len(depth_data), width, len(frequencies)),
498
+ # np.nan,
499
+ # dtype=np.dtype(Coordinates.SV_DTYPE.value),
500
+ # )
501
+ # print(f"one: {sys.getsizeof(sv_data)}")
502
+ # # sv_data[:] = np.nan # initialize all
503
+ #
504
+ # sv_da = xr.DataArray(
505
+ # data=sv_data,
506
+ # coords=dict(
507
+ # depth=depth_da,
508
+ # time=time_da,
509
+ # frequency=frequency_da,
510
+ # #
511
+ # latitude=latitude_da,
512
+ # longitude=longitude_da,
513
+ # bottom=bottom_da,
514
+ # speed=speed_da,
515
+ # distance=distance_da,
516
+ # ),
517
+ # dims=( # Depth * Time * Frequency
518
+ # Coordinates.DEPTH.value,
519
+ # Coordinates.TIME.value,
520
+ # Coordinates.FREQUENCY.value,
521
+ # ),
522
+ # name=Coordinates.SV.value,
523
+ # attrs=dict(
524
+ # units=Coordinates.SV_UNITS.value,
525
+ # long_name=Coordinates.SV_LONG_NAME.value,
526
+ # standard_name=Coordinates.SV_STANDARD_NAME.value,
527
+ # tiles_size=Constants.TILE_SIZE.value,
528
+ # _FillValue=np.nan,
529
+ # ),
530
+ # )
531
+ # print(f"two: {sys.getsizeof(sv_data)}") # getting to at least here
532
+ # del sv_data
533
+ # sv_da.encoding = {"compressors": [compressor], "chunks": sv_chunk_shape}
534
+ # # sv_da = sv_da.astype(np.float32) # was crashing here
535
+ # gc.collect()
536
+ # #####################################################################
537
+ # ### Now create the xarray.Dataset
538
+ # ds = xr.Dataset(
539
+ # data_vars=dict(
540
+ # Sv=sv_da,
541
+ # #
542
+ # bottom=bottom_da,
543
+ # speed=speed_da,
544
+ # distance=distance_da,
545
+ # ),
546
+ # coords=dict(
547
+ # depth=depth_da,
548
+ # time=time_da,
549
+ # frequency=frequency_da,
550
+ # #
551
+ # latitude=latitude_da,
552
+ # longitude=longitude_da,
553
+ # ),
554
+ # attrs=dict(
555
+ # # --- Metadata --- #
556
+ # ship_name=ship_name,
557
+ # cruise_name=cruise_name,
558
+ # sensor_name=sensor_name,
559
+ # processing_software_name=Coordinates.PROJECT_NAME.value,
560
+ # # NOTE: for the version to be parsable you need to build the python package
561
+ # # locally first.
562
+ # processing_software_version=importlib.metadata.version(
563
+ # "water-column-sonar-processing"
564
+ # ),
565
+ # processing_software_time=Timestamp.get_timestamp(),
566
+ # calibration_status=calibration_status,
567
+ # tile_size=Constants.TILE_SIZE.value,
568
+ # ),
569
+ # )
570
+ # del sv_da
571
+ # gc.collect()
572
+ # print(f"three: {sys.getsizeof(ds)}")
573
+ # #####################################################################
574
+ # encodings = dict(
575
+ # depth={
576
+ # "compressors": [compressor],
577
+ # "chunks": depth_chunk_shape,
578
+ # },
579
+ # time={
580
+ # "compressors": [compressor],
581
+ # "chunks": time_chunk_shape,
582
+ # "units": Coordinates.TIME_UNITS.value,
583
+ # },
584
+ # frequency={
585
+ # "compressors": [compressor],
586
+ # "chunks": frequency_chunk_shape,
587
+ # },
588
+ # latitude={
589
+ # "compressors": [compressor],
590
+ # "chunks": latitude_chunk_shape,
591
+ # },
592
+ # longitude={
593
+ # "compressors": [compressor],
594
+ # "chunks": longitude_chunk_shape,
595
+ # },
596
+ # bottom={
597
+ # "compressors": [compressor],
598
+ # "chunks": bottom_chunk_shape,
599
+ # },
600
+ # speed={
601
+ # "compressors": [compressor],
602
+ # "chunks": speed_chunk_shape,
603
+ # },
604
+ # distance={
605
+ # "compressors": [compressor],
606
+ # "chunks": distance_chunk_shape,
607
+ # },
608
+ # Sv={
609
+ # "compressors": [compressor],
610
+ # "chunks": sv_chunk_shape,
611
+ # },
612
+ # )
613
+ # gc.collect()
614
+ # ds.to_zarr(
615
+ # store=zarr_path,
616
+ # mode="w", # “w” means create (overwrite if exists)
617
+ # encoding=encodings,
618
+ # consolidated=False,
619
+ # safe_chunks=False,
620
+ # align_chunks=True,
621
+ # zarr_format=3,
622
+ # write_empty_chunks=False, # Might need to change this
623
+ # )
624
+ # #####################################################################
625
+ # return zarr_path
626
+ # except Exception as err:
627
+ # raise RuntimeError(f"Problem trying to create zarr store, {err}")
628
+ # # finally:
629
+ # # cleaner = Cleaner()
630
+ # # cleaner.delete_local_files()
631
+ # # TODO: should delete zarr store in temp directory too?
632
+
633
+ ############################################################################
634
+ def open_s3_zarr_store_with_zarr(
635
+ self,
636
+ ship_name: str,
637
+ cruise_name: str,
638
+ sensor_name: str,
639
+ output_bucket_name: str,
640
+ endpoint_url: Optional[str] = None,
641
+ ) -> Group:
642
+ # Mounts a Zarr store using pythons Zarr implementation. The mounted store
643
+ # will have read/write privileges so that store can be updated.
644
+ print("Opening L2 Zarr store with Zarr for writing.")
645
+ try:
646
+ level = str(Constants.LEVEL_2.value)
647
+ store = f"s3://{output_bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
648
+ print(f"endpoint url: {endpoint_url}")
649
+ cruise_zarr = zarr.open(
650
+ store=store,
651
+ mode="r+",
652
+ zarr_format=3,
653
+ storage_options={
654
+ "endpoint_url": endpoint_url,
655
+ "key": self.key,
656
+ "secret": self.secret,
657
+ },
658
+ )
659
+ print("Done opening store with Zarr.")
660
+ return cruise_zarr
661
+ except Exception as err: # Failure
662
+ raise RuntimeError(f"Exception encountered opening store with Zarr, {err}")
663
+
664
+ ###########################################################################
665
+ @staticmethod
666
+ def open_s3_zarr_store_with_xarray(
667
+ ship_name: str,
668
+ cruise_name: str,
669
+ sensor_name: str,
670
+ file_name_stem: str,
671
+ bucket_name: str,
672
+ # level: str, # TODO: add level
673
+ endpoint_url: Optional[str] = None, # needed for moto testing
674
+ ) -> xr.Dataset:
675
+ print("Opening L1 Zarr store in S3 with Xarray.")
676
+ try:
677
+ zarr_path = f"s3://{bucket_name}/level_1/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.zarr"
678
+ kwargs = {"consolidated": False}
679
+ ds = xr.open_dataset(
680
+ filename_or_obj=zarr_path,
681
+ engine="zarr",
682
+ backend_kwargs={
683
+ "storage_options": {
684
+ "endpoint_url": endpoint_url,
685
+ "anon": True,
686
+ },
687
+ },
688
+ **kwargs,
689
+ )
690
+ return ds
691
+ except Exception as err:
692
+ raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
693
+
694
+ ###########################################################################
695
+ # TODO: can this be consolidated with above
696
+ @staticmethod
697
+ def open_l2_zarr_store_with_xarray(
698
+ ship_name: str,
699
+ cruise_name: str,
700
+ sensor_name: str,
701
+ bucket_name: str,
702
+ endpoint_url: Optional[str] = None, # needed for moto testing
703
+ ) -> xr.Dataset:
704
+ print("Opening L2 Zarr store in S3 with Xarray.")
705
+ try:
706
+ level = str(Constants.LEVEL_2.value)
707
+ zarr_path = f"s3://{bucket_name}/{level}/{ship_name}/{cruise_name}/{sensor_name}/{cruise_name}.zarr"
708
+ kwargs = {"consolidated": False}
709
+ ds = xr.open_dataset(
710
+ filename_or_obj=zarr_path,
711
+ engine="zarr",
712
+ backend_kwargs={
713
+ "storage_options": {
714
+ "endpoint_url": endpoint_url,
715
+ "anon": True,
716
+ }
717
+ },
718
+ **kwargs,
719
+ )
720
+ return ds
721
+ except Exception as err:
722
+ raise RuntimeError(f"Problem opening Zarr store in S3 as Xarray, {err}")
723
+
724
+ ###########################################################################
725
+
726
+ ###########################################################################
727
+ # def create_process_synchronizer(self):
728
+ # # TODO: explore aws redis options
729
+ # pass
730
+
731
+ ###########################################################################
732
+ # def verify_cruise_store_data(self):
733
+ # # TODO: run a check on a finished model store to ensure that
734
+ # # none of the time, latitude, longitude, or depth values
735
+ # # are NaN.
736
+ # pass
737
+
738
+ ###########################################################################
739
+
740
+
741
+ ###########################################################