water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.
Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +420 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +72 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +339 -0
  12. water_column_sonar_processing/geometry/__init__.py +11 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +243 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
  17. water_column_sonar_processing/index/__init__.py +3 -0
  18. water_column_sonar_processing/index/index_manager.py +384 -0
  19. water_column_sonar_processing/model/__init__.py +3 -0
  20. water_column_sonar_processing/model/zarr_manager.py +722 -0
  21. water_column_sonar_processing/process.py +149 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
  31. water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
water_column_sonar_processing/processing/raw_to_zarr.py (new file)
@@ -0,0 +1,425 @@
+ import gc
+ import os
+ from datetime import datetime
+ from pathlib import Path
+
+ import echopype as ep
+ import numpy as np
+ from zarr.codecs import Blosc
+
+ from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
+ from water_column_sonar_processing.geometry import GeometryManager
+ from water_column_sonar_processing.utility import Cleaner
+
+
+ # from numcodecs import Blosc
+
+
+ def get_water_level(ds):
+     """
+     Broken out into a standalone function so it can be mocked in tests.
+     """
+     if "water_level" in ds.keys():
+         return ds.water_level.values
+     else:
+         return 0.0
+
+
+ # This code was copied from echofish-aws-raw-to-zarr-lambda
+ class RawToZarr:
+     #######################################################
+     def __init__(
+         self,
+         # output_bucket_access_key,
+         # output_bucket_secret_access_key,
+         # # overwrite_existing_zarr_store,
+     ):
+         # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
+         # self.__compressor = Blosc(cname="zstd", clevel=2)  # shuffle=Blosc.NOSHUFFLE
+         self.__compressor = Blosc(cname="zstd", clevel=9)
+         self.__overwrite = True
+         # self.__num_threads = numcodecs.blosc.get_nthreads()
+         # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+         # self.__table_name = table_name
+         # # self.__overwrite_existing_zarr_store = overwrite_existing_zarr_store
+
+     ############################################################################
+     ############################################################################
+     def __zarr_info_to_table(
+         self,
+         table_name,
+         ship_name,
+         cruise_name,
+         sensor_name,  # : Constants, TODO: convert to enum
+         file_name,
+         min_echo_range,
+         max_echo_range,
+         num_ping_time_dropna,
+         start_time,
+         end_time,
+         frequencies,
+         channels,
+         water_level,
+     ):
+         print("Writing Zarr information to DynamoDB table.")
+         dynamodb_manager = DynamoDBManager()
+         dynamodb_manager.update_item(
+             table_name=table_name,
+             key={
+                 "FILE_NAME": {"S": file_name},  # Partition Key
+                 "CRUISE_NAME": {"S": cruise_name},  # Sort Key
+             },
+             expression_attribute_names={
+                 "#CH": "CHANNELS",
+                 "#ET": "END_TIME",
+                 # "#ED": "ERROR_DETAIL",
+                 "#FR": "FREQUENCIES",
+                 "#MA": "MAX_ECHO_RANGE",
+                 "#MI": "MIN_ECHO_RANGE",
+                 "#ND": "NUM_PING_TIME_DROPNA",
+                 "#PT": "PIPELINE_TIME",
+                 "#SE": "SENSOR_NAME",
+                 "#SH": "SHIP_NAME",
+                 "#ST": "START_TIME",
+                 "#WL": "WATER_LEVEL",
+             },
+             expression_attribute_values={
+                 ":ch": {"L": [{"S": i} for i in channels]},
+                 ":et": {"S": end_time},
+                 # ":ed": {"S": ""},
+                 ":fr": {"L": [{"N": str(i)} for i in frequencies]},
+                 ":ma": {"N": str(np.round(max_echo_range, 4))},
+                 ":mi": {"N": str(np.round(min_echo_range, 4))},
+                 ":nd": {"N": str(num_ping_time_dropna)},
+                 ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
+                 ":se": {"S": sensor_name},
+                 ":sh": {"S": ship_name},
+                 ":st": {"S": start_time},
+                 ":wl": {"N": str(np.round(water_level, 2))},
+             },
+             update_expression=(
+                 "SET "
+                 "#CH = :ch, "
+                 "#ET = :et, "
+                 "#FR = :fr, "
+                 "#MA = :ma, "
+                 "#MI = :mi, "
+                 "#ND = :nd, "
+                 "#PT = :pt, "
+                 "#SE = :se, "
+                 "#SH = :sh, "
+                 "#ST = :st, "
+                 "#WL = :wl"
+             ),
+         )
+         print("Done writing Zarr information to DynamoDB table.")
+
+     ############################################################################
+     ############################################################################
+     ############################################################################
+     def __upload_files_to_output_bucket(
+         self,
+         output_bucket_name: str,
+         local_directory: str,  # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
+         object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
+         endpoint_url,
+     ):
+         # Note: this will be passed credentials if using NODD
+         # TODO: this will not work if the local_directory is anywhere other than the current folder
+         # see test_s3_manager test_upload...pool_executor for solution
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         print("Uploading files using thread pool executor.")
+         all_files = []
+         for subdir, dirs, files in os.walk(
+             local_directory
+         ):  # os.path.basename(s3_manager_test_path.joinpath("HB0707.zarr/"))
+             for file in files:
+                 local_path = os.path.join(subdir, file)
+                 s3_key = os.path.join(object_prefix, local_path)
+                 all_files.append([local_path, s3_key])
+         # all_files
+         all_uploads = s3_manager.upload_files_with_thread_pool_executor(
+             output_bucket_name=output_bucket_name,
+             all_files=all_files,
+         )
+         return all_uploads
+
+     ############################################################################
+
+     ############################################################################
+     def raw_to_zarr(
+         self,
+         table_name,
+         input_bucket_name,
+         output_bucket_name,
+         ship_name,
+         cruise_name,
+         sensor_name,
+         raw_file_name,
+         endpoint_url=None,
+         include_bot=True,
+     ):
+         """
+         Downloads the raw files, processes them with echopype, writes geojson, and uploads files
+         to the nodd bucket.
+         """
+         print(f"Opening raw: {raw_file_name} and creating zarr store.")
+         geometry_manager = GeometryManager()
+         cleaner = Cleaner()
+         cleaner.delete_local_files(
+             file_types=["*.zarr", "*.json"]
+         )  # TODO: include bot and raw?
+
+         s3_manager = S3Manager(endpoint_url=endpoint_url)
+         s3_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+         )
+         bottom_file_name = f"{Path(raw_file_name).stem}.bot"
+         s3_bottom_file_path = (
+             f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+         )
+         s3_manager.download_file(
+             bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+         )
+         # TODO: add the bottom file
+         if include_bot:
+             s3_manager.download_file(
+                 bucket_name=input_bucket_name,
+                 key=s3_bottom_file_path,
+                 file_name=bottom_file_name,
+             )
+
+         try:
+             gc.collect()
+             print("Opening raw file with echopype.")
+             # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
+             # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
+             echodata = ep.open_raw(
+                 raw_file=raw_file_name,
+                 sonar_model=sensor_name,
+                 include_bot=include_bot,
+                 # include_idx=?
+                 # use_swap=True,
+                 # max_chunk_size=300,
+                 # storage_options={'anon': True }  # 'endpoint_url': self.endpoint_url}  # this was creating problems
+             )
+             print("Compute volume backscattering strength (Sv) from raw dataset.")
+             ds_sv = ep.calibrate.compute_Sv(echodata)
+             ds_sv = ep.consolidate.add_depth(
+                 ds_sv, echodata
+             )  # TODO: consolidate with other depth values
+
+             water_level = get_water_level(ds_sv)
+
+             gc.collect()
+             print("Done computing volume backscattering strength (Sv) from raw dataset.")
+             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
+             # but is not written out with ds_sv
+             if "detected_seafloor_depth" in list(echodata.vendor.variables):
+                 ds_sv["detected_seafloor_depth"] = (
+                     echodata.vendor.detected_seafloor_depth
+                 )
+             #
+             frequencies = echodata.environment.frequency_nominal.values
+             #################################################################
+             # Get GPS coordinates
+             gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
+                 echodata=echodata,
+                 output_bucket_name=output_bucket_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 endpoint_url=endpoint_url,
+                 write_geojson=True,
+             )
+             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
+             ds_sv.latitude.values = (
+                 lat  # overwriting echopype gps values to include missing values
+             )
+             ds_sv.longitude.values = lon
+             # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+             #################################################################
+             # Technically the min_echo_range would be 0 m.
+             # TODO: this var name is supposed to represent the minimum resolution of depth measurements
+             # TODO: revert this so that smaller diffs can be used
+             # The finest the resolution can be is 0.25 meters
+             min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
+             # For the HB0710 cruise the depths vary from 499.7215 @ 19 cm to 2999.4805 @ 1 cm. Moving that back
+             # inline with the
+             min_echo_range = np.max(
+                 [0.20, min_echo_range]
+             )  # TODO: experiment with 0.25 and 0.50
+
+             max_echo_range = float(np.nanmax(ds_sv.echo_range))
+
+             # This is the number of non-missing values found throughout the lat/lon
+             num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
+             #
+             start_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+             )
+             end_time = (
+                 np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+             )
+             channels = list(ds_sv.channel.values)
+             #
+             #################################################################
+             # Create the zarr store
+             store_name = f"{Path(raw_file_name).stem}.zarr"
+             # Sv = ds_sv.Sv
+             # ds_sv['Sv'] = Sv.astype('int32', copy=False)
+             ds_sv.to_zarr(
+                 store=store_name,
+                 zarr_format=3,
+                 consolidated=False,
+                 write_empty_chunks=False,
+             )  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+             gc.collect()
+             #################################################################
+             output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+             #################################################################
+             # If the zarr store already exists then delete it
+             s3_manager = S3Manager(endpoint_url=endpoint_url)
+             child_objects = s3_manager.get_child_objects(
+                 bucket_name=output_bucket_name,
+                 sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+             )
+             if len(child_objects) > 0:
+                 print(
+                     "Zarr store dataset already exists in s3, deleting existing and continuing."
+                 )
+                 s3_manager.delete_nodd_objects(
+                     bucket_name=output_bucket_name,
+                     objects=child_objects,
+                 )
+             #################################################################
+             self.__upload_files_to_output_bucket(
+                 output_bucket_name=output_bucket_name,
+                 local_directory=store_name,
+                 object_prefix=output_zarr_prefix,
+                 endpoint_url=endpoint_url,
+             )
+             #################################################################
+             self.__zarr_info_to_table(
+                 table_name=table_name,
+                 ship_name=ship_name,
+                 cruise_name=cruise_name,
+                 sensor_name=sensor_name,
+                 file_name=raw_file_name,
+                 min_echo_range=min_echo_range,
+                 max_echo_range=max_echo_range,
+                 num_ping_time_dropna=num_ping_time_dropna,
+                 start_time=start_time,
+                 end_time=end_time,
+                 frequencies=frequencies,
+                 channels=channels,
+                 water_level=water_level,
+             )
+             #######################################################################
+             # TODO: verify count of objects matches, publish message, update status
+             #######################################################################
+             print("Finished raw-to-zarr conversion.")
+         except Exception as err:
+             print(
+                 f"Exception encountered creating local Zarr store with echopype: {err}"
+             )
+             raise RuntimeError(f"Problem creating local Zarr store, {err}")
+         finally:
+             gc.collect()
+             print("Finally.")
+             cleaner.delete_local_files(
+                 file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+             )
+             print("Done creating local zarr store.")
+
+     ############################################################################
+     # TODO: does this get called?
+     # def execute(self, input_message):
+     #     ship_name = input_message['shipName']
+     #     cruise_name = input_message['cruiseName']
+     #     sensor_name = input_message['sensorName']
+     #     input_file_name = input_message['fileName']
+     #     #
+     #     try:
+     #         self.__update_processing_status(
+     #             file_name=input_file_name,
+     #             cruise_name=cruise_name,
+     #             pipeline_status="PROCESSING_RAW_TO_ZARR"
+     #         )
+     #         #######################################################################
+     #         store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
+     #         output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
+     #         bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
+     #         zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
+     #         #
+     #         os.chdir(TEMPDIR)  # Lambdas require use of temp directory
+     #         #######################################################################
+     #         #######################################################################
+     #         # Check if zarr store already exists
+     #         s3_objects = self.__s3.list_objects(
+     #             bucket_name=self.__output_bucket,
+     #             prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
+     #             access_key_id=self.__output_bucket_access_key,
+     #             secret_access_key=self.__output_bucket_secret_access_key
+     #         )
+     #         if len(s3_objects) > 0:
+     #             print('Zarr store dataset already exists in s3, deleting existing and continuing.')
+     #             self.__s3.delete_objects(
+     #                 bucket_name=self.__output_bucket,
+     #                 objects=s3_objects,
+     #                 access_key_id=self.__output_bucket_access_key,
+     #                 secret_access_key=self.__output_bucket_secret_access_key
+     #             )
+     #         #######################################################################
+     #         # self.__delete_all_local_raw_and_zarr_files()
+     #         Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
+     #         self.__s3.download_file(
+     #             bucket_name=self.__input_bucket,
+     #             key=bucket_key,
+     #             file_name=input_file_name
+     #         )
+     #         self.__create_local_zarr_store(
+     #             raw_file_name=input_file_name,
+     #             cruise_name=cruise_name,
+     #             sensor_name=sensor_name,
+     #             output_zarr_prefix=output_zarr_prefix,
+     #             store_name=store_name
+     #         )
+     #         #######################################################################
+     #         self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
+     #         #######################################################################
+     #         # # TODO: verify count of objects matches
+     #         # s3_objects = self.__s3.list_objects(
+     #         #     bucket_name=self.__output_bucket,
+     #         #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
+     #         #     access_key_id=self.__output_bucket_access_key,
+     #         #     secret_access_key=self.__output_bucket_secret_access_key
+     #         # )
+     #         #######################################################################
+     #         self.__update_processing_status(
+     #             file_name=input_file_name,
+     #             cruise_name=cruise_name,
+     #             pipeline_status='SUCCESS_RAW_TO_ZARR'
+     #         )
+     #         #######################################################################
+     #         self.__publish_done_message(input_message)
+     #         #######################################################################
+     #         # except Exception as err:
+     #         #     print(f'Exception encountered: {err}')
+     #         #     self.__update_processing_status(
+     #         #         file_name=input_file_name,
+     #         #         cruise_name=cruise_name,
+     #         #         pipeline_status='FAILURE_RAW_TO_ZARR',
+     #         #         error_message=str(err),
+     #         #     )
+     #     finally:
+     #         self.__delete_all_local_raw_and_zarr_files()
+     #######################################################################
+
+     ############################################################################
+
+
+ ################################################################################
+ ############################################################################
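For orientation, here is a minimal invocation sketch of the new entry point, matching the raw_to_zarr signature added above. The table and output-bucket names are hypothetical placeholders; the ship, cruise, sensor, and file names are taken from examples in the code comments, and it assumes RawToZarr is re-exported by the new water_column_sonar_processing/processing/__init__.py:

    from water_column_sonar_processing.processing import RawToZarr

    raw_to_zarr = RawToZarr()
    raw_to_zarr.raw_to_zarr(
        table_name="example-pipeline-table",  # hypothetical DynamoDB table
        input_bucket_name="noaa-wcsd-pds",  # source bucket referenced in the comments
        output_bucket_name="example-output-bucket",  # hypothetical destination bucket
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0706",
        sensor_name="EK60",
        raw_file_name="D20070724-T042400.raw",
        endpoint_url=None,  # default targets AWS S3; set for a local S3 stand-in
        include_bot=True,  # also download the matching .bot bottom file
    )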
water_column_sonar_processing/utility/__init__.py (new file)
@@ -0,0 +1,13 @@
+ from .cleaner import Cleaner
+ from .constants import Constants, Coordinates, Instruments
+ from .pipeline_status import PipelineStatus
+ from .timestamp import Timestamp
+
+ __all__ = [
+     "Cleaner",
+     "Instruments",
+     "Constants",
+     "Coordinates",
+     "PipelineStatus",
+     "Timestamp",
+ ]
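Because the subpackage __init__.py above re-exports these names, downstream code can import them directly from the subpackage; a one-line sketch:

    from water_column_sonar_processing.utility import Cleaner, Constants, Coordinates, PipelineStatus, Timestamp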
{model → water_column_sonar_processing}/utility/cleaner.py
@@ -1,21 +1,20 @@
- import os
  import glob
+ import os
  import shutil


  ###########################################################
  class Cleaner:
-     @staticmethod
-     def delete_local_files(
-         file_types=['*.raw*', '*.zarr']  # '*.json'
-     ):
-         print('Deleting all local raw and zarr files')
+     def delete_local_files(self, file_types=["*.raw*", "*.model"]):  # '*.json'
+         # TODO: add .zarr to this
+         print("Deleting all local raw and model files")
          for i in file_types:
              for j in glob.glob(i):
                  if os.path.isdir(j):
                      shutil.rmtree(j, ignore_errors=True)
                  elif os.path.isfile(j):
                      os.remove(j)
-         print('done deleting')
+         print("done deleting")

-     ###########################################################
+
+     ###########################################################
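Note the behavioral change in this hunk: delete_local_files is now an instance method, and its default glob list matches "*.model" instead of "*.zarr" (the TODO above suggests .zarr should return). A minimal sketch of the updated call, passing "*.zarr" explicitly to reproduce the old default:

    from water_column_sonar_processing.utility import Cleaner

    cleaner = Cleaner()  # instantiate; no longer a @staticmethod
    cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])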
water_column_sonar_processing/utility/constants.py (new file)
@@ -0,0 +1,118 @@
+ from enum import Enum, unique
+
+
+ @unique
+ class Instruments(Enum):
+     # Values are determined using a scan of the first byte of data
+     EK60 = "EK60"
+     EK80 = "EK80"
+
+
+ # @unique
+ class Constants(Enum):
+     """
+     See here for data type support: https://github.com/zarr-developers/zarr-extensions/tree/main/data-types
+     """
+
+     TILE_SIZE = 512
+
+     # Average https://noaa-wcsd-zarr-pds.s3.us-east-1.amazonaws.com/level_2/Henry_B._Bigelow/HB0902/EK60/HB0902.zarr/time/927
+     # chunk size is ~1.3 kB, HB0902 cruise takes ~30 seconds to load all time/lat/lon dataset
+     # NOTE: larger value here will speed up the TurfJS download of dataset in the UI
+     # Problem interpolating the dataset: cannot reshape array of size 65536 into shape...
+     # TODO: needs to be enum
+     SPATIOTEMPORAL_CHUNK_SIZE = int(2**16) - 1024
+     # int(2**16) - 1024,
+     # int(2**16) - 1024,
+     # e.g. int(2**14)
+     # TODO: create test for SPATIOTEMPORAL_CHUNK_SIZE with requirement!
+
+     LEVEL_0 = "raw"
+     LEVEL_1 = "level_1"  # from bucket path
+     LEVEL_2 = "level_2"
+     LEVEL_3 = "level_3"
+
+     EK60 = "EK60"  # TODO: use for "instrument"
+     EK80 = "EK80"
+     # INSTRUMENT = EK60 | EK80
+
+
+ class Coordinates(Enum):
+     """
+     Should try to specify
+         dtype
+         units
+         long_name — most readable description of variable
+         standard_name — name in lowercase and snake_case
+     """
+
+     PROJECT_NAME = "echofish"
+
+     DEPTH = "depth"
+     DEPTH_DTYPE = "float32"
+     DEPTH_UNITS = "m"  # TODO: Pint? <https://pint.readthedocs.io/en/stable/>
+     DEPTH_LONG_NAME = "Depth below surface"
+     DEPTH_STANDARD_NAME = "depth"
+
+     TIME = "time"
+     TIME_DTYPE = "float64"
+     # Note: units and calendar are used downstream by Xarray
+     TIME_UNITS = "seconds since 1970-01-01 00:00:00"
+     TIME_LONG_NAME = "Timestamp of each ping"
+     TIME_STANDARD_NAME = "time"
+     TIME_CALENDAR = "proleptic_gregorian"
+     # TODO: create test for reading out timestamps in Xarray
+
+     FREQUENCY = "frequency"
+     FREQUENCY_DTYPE = "uint64"
+     FREQUENCY_UNITS = "Hz"
+     FREQUENCY_LONG_NAME = "Transducer frequency"
+     FREQUENCY_STANDARD_NAME = "sound_frequency"
+
+     LATITUDE = "latitude"
+     LATITUDE_DTYPE = "float32"
+     LATITUDE_UNITS = "degrees_north"
+     LATITUDE_LONG_NAME = "Latitude"
+     LATITUDE_STANDARD_NAME = "latitude"
+
+     LONGITUDE = "longitude"
+     LONGITUDE_DTYPE = "float32"
+     LONGITUDE_UNITS = "degrees_east"
+     LONGITUDE_LONG_NAME = "Longitude"
+     LONGITUDE_STANDARD_NAME = "longitude"
+
+     BOTTOM = "bottom"
+     BOTTOM_DTYPE = "float32"
+     BOTTOM_UNITS = "m"
+     BOTTOM_LONG_NAME = "Detected sea floor depth"
+     BOTTOM_STANDARD_NAME = "bottom"
+
+     SPEED = "speed"
+     SPEED_DTYPE = "float32"
+     SPEED_UNITS = "Knots"
+     SPEED_LONG_NAME = "Nautical miles per hour"
+     SPEED_STANDARD_NAME = "speed"
+
+     # This is the width of each slice of the water column
+     DISTANCE = "distance"
+     DISTANCE_DTYPE = "float32"
+     DISTANCE_UNITS = "m"
+     DISTANCE_LONG_NAME = "GPS distance"
+     DISTANCE_STANDARD_NAME = "distance"
+
+     SV = "Sv"
+     SV_DTYPE = "float32"  # int64
+     SV_UNITS = "dB"
+     SV_LONG_NAME = "Volume backscattering strength (Sv re 1 m-1)"
+     SV_STANDARD_NAME = "volume_backscattering_strength"
+
+
+ class BatchShape(Enum):
+     """
+     The tensor shape of a machine learning sample.
+     """
+
+     DEPTH = 2
+     TIME = 3
+     FREQUENCY = 4
+     BATCH_SIZE = 5
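Because these are Enum members, consumers read the underlying constants through .value. A minimal sketch using members defined above (the attribute-dictionary shape is illustrative, not an API of this package):

    from water_column_sonar_processing.utility import Constants, Coordinates

    chunk_size = Constants.SPATIOTEMPORAL_CHUNK_SIZE.value  # 2**16 - 1024 = 64512
    depth_attrs = {
        "units": Coordinates.DEPTH_UNITS.value,  # "m"
        "long_name": Coordinates.DEPTH_LONG_NAME.value,  # "Depth below surface"
        "standard_name": Coordinates.DEPTH_STANDARD_NAME.value,  # "depth"
    }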