water-column-sonar-processing 25.11.1__py3-none-any.whl → 26.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (22)
  1. water_column_sonar_processing/aws/s3_manager.py +2 -4
  2. water_column_sonar_processing/aws/s3fs_manager.py +1 -9
  3. water_column_sonar_processing/cruise/create_empty_zarr_store.py +19 -81
  4. water_column_sonar_processing/cruise/resample_regrid.py +88 -104
  5. water_column_sonar_processing/geometry/__init__.py +2 -0
  6. water_column_sonar_processing/geometry/elevation_manager.py +2 -2
  7. water_column_sonar_processing/geometry/geometry_manager.py +11 -13
  8. water_column_sonar_processing/geometry/line_simplification.py +10 -10
  9. water_column_sonar_processing/geometry/pmtile_generation.py +8 -3
  10. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  11. water_column_sonar_processing/index/index_manager.py +43 -46
  12. water_column_sonar_processing/model/zarr_manager.py +533 -514
  13. water_column_sonar_processing/processing/raw_to_zarr.py +45 -139
  14. water_column_sonar_processing/utility/cleaner.py +2 -1
  15. water_column_sonar_processing/utility/constants.py +29 -29
  16. water_column_sonar_processing-26.1.14.dist-info/METADATA +240 -0
  17. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/RECORD +20 -20
  18. water_column_sonar_processing/process.py +0 -149
  19. water_column_sonar_processing-25.11.1.dist-info/METADATA +0 -182
  20. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/WHEEL +0 -0
  21. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/licenses/LICENSE +0 -0
  22. {water_column_sonar_processing-25.11.1.dist-info → water_column_sonar_processing-26.1.14.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -2,22 +2,23 @@ import gc
 import os
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 
 import echopype as ep
 import numpy as np
 from zarr.codecs import Blosc
 
 from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
-from water_column_sonar_processing.geometry import GeometryManager
 from water_column_sonar_processing.utility import Cleaner
-
+from water_column_sonar_processing.utility import Constants
 
 # from numcodecs import Blosc
+level_1 = str(Constants.LEVEL_1.value)
 
 
 def get_water_level(ds):
     """
-    needs to be mocked up so thats why this is broken out
+    needs to be mocked up so that's why this is broken out
     """
     if "water_level" in ds.keys():
         return ds.water_level.values
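The import block swaps the GeometryManager dependency for the Constants enum, which now supplies the bucket-path prefix. A minimal sketch of what the new module-level level_1 binding evaluates to, assuming only the Constants enum shown in the constants.py hunks below (the ship/cruise/sensor segment is the example path quoted in the diff itself):

    from enum import Enum, unique

    @unique
    class Constants(Enum):
        LEVEL_1 = "level_1"  # from bucket path

    level_1 = str(Constants.LEVEL_1.value)
    # Prefixes are then built exactly as in the diff:
    output_zarr_prefix = f"{level_1}/Henry_B._Bigelow/HB0706/EK60/"
    print(output_zarr_prefix)  # level_1/Henry_B._Bigelow/HB0706/EK60/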
@@ -46,8 +47,8 @@ class RawToZarr:
 
     ############################################################################
     ############################################################################
+    @staticmethod
     def __zarr_info_to_table(
-        self,
         table_name,
         ship_name,
         cruise_name,
@@ -118,10 +119,11 @@ class RawToZarr:
     ############################################################################
     ############################################################################
     ############################################################################
+    @staticmethod
     def __upload_files_to_output_bucket(
-        self,
         output_bucket_name: str,
-        local_directory: str,  # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
+        local_directory: str,
+        # e.g. 'D20070724-T042400.zarr'  # TODO: problem: if this is not in the current directory
         object_prefix: str,  # e.g. "level_1/Henry_B._Bigelow/HB0706/EK60/"
         endpoint_url,
     ):
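Both private helpers drop an unused self parameter and become static methods. A tiny sketch of the pattern (the helper name and body here are hypothetical; the real signatures are in the two hunks above):

    class RawToZarr:
        @staticmethod
        def __helper(table_name: str) -> str:
            # No instance state is touched, so no `self` is needed.
            return table_name.upper()

        def run(self) -> str:
            # Name-mangled private statics remain callable through the instance.
            return self.__helper("echofish")

    print(RawToZarr().run())  # ECHOFISH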
@@ -157,7 +159,7 @@ class RawToZarr:
         cruise_name,
         sensor_name,
         raw_file_name,
-        endpoint_url=None,
+        endpoint_url: Optional[str] = None,
         include_bot=True,
     ):
         """
@@ -165,7 +167,7 @@ class RawToZarr:
         to the nodd bucket.
         """
         print(f"Opening raw: {raw_file_name} and creating zarr store.")
-        geometry_manager = GeometryManager()
+        # geometry_manager = GeometryManager()
         cleaner = Cleaner()
         cleaner.delete_local_files(
             file_types=["*.zarr", "*.json"]
@@ -193,70 +195,61 @@ class RawToZarr:
         try:
             gc.collect()
             print("Opening raw file with echopype.")
-            # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
-            # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
             echodata = ep.open_raw(
                 raw_file=raw_file_name,
                 sonar_model=sensor_name,
                 include_bot=include_bot,
-                # include_idx=?
-                # use_swap=True,
-                # max_chunk_size=300,
-                # storage_options={'anon': True }  # 'endpoint_url': self.endpoint_url}  # this was creating problems
             )
             print("Compute volume backscattering strength (Sv) from raw dataset.")
             ds_sv = ep.calibrate.compute_Sv(echodata)
-            ds_sv = ep.consolidate.add_depth(
-                ds_sv, echodata
-            )  # TODO: consolidate with other depth values
-
+            ds_sv = ep.consolidate.add_depth(ds_sv, echodata)
             water_level = get_water_level(ds_sv)
 
             gc.collect()
             print("Done computing volume backscatter strength (Sv) from raw dataset.")
             # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
-            # but is not written out with ds_sv
+            # but is not written out with ds_sv --> add to ds_sv
             if "detected_seafloor_depth" in list(echodata.vendor.variables):
                 ds_sv["detected_seafloor_depth"] = (
                     echodata.vendor.detected_seafloor_depth
                 )
             #
             frequencies = echodata.environment.frequency_nominal.values
+            if len(frequencies) != len(set(frequencies)):
+                raise Exception("Problem number of frequencies does not match channels")
             #################################################################
-            # Get GPS coordinates
-            gps_data, lat, lon = geometry_manager.read_echodata_gps_data(
-                echodata=echodata,
-                output_bucket_name=output_bucket_name,
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                file_name=raw_file_name,
-                endpoint_url=endpoint_url,
-                write_geojson=True,
-            )
+            # add gps data
             ds_sv = ep.consolidate.add_location(ds_sv, echodata)
-            ds_sv.latitude.values = (
-                lat  # overwriting echopype gps values to include missing values
-            )
-            ds_sv.longitude.values = lon
-            # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
+
+            if np.any(ds_sv.latitude.values > 90.0) or np.any(
+                ds_sv.latitude.values < -90.0
+            ):
+                ds_sv.latitude.values[np.where(ds_sv.latitude.values > 90.0)] = np.nan
+                ds_sv.latitude.values[np.where(ds_sv.latitude.values < -90.0)] = np.nan
+
+            if np.any(ds_sv.longitude.values > 180.0) or np.any(
+                ds_sv.longitude.values < -180.0
+            ):
+                ds_sv.longitude.values[np.where(ds_sv.longitude.values > 180.0)] = (
+                    np.nan
+                )
+                ds_sv.longitude.values[np.where(ds_sv.longitude.values < -180.0)] = (
+                    np.nan
+                )
+
             #################################################################
-            # Technically the min_echo_range would be 0 m.
-            # TODO: this var name is supposed to represent minimum resolution of depth measurements
-            # TODO revert this so that smaller diffs can be used
-            # The most minimum the resolution can be is as small as 0.25 meters
             min_echo_range = np.round(np.nanmin(np.diff(ds_sv.echo_range.values)), 2)
-            # For the HB0710 cruise the depths vary from 499.7215 @19cm to 2999.4805 @ 1cm. Moving that back
-            # inline with the
-            min_echo_range = np.max(
-                [0.20, min_echo_range]
-            )  # TODO: experiment with 0.25 and 0.50
-
             max_echo_range = float(np.nanmax(ds_sv.echo_range))
 
             # This is the number of missing values found throughout the lat/lon
-            num_ping_time_dropna = lat[~np.isnan(lat)].shape[0]  # symmetric to lon
-            #
+            lat = ds_sv.latitude.values
+            lon = ds_sv.longitude.values
+            num_ping_time_drop_na = np.min(
+                [  # Isn't always symmetric
+                    lat[~np.isnan(lat)].shape[0],
+                    lon[~np.isnan(lon)].shape[0],
+                ]
+            )
             start_time = (
                 np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
             )
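The rewritten block stops overwriting echopype's GPS arrays with GeometryManager output; instead it masks impossible fixes to NaN in place and counts valid pings from both coordinates. A self-contained sketch of the same logic on a toy dataset (the values are made up; variable names follow the diff):

    import numpy as np
    import xarray as xr

    ds_sv = xr.Dataset(
        {
            "latitude": ("ping_time", np.array([41.2, 99.0, np.nan, 40.9])),
            "longitude": ("ping_time", np.array([-71.0, -71.1, -200.0, -71.2])),
        }
    )

    # Out-of-range fixes become NaN rather than being dropped.
    lat = ds_sv.latitude.values
    lon = ds_sv.longitude.values
    lat[np.where((lat > 90.0) | (lat < -90.0))] = np.nan
    lon[np.where((lon > 180.0) | (lon < -180.0))] = np.nan

    # NaNs "aren't always symmetric" between the two arrays, hence the min().
    num_ping_time_drop_na = np.min(
        [
            lat[~np.isnan(lat)].shape[0],  # 2 valid latitudes
            lon[~np.isnan(lon)].shape[0],  # 3 valid longitudes
        ]
    )
    print(num_ping_time_drop_na)  # 2

    # The new duplicate-frequency guard from the same hunk (toy frequencies):
    frequencies = np.array([18000.0, 38000.0, 120000.0])
    assert len(frequencies) == len(set(frequencies))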
@@ -268,23 +261,21 @@ class RawToZarr:
             #################################################################
             # Create the zarr store
             store_name = f"{Path(raw_file_name).stem}.zarr"
-            # Sv = ds_sv.Sv
-            # ds_sv['Sv'] = Sv.astype('int32', copy=False)
             ds_sv.to_zarr(
                 store=store_name,
                 zarr_format=3,
                 consolidated=False,
                 write_empty_chunks=False,
-            )  # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+            )
             gc.collect()
             #################################################################
-            output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
+            output_zarr_prefix = f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/"
             #################################################################
             # If zarr store already exists then delete
             s3_manager = S3Manager(endpoint_url=endpoint_url)
             child_objects = s3_manager.get_child_objects(
                 bucket_name=output_bucket_name,
-                sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
+                sub_prefix=f"{level_1}/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
             )
             if len(child_objects) > 0:
                 print(
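The write itself only loses stale comments; the S3 prefixes now interpolate the level_1 constant instead of a hardcoded string. A sketch of the same to_zarr call on a toy dataset (the kwargs mirror the diff; this assumes an xarray/zarr-python combination that supports Zarr format 3, and the toy Sv variable is an assumption):

    import numpy as np
    import xarray as xr

    ds_sv = xr.Dataset(
        {"Sv": (("ping_time", "range_sample"), np.zeros((4, 8), dtype=np.float32))}
    )
    ds_sv.to_zarr(
        store="D20070724-T042400.zarr",  # f"{Path(raw_file_name).stem}.zarr" in the diff
        zarr_format=3,
        consolidated=False,
        write_empty_chunks=False,
    )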
@@ -310,7 +301,7 @@ class RawToZarr:
                 file_name=raw_file_name,
                 min_echo_range=min_echo_range,
                 max_echo_range=max_echo_range,
-                num_ping_time_dropna=num_ping_time_dropna,
+                num_ping_time_dropna=num_ping_time_drop_na,
                 start_time=start_time,
                 end_time=end_time,
                 frequencies=frequencies,
@@ -320,7 +311,6 @@ class RawToZarr:
             #######################################################################
             # TODO: verify count of objects matches, publish message, update status
             #######################################################################
-            print("Finished raw-to-zarr conversion.")
         except Exception as err:
             print(
                 f"Exception encountered creating local Zarr store with echopype: {err}"
@@ -328,96 +318,12 @@ class RawToZarr:
             raise RuntimeError(f"Problem creating local Zarr store, {err}")
         finally:
             gc.collect()
-            print("Finally.")
             cleaner.delete_local_files(
                 file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
             )
-            print("Done creating local zarr store.")
+            print("Finished raw-to-zarr conversion.")
 
     ############################################################################
-    # TODO: does this get called?
-    # def execute(self, input_message):
-    #     ship_name = input_message['shipName']
-    #     cruise_name = input_message['cruiseName']
-    #     sensor_name = input_message['sensorName']
-    #     input_file_name = input_message['fileName']
-    #     #
-    #     try:
-    #         self.__update_processing_status(
-    #             file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             pipeline_status="PROCESSING_RAW_TO_ZARR"
-    #         )
-    #         #######################################################################
-    #         store_name = f"{os.path.splitext(input_file_name)[0]}.zarr"
-    #         output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}"
-    #         bucket_key = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{input_file_name}"
-    #         zarr_prefix = os.path.join("level_1", ship_name, cruise_name, sensor_name)
-    #         #
-    #         os.chdir(TEMPDIR)  # Lambdas require use of temp directory
-    #         #######################################################################
-    #         #######################################################################
-    #         # Check if zarr store already exists
-    #         s3_objects = self.__s3.list_objects(
-    #             bucket_name=self.__output_bucket,
-    #             prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-    #             access_key_id=self.__output_bucket_access_key,
-    #             secret_access_key=self.__output_bucket_secret_access_key
-    #         )
-    #         if len(s3_objects) > 0:
-    #             print('Zarr store dataset already exists in s3, deleting existing and continuing.')
-    #             self.__s3.delete_objects(
-    #                 bucket_name=self.__output_bucket,
-    #                 objects=s3_objects,
-    #                 access_key_id=self.__output_bucket_access_key,
-    #                 secret_access_key=self.__output_bucket_secret_access_key
-    #             )
-    #         #######################################################################
-    #         # self.__delete_all_local_raw_and_zarr_files()
-    #         Cleaner.delete_local_files(file_types=["*.raw*", "*.zarr"])
-    #         self.__s3.download_file(
-    #             bucket_name=self.__input_bucket,
-    #             key=bucket_key,
-    #             file_name=input_file_name
-    #         )
-    #         self.__create_local_zarr_store(
-    #             raw_file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             sensor_name=sensor_name,
-    #             output_zarr_prefix=output_zarr_prefix,
-    #             store_name=store_name
-    #         )
-    #         #######################################################################
-    #         self.__upload_files_to_output_bucket(store_name, output_zarr_prefix)
-    #         #######################################################################
-    #         # # TODO: verify count of objects matches
-    #         # s3_objects = self.__s3.list_objects(
-    #         #     bucket_name=self.__output_bucket,
-    #         #     prefix=f"{zarr_prefix}/{os.path.splitext(input_file_name)[0]}.zarr/",
-    #         #     access_key_id=self.__output_bucket_access_key,
-    #         #     secret_access_key=self.__output_bucket_secret_access_key
-    #         # )
-    #         #######################################################################
-    #         self.__update_processing_status(
-    #             file_name=input_file_name,
-    #             cruise_name=cruise_name,
-    #             pipeline_status='SUCCESS_RAW_TO_ZARR'
-    #         )
-    #         #######################################################################
-    #         self.__publish_done_message(input_message)
-    #         #######################################################################
-    #         # except Exception as err:
-    #         #     print(f'Exception encountered: {err}')
-    #         #     self.__update_processing_status(
-    #         #         file_name=input_file_name,
-    #         #         cruise_name=cruise_name,
-    #         #         pipeline_status='FAILURE_RAW_TO_ZARR',
-    #         #         error_message=str(err),
-    #         #     )
-    #     finally:
-    #         self.__delete_all_local_raw_and_zarr_files()
-    #######################################################################
-
     ############################################################################
 
 
water_column_sonar_processing/utility/cleaner.py
@@ -5,7 +5,8 @@ import shutil
 
 ###########################################################
 class Cleaner:
-    def delete_local_files(self, file_types=["*.raw*", "*.model"]):  # '*.json'
+    @staticmethod
+    def delete_local_files(file_types=["*.raw*", "*.model"]):  # '*.json'
         # TODO: add .zarr to this
         print("Deleting all local raw and model files")
         for i in file_types:
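Here too the method body is unchanged; only the decorator and signature change. For orientation, a sketch of what a glob-based cleaner like this typically does (an assumed implementation; only the signature comes from the diff, and shutil is imported per the hunk header):

    import glob
    import os
    import shutil

    class Cleaner:
        @staticmethod
        def delete_local_files(file_types=("*.raw*", "*.model")):
            for pattern in file_types:
                for match in glob.glob(pattern):
                    if os.path.isdir(match):
                        shutil.rmtree(match)  # e.g. .zarr stores are directories
                    else:
                        os.remove(match)

    Cleaner.delete_local_files(file_types=("*.zarr", "*.json"))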
water_column_sonar_processing/utility/constants.py
@@ -1,5 +1,7 @@
 from enum import Enum, unique
 
+import numpy as np
+
 
 @unique
 class Instruments(Enum):
@@ -21,15 +23,12 @@ class Constants(Enum):
     # NOTE: larger value here will speed up the TurfJS download of dataset in the UI
     # Problem interpolating the dataset: cannot reshape array of size 65536 into shape...
     # TODO: needs to be enum
-    SPATIOTEMPORAL_CHUNK_SIZE = int(2**16) - 1024
-    # int(2**16) - 1024,
-    # int(2**16) - 1024,
-    # e.g. int(2**14)
+    SPATIOTEMPORAL_CHUNK_SIZE = int(1e6)  # int(2 ** 16) - 1024
     # TODO: create test for SPATIOTEMPORAL_CHUNK_SIZE with requirement!
 
     LEVEL_0 = "raw"
     LEVEL_1 = "level_1"  # from bucket path
-    LEVEL_2 = "level_2"
+    LEVEL_2 = "level_2a"  # updating zarr store path for zarr v3
     LEVEL_3 = "level_3"
 
     EK60 = "EK60"  # TODO: use for "instrument"
@@ -39,11 +38,10 @@ class Constants(Enum):
 
 class Coordinates(Enum):
     """
-    Should try to specify
-    dtype
-    units
-    long_name most readable description of variable
-    standard_name — name in lowercase and snake_case
+    dtype: data type
+    units: netcdf defined units
+    long_name: most readable description of variable
+    standard_name: name in lowercase and snake_case
     """
 
     PROJECT_NAME = "echofish"
@@ -54,65 +52,67 @@ class Coordinates(Enum):
     DEPTH_LONG_NAME = "Depth below surface"
     DEPTH_STANDARD_NAME = "depth"
 
+    # https://cfconventions.org/Data/cf-conventions/cf-conventions-1.8/cf-conventions.html#table-supported-units
     TIME = "time"
-    TIME_DTYPE = "float64"
+    TIME_DTYPE = "datetime64[ns]"
     # Note: units and calendar are used downstream by Xarray
-    TIME_UNITS = "seconds since 1970-01-01 00:00:00"
+    TIME_UNITS = "nanoseconds since 1970-01-01"
     TIME_LONG_NAME = "Timestamp of each ping"
     TIME_STANDARD_NAME = "time"
     TIME_CALENDAR = "proleptic_gregorian"
     # TODO: create test for reading out timestamps in Xarray
 
     FREQUENCY = "frequency"
-    FREQUENCY_DTYPE = "uint64"
+    FREQUENCY_DTYPE = np.uint64
     FREQUENCY_UNITS = "Hz"
     FREQUENCY_LONG_NAME = "Transducer frequency"
     FREQUENCY_STANDARD_NAME = "sound_frequency"
 
     LATITUDE = "latitude"
-    LATITUDE_DTYPE = "float32"
+    LATITUDE_DTYPE = np.float32
     LATITUDE_UNITS = "degrees_north"
     LATITUDE_LONG_NAME = "Latitude"
     LATITUDE_STANDARD_NAME = "latitude"
 
     LONGITUDE = "longitude"
-    LONGITUDE_DTYPE = "float32"
+    LONGITUDE_DTYPE = np.float32
     LONGITUDE_UNITS = "degrees_east"
     LONGITUDE_LONG_NAME = "Longitude"
     LONGITUDE_STANDARD_NAME = "longitude"
 
     BOTTOM = "bottom"
-    BOTTOM_DTYPE = "float32"
+    BOTTOM_DTYPE = np.float32
     BOTTOM_UNITS = "m"
     BOTTOM_LONG_NAME = "Detected sea floor depth"
     BOTTOM_STANDARD_NAME = "bottom"
 
     SPEED = "speed"
-    SPEED_DTYPE = "float32"
+    SPEED_DTYPE = np.float32
     SPEED_UNITS = "Knots"
     SPEED_LONG_NAME = "Nautical miles per hour"
     SPEED_STANDARD_NAME = "speed"
 
-    # This is the width of each slice of the water columns
+    # This is the width of each 'pixel' of the water columns
     DISTANCE = "distance"
-    DISTANCE_DTYPE = "float32"
+    DISTANCE_DTYPE = np.float32
     DISTANCE_UNITS = "m"
     DISTANCE_LONG_NAME = "GPS distance"
     DISTANCE_STANDARD_NAME = "distance"
 
     SV = "Sv"
-    SV_DTYPE = "float32"  # int64
+    SV_DTYPE = np.float32
     SV_UNITS = "dB"
     SV_LONG_NAME = "Volume backscattering strength (Sv re 1 m-1)"
     SV_STANDARD_NAME = "volume_backscattering_strength"
 
 
-class BatchShape(Enum):
-    """
-    The tensor shape of a machine learning sample.
-    """
-
-    DEPTH = 2
-    TIME = 3
-    FREQUENCY = 4
-    BATCH_SIZE = 5
+# TODO: delete this
+# class BatchShape(Enum):
+#     """
+#     The tensor shape of a machine learning sample.
+#     """
+#
+#     DEPTH = 2
+#     TIME = 3
+#     FREQUENCY = 4
+#     BATCH_SIZE = 5
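The dtype entries move from strings to NumPy types, and time switches from float seconds to nanosecond datetime64, which is what Xarray's units/calendar decoding expects. A sketch of how these enum fields could map onto CF-style coordinate attributes (the wiring is assumed; only the attribute values come from the enum above):

    import numpy as np
    import xarray as xr

    latitude = xr.DataArray(
        np.array([41.2, 40.9], dtype=np.float32),  # LATITUDE_DTYPE
        dims="ping_time",
        attrs={
            "units": "degrees_north",     # LATITUDE_UNITS
            "long_name": "Latitude",      # LATITUDE_LONG_NAME
            "standard_name": "latitude",  # LATITUDE_STANDARD_NAME
        },
    )

    ping_time = xr.DataArray(
        np.array(["2007-07-24T04:24:00"], dtype="datetime64[ns]"),  # TIME_DTYPE
        dims="ping_time",
    )
    # On write, Xarray encodes datetime64[ns] values with units and calendar,
    # e.g. "nanoseconds since 1970-01-01" and "proleptic_gregorian" per the enum.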