water-column-sonar-processing 25.1.7__py3-none-any.whl → 25.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of water-column-sonar-processing was flagged as a potentially problematic release.
Files changed (26)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +27 -32
  2. water_column_sonar_processing/aws/s3_manager.py +52 -64
  3. water_column_sonar_processing/aws/s3fs_manager.py +3 -9
  4. water_column_sonar_processing/cruise/create_empty_zarr_store.py +14 -14
  5. water_column_sonar_processing/cruise/datatree_manager.py +3 -6
  6. water_column_sonar_processing/cruise/resample_regrid.py +67 -49
  7. water_column_sonar_processing/geometry/__init__.py +7 -2
  8. water_column_sonar_processing/geometry/elevation_manager.py +16 -17
  9. water_column_sonar_processing/geometry/geometry_manager.py +25 -25
  10. water_column_sonar_processing/geometry/line_simplification.py +150 -0
  11. water_column_sonar_processing/geometry/pmtile_generation.py +99 -64
  12. water_column_sonar_processing/index/index_manager.py +67 -32
  13. water_column_sonar_processing/model/zarr_manager.py +32 -21
  14. water_column_sonar_processing/process.py +15 -13
  15. water_column_sonar_processing/processing/__init__.py +2 -2
  16. water_column_sonar_processing/processing/batch_downloader.py +66 -41
  17. water_column_sonar_processing/processing/raw_to_zarr.py +121 -82
  18. water_column_sonar_processing/utility/constants.py +10 -1
  19. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  20. {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.0.dist-info}/METADATA +21 -12
  21. water_column_sonar_processing-25.3.0.dist-info/RECORD +34 -0
  22. {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.0.dist-info}/WHEEL +1 -1
  23. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  24. water_column_sonar_processing-25.1.7.dist-info/RECORD +0 -34
  25. {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.0.dist-info/licenses}/LICENSE +0 -0
  26. {water_column_sonar_processing-25.1.7.dist-info → water_column_sonar_processing-25.3.0.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/processing/batch_downloader.py
@@ -1,10 +1,13 @@
- import xarray as xr
+ from typing import Optional
+
  import numpy as np
  import pandas as pd
+ import xarray as xr
  import xbatcher
- from typing import Optional
+
  # s3fs.core.setup_logging("DEBUG")

+
  class BatchDownloader:
  """
  Uses the xbatcher XbatchDownloader to download data from an xarray dataset. Connection
@@ -12,13 +15,13 @@ class BatchDownloader:
  """

  def __init__(
- self,
- bucket_name: Optional[str] = "noaa-wcsd-zarr-pds",
- ship_name: Optional[str] = "Henry_B._Bigelow",
- cruise_name: Optional[str] = "HB0707",
- sensor_name: Optional[str] = "EK60",
- patch_dims: Optional[int] = 64, # TODO: change to 64
- # input_steps: Optional[int] = 3,
+ self,
+ bucket_name: Optional[str] = "noaa-wcsd-zarr-pds",
+ ship_name: Optional[str] = "Henry_B._Bigelow",
+ cruise_name: Optional[str] = "HB0707",
+ sensor_name: Optional[str] = "EK60",
+ patch_dims: Optional[int] = 64, # TODO: change to 64
+ # input_steps: Optional[int] = 3,
  ):
  self.bucket_name = bucket_name
  self.ship_name = ship_name
@@ -28,7 +31,7 @@ class BatchDownloader:

  # TODO: move this to the s3fs module
  def get_s3_zarr_store(self) -> xr.Dataset:
- """ Returns an Xarray Dataset """
+ """Returns an Xarray Dataset"""
  s3_zarr_store_path = f"{self.bucket_name}/level_2/{self.ship_name}/{self.cruise_name}/{self.sensor_name}/{self.cruise_name}.zarr"
  # Info about the HB0707 cruise:
  # Time: ["2007-07-11T18:20:33.657573888", "2007-07-11T18:20:53.657573888", "2007-07-13T00:55:17.454448896"]
@@ -40,7 +43,9 @@ class BatchDownloader:
  # store = s3fs.S3Map(root=s3_zarr_store_path, s3=s3_file_system, check=False)

  # return xr.open_zarr(store=f"s3://{s3_zarr_store_path}", consolidated=True, storage_options={'anon': True})
- return xr.open_dataset(f"s3://{s3_zarr_store_path}", engine="zarr", storage_options={'anon': True})
+ return xr.open_dataset(
+ f"s3://{s3_zarr_store_path}", engine="zarr", storage_options={"anon": True}
+ )
  # return xr.open_zarr(store, consolidated=True)

  def get_toy_batch_generator(self) -> xbatcher.BatchGenerator:
@@ -48,14 +53,12 @@ class BatchDownloader:
  Returns a BatchGenerator with subsets of Sv data
  Note: this is synthetic data, for a smaller toy example
  """
- depth = np.arange(1, 21) # N meters
- time = pd.date_range(start="2025-01-01", end="2025-01-31", freq='D') # N days
- frequency = [1_000, 2_000, 3_000] # N frequencies
- Sv = np.random.rand(len(depth), len(time), len(frequency)) # synthetic data
+ depth = np.arange(1, 21) # N meters
+ time = pd.date_range(start="2025-01-01", end="2025-01-31", freq="D") # N days
+ frequency = [1_000, 2_000, 3_000] # N frequencies
+ Sv = np.random.rand(len(depth), len(time), len(frequency)) # synthetic data
  cruise = xr.Dataset(
- data_vars={
- "Sv": (["depth", "time", "frequency"], Sv)
- },
+ data_vars={"Sv": (["depth", "time", "frequency"], Sv)},
  coords={
  "depth": depth,
  "time": time,
@@ -66,28 +69,45 @@ class BatchDownloader:
  batch_generator = xbatcher.BatchGenerator(
  ds=cruise,
  # get samples that are shaped 10x10x3
- input_dims={ 'depth': 10, 'time': 10, 'frequency': cruise.frequency.shape[0] }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
+ input_dims={
+ "depth": 10,
+ "time": 10,
+ "frequency": cruise.frequency.shape[0],
+ }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
  # no overlap between samples
- input_overlap={ 'depth': 0, 'time': 0, 'frequency': 0 }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
+ input_overlap={
+ "depth": 0,
+ "time": 0,
+ "frequency": 0,
+ }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
  )
  return batch_generator

  def get_s3_batch_generator(self) -> xbatcher.BatchGenerator:
- """ Returns a BatchGenerator with subsets of Sv data from s3 Zarr store """
+ """Returns a BatchGenerator with subsets of Sv data from s3 Zarr store"""
  cruise = self.get_s3_zarr_store()

  # TODO: temporarily limits to a smaller slice of the data
- cruise_select = (cruise
- .where(cruise.depth < 100., drop=True)
- .sel(time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53"))
+ cruise_select = (
+ cruise.where(cruise.depth < 100.0, drop=True).sel(
+ time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53")
+ )
  # .sel(time=slice("2007-07-11T18:20:00", "2007-07-11T19:20:00"))
  )
- print(cruise_select.Sv.shape) # (526 depth, 21 time, 4 freq)
+ print(cruise_select.Sv.shape) # (526 depth, 21 time, 4 freq)

  batch_generator = xbatcher.BatchGenerator(
  ds=cruise_select,
- input_dims={ 'depth': 10, 'time': 10, 'frequency': cruise.frequency.shape[0] }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
- input_overlap={ 'depth': 0, 'time': 0, 'frequency': 0 }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
+ input_dims={
+ "depth": 10,
+ "time": 10,
+ "frequency": cruise.frequency.shape[0],
+ }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
+ input_overlap={
+ "depth": 0,
+ "time": 0,
+ "frequency": 0,
+ }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
  preload_batch=False,
  )

@@ -104,15 +124,22 @@ class BatchDownloader:
  cruise = self.get_s3_zarr_store()

  # TODO: temporarily limits to a smaller slice of the data
- cruise_select = (cruise
- .where(cruise.depth < 100., drop=True)
- .sel(time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53"))
+ cruise_select = cruise.where(cruise.depth < 100.0, drop=True).sel(
+ time=slice("2007-07-11T18:20:33", "2007-07-11T18:20:53")
  )
- print(cruise_select.Sv.shape) # (526 depth, 21 time, 4 freq)
+ print(cruise_select.Sv.shape) # (526 depth, 21 time, 4 freq)
  batch_generator = xbatcher.BatchGenerator(
  ds=cruise_select,
- input_dims={ 'depth': 10, 'time': 10, 'frequency': cruise.frequency.shape[0] }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
- input_overlap={ 'depth': 0, 'time': 0, 'frequency': 0 }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
+ input_dims={
+ "depth": 10,
+ "time": 10,
+ "frequency": cruise.frequency.shape[0],
+ }, # A dictionary specifying the size of the inputs in each dimension, e.g. ``{'lat': 30, 'lon': 30}`` These are the dimensions the ML library will see. All other dimensions will be stacked into one dimension called ``sample``.
+ input_overlap={
+ "depth": 0,
+ "time": 0,
+ "frequency": 0,
+ }, # Zero means no overlap. A dictionary specifying the overlap along each dimension
  preload_batch=True,
  )

@@ -121,12 +148,10 @@ class BatchDownloader:
  return batch_generator
  # https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator

- """
- (105, 21, 4)

- depth-start: 0.1899999976158142, depth-end: 1.899999976158142
- time-start: 2007-07-11T18:20:33.657573888, time-end: 2007-07-11T18:20:42.657573888
- frequency-start: 18000.0, frequency-end: 200000.0
- (10, 10, 4)
- np.nanmean: -53.70000076293945
- """
+ # (105, 21, 4)
+ # depth-start: 0.1899999976158142, depth-end: 1.899999976158142
+ # time-start: 2007-07-11T18:20:33.657573888, time-end: 2007-07-11T18:20:42.657573888
+ # frequency-start: 18000.0, frequency-end: 200000.0
+ # (10, 10, 4)
+ # np.nanmean: -53.70000076293945
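The batch_downloader.py changes above are almost entirely black-style reformatting; the generator logic is unchanged. For orientation, here is a minimal sketch of how the toy generator might be consumed. It assumes BatchDownloader is importable from water_column_sonar_processing.processing (the module shown above); everything else in the snippet is illustrative, not part of the release.

# Sketch: iterate the synthetic toy generator and inspect each patch.
# Assumes BatchDownloader is exported by water_column_sonar_processing.processing.
import numpy as np

from water_column_sonar_processing.processing import BatchDownloader

downloader = BatchDownloader()  # defaults target the Henry_B._Bigelow HB0707 EK60 cruise
generator = downloader.get_toy_batch_generator()

for batch in generator:
    # each batch is an xarray.Dataset with dims depth=10, time=10, frequency=3
    sv = batch["Sv"].values
    print(sv.shape, float(np.nanmean(sv)))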
water_column_sonar_processing/processing/raw_to_zarr.py
@@ -1,25 +1,26 @@
  import gc
  import os
+ from datetime import datetime
+ from pathlib import Path # , PurePath
+
  import echopype as ep
  import numcodecs
  import numpy as np
  from numcodecs import Blosc
- from datetime import datetime
- from pathlib import Path # , PurePath

  from water_column_sonar_processing.aws import DynamoDBManager, S3Manager
  from water_column_sonar_processing.geometry import GeometryManager
- from water_column_sonar_processing.utility import Cleaner, PipelineStatus
+ from water_column_sonar_processing.utility import Cleaner


  # This code is getting copied from echofish-aws-raw-to-zarr-lambda
  class RawToZarr:
  #######################################################
  def __init__(
- self,
- # output_bucket_access_key,
- # output_bucket_secret_access_key,
- # # overwrite_existing_zarr_store,
+ self,
+ # output_bucket_access_key,
+ # output_bucket_secret_access_key,
+ # # overwrite_existing_zarr_store,
  ):
  # TODO: revert to Blosc.BITSHUFFLE, troubleshooting misc error
  self.__compressor = Blosc(cname="zstd", clevel=2) # shuffle=Blosc.NOSHUFFLE
@@ -33,45 +34,47 @@ class RawToZarr:
  ############################################################################
  ############################################################################
  def __zarr_info_to_table(
- self,
- output_bucket_name,
- table_name,
- ship_name,
- cruise_name,
- sensor_name,
- file_name,
- zarr_path,
- min_echo_range,
- max_echo_range,
- num_ping_time_dropna,
- start_time,
- end_time,
- frequencies,
- channels
+ self,
+ # output_bucket_name,
+ table_name,
+ ship_name,
+ cruise_name,
+ sensor_name,
+ file_name,
+ # zarr_path,
+ min_echo_range,
+ max_echo_range,
+ num_ping_time_dropna,
+ start_time,
+ end_time,
+ frequencies,
+ channels,
+ water_level,
  ):
- print('Writing Zarr information to DynamoDB table.')
+ print("Writing Zarr information to DynamoDB table.")
  dynamodb_manager = DynamoDBManager()
  dynamodb_manager.update_item(
  table_name=table_name,
  key={
- 'FILE_NAME': {'S': file_name}, # Partition Key
- 'CRUISE_NAME': {'S': cruise_name}, # Sort Key
+ "FILE_NAME": {"S": file_name}, # Partition Key
+ "CRUISE_NAME": {"S": cruise_name}, # Sort Key
  },
  expression_attribute_names={
- '#CH': 'CHANNELS',
- '#ET': 'END_TIME',
+ "#CH": "CHANNELS",
+ "#ET": "END_TIME",
  # "#ED": "ERROR_DETAIL",
- '#FR': 'FREQUENCIES',
- '#MA': 'MAX_ECHO_RANGE',
- '#MI': 'MIN_ECHO_RANGE',
- '#ND': 'NUM_PING_TIME_DROPNA',
- "#PS": "PIPELINE_STATUS",
+ "#FR": "FREQUENCIES",
+ "#MA": "MAX_ECHO_RANGE",
+ "#MI": "MIN_ECHO_RANGE",
+ "#ND": "NUM_PING_TIME_DROPNA",
+ # "#PS": "PIPELINE_STATUS",
  "#PT": "PIPELINE_TIME",
  "#SE": "SENSOR_NAME",
  "#SH": "SHIP_NAME",
- '#ST': 'START_TIME',
- '#ZB': 'ZARR_BUCKET',
- '#ZP': 'ZARR_PATH',
+ "#ST": "START_TIME",
+ # "#ZB": "ZARR_BUCKET",
+ # "#ZP": "ZARR_PATH",
+ "#WL": "WATER_LEVEL",
  },
  expression_attribute_values={
  ":ch": {"L": [{"S": i} for i in channels]},
@@ -82,13 +85,14 @@ class RawToZarr:
  ":mi": {"N": str(np.round(min_echo_range, 4))},
  ":nd": {"N": str(num_ping_time_dropna)},
  # ":ps": {"S": "PROCESSING_RESAMPLE_AND_WRITE_TO_ZARR_STORE"},
- ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
+ # ":ps": {"S": PipelineStatus.LEVEL_1_PROCESSING.name},
  ":pt": {"S": datetime.now().isoformat(timespec="seconds") + "Z"},
  ":se": {"S": sensor_name},
  ":sh": {"S": ship_name},
  ":st": {"S": start_time},
- ":zb": {"S": output_bucket_name},
- ":zp": { "S": zarr_path },
+ ":wl": {"N": str(np.round(water_level, 2))},
+ # ":zb": {"S": output_bucket_name},
+ # ":zp": {"S": zarr_path},
  },
  update_expression=(
  "SET "
@@ -99,30 +103,31 @@ class RawToZarr:
  "#MA = :ma, "
  "#MI = :mi, "
  "#ND = :nd, "
- "#PS = :ps, "
+ # "#PS = :ps, "
  "#PT = :pt, "
  "#SE = :se, "
  "#SH = :sh, "
  "#ST = :st, "
- "#ZB = :zb, "
- "#ZP = :zp"
+ "#WL = :wl"
+ # "#ZB = :zb, "
+ # "#ZP = :zp"
  ),
  )
- print('Done writing Zarr information to DynamoDB table.')
+ print("Done writing Zarr information to DynamoDB table.")

  ############################################################################
  ############################################################################
  ############################################################################
  def __upload_files_to_output_bucket(
- self,
- output_bucket_name,
- local_directory,
- object_prefix,
- endpoint_url,
+ self,
+ output_bucket_name,
+ local_directory,
+ object_prefix,
+ endpoint_url,
  ):
  # Note: this will be passed credentials if using NODD
  s3_manager = S3Manager(endpoint_url=endpoint_url)
- print('Uploading files using thread pool executor.')
+ print("Uploading files using thread pool executor.")
  all_files = []
  for subdir, dirs, files in os.walk(local_directory):
  for file in files:
@@ -138,38 +143,50 @@ class RawToZarr:

  ############################################################################
  def raw_to_zarr(
- self,
- table_name,
- input_bucket_name,
- output_bucket_name,
- ship_name,
- cruise_name,
- sensor_name,
- raw_file_name,
- endpoint_url=None,
- include_bot=True,
+ self,
+ table_name,
+ input_bucket_name,
+ output_bucket_name,
+ ship_name,
+ cruise_name,
+ sensor_name,
+ raw_file_name,
+ endpoint_url=None,
+ include_bot=True,
  ):
  """
  Downloads the raw files, processes them with echopype, writes geojson, and uploads files
  to the nodd bucket.
  """
- print(f'Opening raw: {raw_file_name} and creating zarr store.')
+ print(f"Opening raw: {raw_file_name} and creating zarr store.")
  geometry_manager = GeometryManager()
  cleaner = Cleaner()
- cleaner.delete_local_files(file_types=["*.zarr", "*.json"]) # TODO: include bot and raw?
+ cleaner.delete_local_files(
+ file_types=["*.zarr", "*.json"]
+ ) # TODO: include bot and raw?

  s3_manager = S3Manager(endpoint_url=endpoint_url)
- s3_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+ s3_file_path = (
+ f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{raw_file_name}"
+ )
  bottom_file_name = f"{Path(raw_file_name).stem}.bot"
- s3_bottom_file_path = f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
- s3_manager.download_file(bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name)
+ s3_bottom_file_path = (
+ f"data/raw/{ship_name}/{cruise_name}/{sensor_name}/{bottom_file_name}"
+ )
+ s3_manager.download_file(
+ bucket_name=input_bucket_name, key=s3_file_path, file_name=raw_file_name
+ )
  # TODO: add the bottom file
  if include_bot:
- s3_manager.download_file(bucket_name=input_bucket_name, key=s3_bottom_file_path, file_name=bottom_file_name)
+ s3_manager.download_file(
+ bucket_name=input_bucket_name,
+ key=s3_bottom_file_path,
+ file_name=bottom_file_name,
+ )

  try:
  gc.collect()
- print('Opening raw file with echopype.')
+ print("Opening raw file with echopype.")
  # s3_file_path = f"s3://{bucket_name}/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}"
  # s3_file_path = Path(f"s3://noaa-wcsd-pds/data/raw/{ship_name}/{cruise_name}/{sensor_name}/{file_name}")
  echodata = ep.open_raw(
@@ -180,14 +197,20 @@ class RawToZarr:
  # max_chunk_size=300,
  # storage_options={'anon': True } # 'endpoint_url': self.endpoint_url} # this was creating problems
  )
- print('Compute volume backscattering strength (Sv) from raw data.')
+ print("Compute volume backscattering strength (Sv) from raw data.")
  ds_sv = ep.calibrate.compute_Sv(echodata)
+ ds_sv = ep.consolidate.add_depth(
+ ds_sv, echodata
+ ) # TODO: consolidate with other depth values
+ water_level = ds_sv["water_level"].values
  gc.collect()
- print('Done computing volume backscatter strength (Sv) from raw data.')
+ print("Done computing volume backscatter strength (Sv) from raw data.")
  # Note: detected_seafloor_depth is located at echodata.vendor.detected_seafloor_depth
  # but is not written out with ds_sv
  if "detected_seafloor_depth" in list(echodata.vendor.variables):
- ds_sv["detected_seafloor_depth"] = echodata.vendor.detected_seafloor_depth
+ ds_sv["detected_seafloor_depth"] = (
+ echodata.vendor.detected_seafloor_depth
+ )
  #
  frequencies = echodata.environment.frequency_nominal.values
  #################################################################
@@ -200,10 +223,12 @@ class RawToZarr:
  sensor_name=sensor_name,
  file_name=raw_file_name,
  endpoint_url=endpoint_url,
- write_geojson=True
+ write_geojson=True,
  )
  ds_sv = ep.consolidate.add_location(ds_sv, echodata)
- ds_sv.latitude.values = lat # overwriting echopype gps values to include missing values
+ ds_sv.latitude.values = (
+ lat # overwriting echopype gps values to include missing values
+ )
  ds_sv.longitude.values = lon
  # gps_data, lat, lon = self.__get_gps_data(echodata=echodata)
  #################################################################
@@ -216,8 +241,12 @@ class RawToZarr:
  # This is the number of missing values found throughout the lat/lon
  num_ping_time_dropna = lat[~np.isnan(lat)].shape[0] # symmetric to lon
  #
- start_time = np.datetime_as_string(ds_sv.ping_time.values[0], unit='ms') + "Z"
- end_time = np.datetime_as_string(ds_sv.ping_time.values[-1], unit='ms') + "Z"
+ start_time = (
+ np.datetime_as_string(ds_sv.ping_time.values[0], unit="ms") + "Z"
+ )
+ end_time = (
+ np.datetime_as_string(ds_sv.ping_time.values[-1], unit="ms") + "Z"
+ )
  channels = list(ds_sv.channel.values)
  #
  #################################################################
@@ -225,7 +254,9 @@ class RawToZarr:
  store_name = f"{Path(raw_file_name).stem}.zarr"
  # Sv = ds_sv.Sv
  # ds_sv['Sv'] = Sv.astype('int32', copy=False)
- ds_sv.to_zarr(store=store_name) # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
+ ds_sv.to_zarr(
+ store=store_name
+ ) # ds_sv.Sv.sel(channel=ds_sv.channel.values[0]).shape
  gc.collect()
  #################################################################
  output_zarr_prefix = f"level_1/{ship_name}/{cruise_name}/{sensor_name}/"
@@ -237,7 +268,9 @@ class RawToZarr:
  sub_prefix=f"level_1/{ship_name}/{cruise_name}/{sensor_name}/{Path(raw_file_name).stem}.zarr",
  )
  if len(child_objects) > 0:
- print('Zarr store data already exists in s3, deleting existing and continuing.')
+ print(
+ "Zarr store data already exists in s3, deleting existing and continuing."
+ )
  s3_manager.delete_nodd_objects(
  bucket_name=output_bucket_name,
  objects=child_objects,
@@ -247,37 +280,42 @@ class RawToZarr:
  output_bucket_name=output_bucket_name,
  local_directory=store_name,
  object_prefix=output_zarr_prefix,
- endpoint_url=endpoint_url
+ endpoint_url=endpoint_url,
  )
  #################################################################
  self.__zarr_info_to_table(
- output_bucket_name=output_bucket_name,
+ # output_bucket_name=output_bucket_name,
  table_name=table_name,
  ship_name=ship_name,
  cruise_name=cruise_name,
  sensor_name=sensor_name,
  file_name=raw_file_name,
- zarr_path=os.path.join(output_zarr_prefix, store_name),
+ # zarr_path=os.path.join(output_zarr_prefix, store_name),
  min_echo_range=min_echo_range,
  max_echo_range=max_echo_range,
  num_ping_time_dropna=num_ping_time_dropna,
  start_time=start_time,
  end_time=end_time,
  frequencies=frequencies,
- channels=channels
+ channels=channels,
+ water_level=water_level,
  )
  #######################################################################
  # TODO: verify count of objects matches, publish message, update status
  #######################################################################
- print('Finished raw-to-zarr conversion.')
+ print("Finished raw-to-zarr conversion.")
  except Exception as err:
- print(f'Exception encountered creating local Zarr store with echopype: {err}')
+ print(
+ f"Exception encountered creating local Zarr store with echopype: {err}"
+ )
  raise RuntimeError(f"Problem creating local Zarr store, {err}")
  finally:
  gc.collect()
  print("Finally.")
- cleaner.delete_local_files(file_types=["*.raw", "*.bot", "*.zarr", "*.json"])
- print('Done creating local zarr store.')
+ cleaner.delete_local_files(
+ file_types=["*.raw", "*.bot", "*.zarr", "*.json"]
+ )
+ print("Done creating local zarr store.")

  ############################################################################
  # TODO: does this get called?
@@ -365,5 +403,6 @@ class RawToZarr:

  ############################################################################

+
  ################################################################################
  ############################################################################
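Functionally, the raw_to_zarr.py changes add a depth step (ep.consolidate.add_depth), pull water_level from the calibrated dataset, and record it in DynamoDB as WATER_LEVEL, while PIPELINE_STATUS, ZARR_BUCKET, and ZARR_PATH are no longer written. A rough sketch of driving the entry point follows; the DynamoDB table name and .raw file name are placeholders (they do not appear in this diff), while the buckets, ship, cruise, and sensor are taken from defaults and comments shown above. Running it also assumes AWS credentials and an existing table.

# Sketch: invoking the updated raw-to-zarr step.
# Assumes RawToZarr is exported by water_column_sonar_processing.processing.
# "echofish-table" and the .raw file name are placeholders, not release values.
from water_column_sonar_processing.processing import RawToZarr

raw_to_zarr = RawToZarr()
raw_to_zarr.raw_to_zarr(
    table_name="echofish-table",            # placeholder DynamoDB table name
    input_bucket_name="noaa-wcsd-pds",      # raw-file bucket referenced in comments above
    output_bucket_name="noaa-wcsd-zarr-pds",
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0707",
    sensor_name="EK60",
    raw_file_name="example_ek60_file.raw",  # placeholder file name
    include_bot=True,                       # also download the matching .bot bottom file
)
# As of 25.3.0 the DynamoDB item gains WATER_LEVEL (from ep.consolidate.add_depth)
# and no longer carries PIPELINE_STATUS, ZARR_BUCKET, or ZARR_PATH.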
water_column_sonar_processing/utility/constants.py
@@ -9,7 +9,16 @@ class Constants(Flag):
  # chunk size is ~1.3 kB, HB0902 cruise takes ~30 seconds to load all time/lat/lon data
  # NOTE: larger value here will speed up the TurfJS download of data in the UI
  # Problem interpolating the data: cannot reshape array of size 65536 into shape...
- SPATIOTEMPORAL_CHUNK_SIZE = 16384 # e.g. int(2**14)
+ SPATIOTEMPORAL_CHUNK_SIZE = int(2**16) - 1024 # e.g. int(2**14)
+ # TODO: create test for SPATIOTEMPORAL_CHUNK_SIZE with requirement!
+
+ LEVEL_0 = "raw"
+ LEVEL_1 = "level_1"
+ LEVEL_2 = "level_2"
+ LEVEL_3 = "level_3"
+
+ EK60 = "EK60" # TODO: use for "instrument"
+ EK80 = "EK80"


  class Coordinates(Enum):
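The main behavioural change in constants.py is the larger chunk size: int(2**16) - 1024 evaluates to 64512, roughly four times the previous 16384 and just under the 65536 figure mentioned in the reshape comment above. A quick arithmetic check, using only values shown in the diff:

# Arithmetic check of the new SPATIOTEMPORAL_CHUNK_SIZE (values from the diff above).
old_chunk_size = 16384              # 25.1.7 value, i.e. int(2**14)
new_chunk_size = int(2**16) - 1024  # 25.3.0 value
print(new_chunk_size)                   # 64512
print(new_chunk_size / old_chunk_size)  # 3.9375, roughly 4x more points per chunk
print(new_chunk_size < 2**16)           # True, stays below 65536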
water_column_sonar_processing/utility/pipeline_status.py
@@ -107,19 +107,15 @@ class PipelineStatus(Flag):
  # Status.LEVEL_1_PROCESSING.value < Status.LEVEL_2_PROCESSING.value

  # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-cloudformation-stack.html
- """
- CREATE_IN_PROGRESS | CREATE_FAILED | CREATE_COMPLETE |
- ROLLBACK_IN_PROGRESS | ROLLBACK_FAILED | ROLLBACK_COMPLETE |
- DELETE_IN_PROGRESS | DELETE_FAILED | DELETE_COMPLETE |
- UPDATE_IN_PROGRESS | UPDATE_COMPLETE_CLEANUP_IN_PROGRESS | UPDATE_COMPLETE |
- UPDATE_FAILED | UPDATE_ROLLBACK_IN_PROGRESS | UPDATE_ROLLBACK_FAILED |
- UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS | UPDATE_ROLLBACK_COMPLETE |
- REVIEW_IN_PROGRESS | IMPORT_IN_PROGRESS | IMPORT_COMPLETE |
- IMPORT_ROLLBACK_IN_PROGRESS | IMPORT_ROLLBACK_FAILED | IMPORT_ROLLBACK_COMPLETE

- failure - noun -
- failed - verb - "verbs should be avoided"
-
- success - noun
-
- """
+ # CREATE_IN_PROGRESS | CREATE_FAILED | CREATE_COMPLETE |
+ # ROLLBACK_IN_PROGRESS | ROLLBACK_FAILED | ROLLBACK_COMPLETE |
+ # DELETE_IN_PROGRESS | DELETE_FAILED | DELETE_COMPLETE |
+ # UPDATE_IN_PROGRESS | UPDATE_COMPLETE_CLEANUP_IN_PROGRESS | UPDATE_COMPLETE |
+ # UPDATE_FAILED | UPDATE_ROLLBACK_IN_PROGRESS | UPDATE_ROLLBACK_FAILED |
+ # UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS | UPDATE_ROLLBACK_COMPLETE |
+ # REVIEW_IN_PROGRESS | IMPORT_IN_PROGRESS | IMPORT_COMPLETE |
+ # IMPORT_ROLLBACK_IN_PROGRESS | IMPORT_ROLLBACK_FAILED | IMPORT_ROLLBACK_COMPLETE
+ # failure - noun -
+ # failed - verb - "verbs should be avoided"
+ # success - noun
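The pipeline_status.py hunk only converts trailing docstring notes into comments. As the comment at the top of the hunk suggests, PipelineStatus members are compared by value to order pipeline stages; a minimal sketch follows, assuming the LEVEL_1_PROCESSING and LEVEL_2_PROCESSING members referenced in that comment exist on the enum (the full enum body is not part of this diff).

# Sketch: ordering pipeline stages by their Flag values.
# Member names are taken from the comment above; the full enum is not shown here.
from water_column_sonar_processing.utility import PipelineStatus

current = PipelineStatus.LEVEL_1_PROCESSING
if current.value < PipelineStatus.LEVEL_2_PROCESSING.value:
    print(f"{current.name} comes before LEVEL_2_PROCESSING in the pipeline")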