water-column-sonar-processing 25.3.2__py3-none-any.whl → 25.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of water-column-sonar-processing might be problematic.

Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +6 -6
  2. water_column_sonar_processing/aws/s3_manager.py +95 -90
  3. water_column_sonar_processing/aws/s3fs_manager.py +5 -3
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/__init__.py +2 -1
  6. water_column_sonar_processing/cruise/create_empty_zarr_store.py +49 -43
  7. water_column_sonar_processing/cruise/create_empty_zarr_store_level_3.py +161 -0
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -21
  9. water_column_sonar_processing/cruise/resample_regrid.py +57 -47
  10. water_column_sonar_processing/dataset/__init__.py +3 -0
  11. water_column_sonar_processing/dataset/dataset_manager.py +205 -0
  12. water_column_sonar_processing/dataset/feature_manager.py +32 -0
  13. water_column_sonar_processing/geometry/geometry_manager.py +11 -12
  14. water_column_sonar_processing/geometry/line_simplification.py +26 -1
  15. water_column_sonar_processing/geometry/pmtile_generation.py +211 -247
  16. water_column_sonar_processing/index/index_manager.py +18 -17
  17. water_column_sonar_processing/model/zarr_manager.py +504 -256
  18. water_column_sonar_processing/processing/__init__.py +3 -2
  19. water_column_sonar_processing/processing/batch_downloader.py +11 -11
  20. water_column_sonar_processing/processing/raw_to_netcdf.py +319 -0
  21. water_column_sonar_processing/processing/raw_to_zarr.py +41 -31
  22. water_column_sonar_processing/utility/__init__.py +9 -2
  23. water_column_sonar_processing/utility/cleaner.py +1 -2
  24. water_column_sonar_processing/utility/constants.py +26 -7
  25. water_column_sonar_processing/utility/timestamp.py +1 -0
  26. water_column_sonar_processing-25.8.0.dist-info/METADATA +162 -0
  27. water_column_sonar_processing-25.8.0.dist-info/RECORD +39 -0
  28. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/WHEEL +1 -1
  29. water_column_sonar_processing-25.3.2.dist-info/licenses/LICENSE → water_column_sonar_processing-25.8.0.dist-info/licenses/LICENSE-MIT +1 -1
  30. water_column_sonar_processing-25.3.2.dist-info/METADATA +0 -170
  31. water_column_sonar_processing-25.3.2.dist-info/RECORD +0 -34
  32. {water_column_sonar_processing-25.3.2.dist-info → water_column_sonar_processing-25.8.0.dist-info}/top_level.txt +0 -0

water_column_sonar_processing/aws/dynamodb_manager.py
@@ -127,7 +127,7 @@ class DynamoDBManager:
                 # assert status_code == 200, "Problem, unable to update dynamodb table."
                 # assert response['ConsumedCapacity']['TableName'] == table_name
         except Exception as err:
-            print(f"Problem was encountered while updating item: {err}")
+            raise RuntimeError(f"Problem was encountered while updating item, {err}")
 
     #####################################################################
     # TODO: change to "get_cruise_as_df"
@@ -135,7 +135,7 @@ class DynamoDBManager:
         self,
         # ship_name,
         cruise_name,
-        # sensor_name,
+        # sensor_name,  # TODO: need to add this back for EK80
         table_name,
     ) -> pd.DataFrame:
         """
@@ -230,7 +230,7 @@ class DynamoDBManager:
         # if len(response["Items"]) == 0 and "LastEvaluatedKey" not in response:
         #     return pd.DataFrame()  # If no results, return empty dataframe
         #
-        # data = response["Items"]
+        # dataset = response["Items"]
         #
         # while response.get('LastEvaluatedKey'): #"LastEvaluatedKey" in response:
         #     response = self.dynamodb_client.scan(
@@ -252,10 +252,10 @@ class DynamoDBManager:
         #         ConsistentRead=True,
         #         ExclusiveStartKey=response["LastEvaluatedKey"],
         #     )
-        #     data.extend(response["Items"])
+        #     dataset.extend(response["Items"])
        #
        # deserializer = self.type_deserializer
-        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in data])
+        # df = pd.DataFrame([deserializer.deserialize({"M": i}) for i in dataset])
        #
        # return df.sort_values(by="START_TIME", ignore_index=True)
 
@@ -273,7 +273,7 @@ class DynamoDBManager:
         response = self.dynamodb_client.delete_item(
             Key={"CRUISE_NAME": {"S": cruise_name}, "FILE_NAME": {"S": file_name}},
             TableName=table_name,
-            ReturnConsumedCapacity="TOTALS",
+            ReturnConsumedCapacity="TOTAL",
         )
         # TODO: there should be attributes included in response but they are missing
         # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:

water_column_sonar_processing/aws/s3_manager.py
@@ -2,6 +2,7 @@ import json
 import os
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from time import sleep
 from typing import Optional
 
 import boto3
@@ -80,14 +81,8 @@ class S3Manager:
             self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
         )
 
-    # def get_client(self):  # TODO: do i need this?
-    #     return self.s3_session.client(
-    #         service_name="s3",
-    #         config=self.s3_client_config,
-    #         region_name=self.s3_region,
-    #     )
-
     #####################################################################
+    # tested
     def create_bucket(
         self,
         bucket_name: str,
@@ -116,12 +111,13 @@ class S3Manager:
         # print(response)
 
     #####################################################################
+    # tested
     def list_buckets(self):
-        # client = self.get_client()
         client = self.s3_client
         return client.list_buckets()
 
     #####################################################################
+    # tested
     def upload_nodd_file(
         self,
         file_name: str,
@@ -137,6 +133,7 @@ class S3Manager:
         return key
 
     #####################################################################
+    # tested
     def upload_files_with_thread_pool_executor(
         self,
         output_bucket_name: str,
@@ -160,58 +157,66 @@ class S3Manager:
                 if result:
                     all_uploads.extend([result])
             except Exception as err:
-                print(err)
+                raise RuntimeError(f"Problem, {err}")
+
         print("Done uploading files using threading pool.")
         return all_uploads
 
     #####################################################################
-    # TODO: this uses resource, try to use client
+    # tested
+    def upload_zarr_store_to_s3(
+        self,
+        output_bucket_name: str,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
+    ) -> None:
+        print("uploading model store to s3")
+        try:
+            #
+            print("Starting upload with thread pool executor.")
+            # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+            all_files = []
+            for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+                for file in files:
+                    local_path = os.path.join(subdir, file)
+                    # TODO: find a better method for splitting strings here:
+                    # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+                    # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+                    s3_key = os.path.join(
+                        object_prefix,
+                        os.path.join(
+                            subdir[subdir.find(f"{cruise_name}.zarr") :], file
+                        ),
+                    )
+                    all_files.append([local_path, s3_key])
+            self.upload_files_with_thread_pool_executor(
+                output_bucket_name=output_bucket_name,
+                all_files=all_files,
+            )
+            print("Done uploading with thread pool executor.")
+        except Exception as err:
+            raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
+    #####################################################################
+    # tested
     def upload_file(
         self,
         filename: str,
         bucket_name: str,
         key: str,
     ):
-        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
         self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
 
     #####################################################################
-    def upload_zarr_files_to_bucket(  # noaa-wcsd-model-pds
-        self,
-        local_directory,
-        remote_directory,
-        output_bucket_name,
-    ):
-        # Right now this is just for uploading a model store to s3
-        print("Uploading files to output bucket.")
-        store_name = os.path.basename(local_directory)
-        all_files = []
-        for subdir, dirs, files in os.walk(local_directory):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(
-                    remote_directory,
-                    store_name,
-                    subdir.split(store_name)[-1].strip("/"),
-                )
-                all_files.append([local_path, s3_key])
-
-        all_uploads = self.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading files to output bucket.")
-        return all_uploads
-
-    #####################################################################
+    # tested
     def check_if_object_exists(self, bucket_name, key_name) -> bool:
         s3_manager2 = S3Manager()
         s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
         s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
         try:
-            # response = s3_resource_noaa_wcsd_zarr_pds.Object(bucket_name, key_name).load()
             s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+            return True
         except botocore.exceptions.ClientError as e:
             if e.response["Error"]["Code"] == "404":
                 # The object does not exist.
@@ -222,10 +227,9 @@ class S3Manager:
             else:
                 # Something else has gone wrong.
                 raise
-        return True
 
     #####################################################################
-    # used: raw-to-zarr
+    # tested
     def list_objects(self, bucket_name, prefix):  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
         # TODO: this isn't working for geojson detecting objects!!!!!!!
         # analog to "find_children_objects"
@@ -239,32 +243,20 @@ class S3Manager:
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    # def list_nodd_objects(  # These are used by the geometry for uploading data
-    #     self,
-    #     prefix,
-    # ):
-    #     # Returns a list of key strings for each object in bucket defined by prefix
-    #     keys = []
-    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #     for page in paginator.paginate(Bucket=output_bucket_name, Prefix=prefix):
-    #         if "Contents" in page.keys():
-    #             keys.extend([k["Key"] for k in page["Contents"]])
-    #     return keys
-
     #####################################################################
     # TODO: change name to "directory"
-    def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
-        if not path.endswith("/"):
-            path = path + "/"
-        # s3_client = self.s3_client
-        resp = self.list_objects(
-            bucket_name=bucket_name, prefix=path
-        )  # TODO: this is returning root folder and doesn't include children or hidden folders
-        # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
-        return "Contents" in resp
+    # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+    #     if not path.endswith("/"):
+    #         path = path + "/"
+    #     # s3_client = self.s3_client
+    #     resp = self.list_objects(
+    #         bucket_name=bucket_name, prefix=path
+    #     )  # TODO: this is returning root folder and doesn't include children or hidden folders
+    #     # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+    #     return "Contents" in resp
 
     #####################################################################
-    # used
+    # private
     def __paginate_child_objects(
         self,
         bucket_name: str,
@@ -279,6 +271,8 @@ class S3Manager:
                 objects.extend(page["Contents"])
         return objects
 
+    #####################################################################
+    # tested
     def get_child_objects(
         self,
         bucket_name: str,
@@ -310,13 +304,14 @@ class S3Manager:
         return raw_files
 
     #####################################################################
-    def get_object(  # TODO: Move this to index.py
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def get_object(  # noaa-wcsd-pds or noaa-wcsd-zarr-pds
         self,
         bucket_name,
         key_name,
     ):
         # Meant for getting singular objects from a bucket, used by indexing lambda
+        # can also return byte range potentially.
         print(f"Getting object {key_name} from {bucket_name}")
         try:
             response = self.s3_client.get_object(
@@ -325,27 +320,31 @@ class S3Manager:
             )
             # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
             # if status == 200:
+            print(f"Done getting object {key_name} from {bucket_name}")
+            return response
         except ClientError as err:
             print(f"Problem was encountered while getting s3 file: {err}")
             raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response
 
     #####################################################################
-    # used raw-to-model
-    def download_file(  # TODO: change to download_object
-        # noaa-wcsd-pds or noaa-wcsd-model-pds
+    # tested
+    def download_file(
         self,
         bucket_name,
         key,
-        file_name,  # where the file will be saved
+        file_name,  # path to where the file will be saved
    ):
-        self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
-        # TODO: if bottom file doesn't exist, don't fail downloader
-        print("downloaded file")
+        try:
+            self.s3_client.download_file(
+                Bucket=bucket_name, Key=key, Filename=file_name
+            )
+            # TODO: if bottom file doesn't exist, don't fail downloader
+            print("downloaded file")
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered while downloading_file, {err}")
 
     #####################################################################
-    # TODO: need to test this!!!
+    # tested
     def delete_nodd_objects(  # nodd-bucket
         self,
         bucket_name,
@@ -358,16 +357,20 @@ class S3Manager:
                     objects_to_delete.append({"Key": obj["Key"]})
             # Note: request can contain a list of up to 1000 keys
             for batch in chunked(ll=objects_to_delete, n=1000):
+                # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+                # Please reduce your request rate.
+                sleep(0.5)
+                #
                 self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
                     Bucket=bucket_name, Delete={"Objects": batch}
                 )
             print("Deleted files.")
         except Exception as err:
-            print(f"Problem was encountered while deleting objects: {err}")
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
 
     #####################################################################
-    # TODO: need to test this!!!
-    def delete_nodd_object(
+    # tested
+    def delete_nodd_object(  # only used to delete geojson it looks like?! Remove.
         self,
         bucket_name,
         key_name,
@@ -379,22 +382,27 @@ class S3Manager:
             )
             print("Deleted file.")
         except Exception as err:
-            print(f"Problem was encountered while deleting objects: {err}")
+            raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
 
     #####################################################################
+    # tested
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-        self.s3_client.put_object(
-            Bucket=bucket_name, Key=key, Body=body
-        )  # "Body" can be a file
+        try:
+            self.s3_client.put_object(
+                Bucket=bucket_name, Key=key, Body=body
+            )  # "Body" can be a file
+        except Exception as err:
+            raise RuntimeError(f"Problem was encountered putting object, {err}")
 
     #####################################################################
+    # tested
     def read_s3_json(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         file_name_stem,
-        output_bucket_name,
+        output_bucket_name,  # TODO: change to just bucket_name
     ) -> str:
         try:
             resource = self.s3_resource_noaa_wcsd_zarr_pds
@@ -405,11 +413,8 @@ class S3Manager:
             file_content = content_object["Body"].read().decode("utf-8")
             json_content = json.loads(file_content)
             return json_content
-        except Exception as err:  # Failure
-            print(f"Exception encountered reading s3 GeoJSON: {err}")
-            raise
-
-    #####################################################################
+        except Exception as err:
+            raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")
 
 
 #########################################################################
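
Note on the new S3Manager.upload_zarr_store_to_s3 (moved here from CreateEmptyZarrStore, see below): it walks <local_directory>/<cruise_name>.zarr and pushes every file through upload_files_with_thread_pool_executor. A minimal usage sketch, with the import path assumed from the package layout and the bucket/prefix values taken from the code comments above as illustrative examples only:

from water_column_sonar_processing.aws import S3Manager  # import path assumed, not shown in this diff

s3_manager = S3Manager()
s3_manager.upload_zarr_store_to_s3(
    output_bucket_name="noaa-wcsd-zarr-pds",               # illustrative bucket name
    local_directory="/tmp/zarr_output",                    # hypothetical directory containing HB0806.zarr
    object_prefix="level_2/Henry_B._Bigelow/HB0806/EK60",  # illustrative prefix from the code comments
    cruise_name="HB0806",
)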

water_column_sonar_processing/aws/s3fs_manager.py
@@ -4,6 +4,7 @@ from typing import Optional
 import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
+# S3FS_LOGGING_LEVEL=DEBUG
 
 
 class S3FSManager:
@@ -13,15 +14,16 @@ class S3FSManager:
         endpoint_url: Optional[str] = None,
     ):
         self.endpoint_url = endpoint_url
-        # self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            # asynchronous=False,
             endpoint_url=endpoint_url,
             key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
+            # asynchronous=True,
         )
+        # self.s3fs.ls("")
 
         # s3_fs = s3fs.S3FileSystem(  # TODO: use s3fs_manager?
         #     anon=True,

water_column_sonar_processing/aws/sqs_manager.py
@@ -35,7 +35,7 @@ class SQSManager:
     #######################################################
     def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
-        # takes a while to propagate the data accross the systems.
+        # takes a while to propagate the dataset accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 

water_column_sonar_processing/cruise/__init__.py
@@ -1,4 +1,5 @@
 from .create_empty_zarr_store import CreateEmptyZarrStore
+from .create_empty_zarr_store_level_3 import CreateEmptyZarrStoreLevel3
 from .resample_regrid import ResampleRegrid
 
-__all__ = ["CreateEmptyZarrStore", "ResampleRegrid"]
+__all__ = ["CreateEmptyZarrStore", "CreateEmptyZarrStoreLevel3", "ResampleRegrid"]
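
The new CreateEmptyZarrStoreLevel3 class (defined in the added create_empty_zarr_store_level_3.py, see the file list above) is now exported from the cruise subpackage alongside the existing classes; a minimal import sketch:

from water_column_sonar_processing.cruise import CreateEmptyZarrStore, CreateEmptyZarrStoreLevel3, ResampleRegrid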

water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -13,7 +13,7 @@ numcodecs.blosc.set_nthreads(1)
 
 # TODO: when ready switch to version 3 of model spec
 # ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
+# creates the latlon dataset: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
 
 # TODO: change name to "CreateLocalEmptyZarrStore"
@@ -27,35 +27,35 @@ class CreateEmptyZarrStore:
         # self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
-    # TODO: move this to the s3_manager
-    def upload_zarr_store_to_s3(
-        self,
-        output_bucket_name: str,
-        local_directory: str,
-        object_prefix: str,
-        cruise_name: str,
-    ) -> None:
-        print("uploading model store to s3")
-        s3_manager = S3Manager()
-        #
-        print("Starting upload with thread pool executor.")
-        # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # TODO: find a better method for splitting strings here:
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
-                all_files.append([local_path, s3_key])
-        #
-        # print(all_files)
-        s3_manager.upload_files_with_thread_pool_executor(
-            output_bucket_name=output_bucket_name,
-            all_files=all_files,
-        )
-        print("Done uploading with thread pool executor.")
-        # TODO: move to common place
+    # TODO: moved this to the s3_manager
+    # def upload_zarr_store_to_s3(
+    #     self,
+    #     output_bucket_name: str,
+    #     local_directory: str,
+    #     object_prefix: str,
+    #     cruise_name: str,
+    # ) -> None:
+    #     print("uploading model store to s3")
+    #     s3_manager = S3Manager()
+    #     #
+    #     print("Starting upload with thread pool executor.")
+    #     # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+    #     all_files = []
+    #     for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+    #         for file in files:
+    #             local_path = os.path.join(subdir, file)
+    #             # TODO: find a better method for splitting strings here:
+    #             # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+    #             s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+    #             all_files.append([local_path, s3_key])
+    #     #
+    #     # print(all_files)
+    #     s3_manager.upload_files_with_thread_pool_executor(
+    #         output_bucket_name=output_bucket_name,
+    #         all_files=all_files,
+    #     )
+    #     print("Done uploading with thread pool executor.")
+    #     # TODO: move to common place
 
     #######################################################
     def create_cruise_level_zarr_store(
@@ -65,7 +65,11 @@ class CreateEmptyZarrStore:
         cruise_name: str,
         sensor_name: str,
         table_name: str,
+        # override_cruise_min_epsilon=None,
     ) -> None:
+        """
+        Initialize zarr store. The water_level needs to be integrated.
+        """
         tempdir = tempfile.TemporaryDirectory()
         try:
             # HB0806 - 123, HB0903 - 220
@@ -93,20 +97,19 @@ class CreateEmptyZarrStore:
             )
 
             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = np.min(
-                (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
-            )
+            # cruise_min_echo_range = np.min(
+            #     (df["MIN_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
+            # )
 
-            # [4] calculate the maximum of the max depth values
+            # [4] calculate the np.max(max_echo_range + water_level)
             cruise_max_echo_range = np.max(
                 (df["MAX_ECHO_RANGE"] + df["WATER_LEVEL"]).dropna().astype(float)
             )
 
+            # TODO: set this to either 1 or 0.5 meters
             cruise_min_epsilon = np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
 
-            print(
-                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
-            )
+            print(f"cruise_max_echo_range: {cruise_max_echo_range}")
 
             # [5] get number of channels
             cruise_frequencies = [
@@ -126,6 +129,7 @@ class CreateEmptyZarrStore:
                 bucket_name=output_bucket_name,
                 sub_prefix=zarr_prefix,
             )
+            #
            if len(child_objects) > 0:
                s3_manager.delete_nodd_objects(
                    bucket_name=output_bucket_name,
@@ -134,9 +138,9 @@ class CreateEmptyZarrStore:
             ################################################################
             # Create new model store
             zarr_manager = ZarrManager()
-            new_height = len(
-                zarr_manager.get_depth_values(
-                    min_echo_range=cruise_min_echo_range,
+            new_height = len(  # [0.19m down to 1001.744m] = 5272 samples, 10.3 tiles @ 512
+                zarr_manager.get_depth_values(  # these depths should be from min_epsilon to max_range+water_level
+                    # min_echo_range=cruise_min_echo_range,
                     max_echo_range=cruise_max_echo_range,
                     cruise_min_epsilon=cruise_min_epsilon,
                 )
@@ -150,13 +154,13 @@ class CreateEmptyZarrStore:
                 sensor_name=sensor_name,
                 frequencies=cruise_frequencies,
                 width=new_width,
-                min_echo_range=cruise_min_echo_range,
+                # min_echo_range=cruise_min_echo_range,
                 max_echo_range=cruise_max_echo_range,
                 cruise_min_epsilon=cruise_min_epsilon,
                 calibration_status=True,
             )
             #################################################################
-            self.upload_zarr_store_to_s3(
+            s3_manager.upload_zarr_store_to_s3(
                 output_bucket_name=output_bucket_name,
                 local_directory=tempdir.name,  # TODO: need to use .name or problem
                 object_prefix=zarr_prefix,
@@ -182,7 +186,9 @@ class CreateEmptyZarrStore:
             print("Done creating cruise level zarr store.")
             #################################################################
         except Exception as err:
-            print(f"Problem trying to create new cruise model store: {err}")
+            raise RuntimeError(
+                f"Problem trying to create new cruise model store, {err}"
+            )
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()
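
A recurring change in this release is that failure paths raise RuntimeError instead of printing and continuing, so callers of create_cruise_level_zarr_store (and of the S3Manager helpers it uses) now see errors propagate. A sketch of the adjustment a caller might make, assuming an already-constructed CreateEmptyZarrStore instance named create_empty_zarr_store; only the arguments visible in the hunks above are shown, and the values are illustrative or hypothetical:

try:
    create_empty_zarr_store.create_cruise_level_zarr_store(
        # output bucket and any other required arguments are not shown in this diff
        cruise_name="HB0806",                   # illustrative cruise from the code comments
        sensor_name="EK60",                     # illustrative sensor from the code comments
        table_name="water-column-sonar-table",  # hypothetical DynamoDB table name
    )
except RuntimeError as err:
    print(f"Failed to initialize cruise-level zarr store: {err}")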