water-column-sonar-processing 0.0.1__py3-none-any.whl → 25.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of water-column-sonar-processing might be problematic.

Files changed (60)
  1. water_column_sonar_processing/__init__.py +13 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -0
  3. water_column_sonar_processing/aws/dynamodb_manager.py +355 -0
  4. water_column_sonar_processing/aws/s3_manager.py +420 -0
  5. water_column_sonar_processing/aws/s3fs_manager.py +72 -0
  6. {model → water_column_sonar_processing}/aws/sns_manager.py +10 -21
  7. {model → water_column_sonar_processing}/aws/sqs_manager.py +11 -19
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +191 -0
  10. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  11. water_column_sonar_processing/cruise/resample_regrid.py +339 -0
  12. water_column_sonar_processing/geometry/__init__.py +11 -0
  13. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  14. water_column_sonar_processing/geometry/geometry_manager.py +243 -0
  15. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  16. water_column_sonar_processing/geometry/pmtile_generation.py +261 -0
  17. water_column_sonar_processing/index/__init__.py +3 -0
  18. water_column_sonar_processing/index/index_manager.py +384 -0
  19. water_column_sonar_processing/model/__init__.py +3 -0
  20. water_column_sonar_processing/model/zarr_manager.py +722 -0
  21. water_column_sonar_processing/process.py +149 -0
  22. water_column_sonar_processing/processing/__init__.py +4 -0
  23. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  24. water_column_sonar_processing/processing/raw_to_zarr.py +425 -0
  25. water_column_sonar_processing/utility/__init__.py +13 -0
  26. {model → water_column_sonar_processing}/utility/cleaner.py +7 -8
  27. water_column_sonar_processing/utility/constants.py +118 -0
  28. {model → water_column_sonar_processing}/utility/pipeline_status.py +47 -24
  29. water_column_sonar_processing/utility/timestamp.py +12 -0
  30. water_column_sonar_processing-25.11.1.dist-info/METADATA +182 -0
  31. water_column_sonar_processing-25.11.1.dist-info/RECORD +34 -0
  32. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info}/WHEEL +1 -1
  33. {water_column_sonar_processing-0.0.1.dist-info → water_column_sonar_processing-25.11.1.dist-info/licenses}/LICENSE +1 -1
  34. water_column_sonar_processing-25.11.1.dist-info/top_level.txt +1 -0
  35. __init__.py +0 -0
  36. model/__init__.py +0 -0
  37. model/aws/__init__.py +0 -0
  38. model/aws/dynamodb_manager.py +0 -149
  39. model/aws/s3_manager.py +0 -356
  40. model/aws/s3fs_manager.py +0 -74
  41. model/cruise/__init__.py +0 -0
  42. model/cruise/create_empty_zarr_store.py +0 -166
  43. model/cruise/resample_regrid.py +0 -248
  44. model/geospatial/__init__.py +0 -0
  45. model/geospatial/geometry_manager.py +0 -194
  46. model/geospatial/geometry_simplification.py +0 -81
  47. model/geospatial/pmtile_generation.py +0 -74
  48. model/index/__init__.py +0 -0
  49. model/index/index.py +0 -228
  50. model/model.py +0 -138
  51. model/utility/__init__.py +0 -0
  52. model/utility/constants.py +0 -56
  53. model/utility/timestamp.py +0 -12
  54. model/zarr/__init__.py +0 -0
  55. model/zarr/bar.py +0 -28
  56. model/zarr/foo.py +0 -11
  57. model/zarr/zarr_manager.py +0 -298
  58. water_column_sonar_processing-0.0.1.dist-info/METADATA +0 -89
  59. water_column_sonar_processing-0.0.1.dist-info/RECORD +0 -32
  60. water_column_sonar_processing-0.0.1.dist-info/top_level.txt +0 -2
model/aws/s3_manager.py DELETED
@@ -1,356 +0,0 @@
-import json
-import os
-import boto3
-import pandas as pd
-from collections.abc import Generator
-
-import geopandas
-from botocore.config import Config
-from boto3.s3.transfer import TransferConfig
-from botocore.exceptions import ClientError
-from concurrent.futures import ThreadPoolExecutor
-from concurrent.futures import as_completed
-
-MAX_POOL_CONNECTIONS = 64
-MAX_CONCURRENCY = 64
-MAX_WORKERS = 64
-GB = 1024 ** 3
-
-#########################################################################
-def chunked(ll: list, n: int) -> Generator:
-    # Yields successively n-sized chunks from ll.
-    for i in range(0, len(ll), n):
-        yield ll[i:i + n]
-
-
-class S3Manager:
-    #####################################################################
-    def __init__(
-        self,
-        # TODO: Need to allow passing in of credentials when writing to protected bucket
-    ):
-        self.input_bucket_name = os.environ.get('INPUT_BUCKET_NAME')
-        self.output_bucket_name = os.environ.get('OUTPUT_BUCKET_NAME')
-        self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
-        self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
-        self.s3_transfer_config = TransferConfig(
-            max_concurrency=MAX_CONCURRENCY,
-            use_threads=True,
-            max_bandwidth=None,
-            multipart_threshold=10 * GB
-        )
-        self.s3_session = boto3.Session(
-            aws_access_key_id=os.environ.get('ACCESS_KEY_ID'),
-            aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
-            region_name=self.s3_region,
-        )
-        self.s3_client = self.s3_session.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
-        self.s3_resource = boto3.resource(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
-        # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
-        # TODO: create both "s3_client_input" and "s3_client_output" ???
-        self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
-            aws_access_key_id=os.environ.get('OUTPUT_BUCKET_ACCESS_KEY'),
-            aws_secret_access_key=os.environ.get('OUTPUT_BUCKET_SECRET_ACCESS_KEY'),
-            region_name=self.s3_region,
-        )
-        self.s3_client_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.client(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
-        self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
-            service_name="s3",
-            config=self.s3_client_config,
-            region_name=self.s3_region,
-        )
-
-    def get_client(
-        self
-    ):
-        return self.s3_session.client(
-            service_name="s3",
-            config=self.__s3_client_config,
-            region_name=self.s3_region,
-        )
-
-    #####################################################################
-    def create_bucket(
-        self,
-        bucket_name: str,
-    ):
-        self.s3_client.create_bucket(
-            Bucket=bucket_name,
-            # Required when region is different then us-east-1
-            #
-            # TODO: if region is us-east-1, don't include this line somehow
-            # CreateBucketConfiguration={'LocationConstraint': self.__s3_region}
-        )
-
-    #####################################################################
-    def list_buckets(
-        self
-    ):
-        # client = self.get_client()
-        client = self.s3_client
-        return client.list_buckets()
-
-    #####################################################################
-    def upload_nodd_file(
-        self,
-        file_name: str,
-        key: str,
-    ):
-        self.s3_client_noaa_wcsd_zarr_pds.upload_file(
-            Filename=file_name,
-            Bucket=self.output_bucket_name,
-            Key=key,
-        )
-        return key
-
-    #####################################################################
-    def upload_files_with_thread_pool_executor(
-        self,
-        all_files: list,
-    ):
-        # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_uploads = []
-        try:  # TODO: problem with threadpool here, missing child files
-            with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
-                futures = [executor.submit(
-                    self.upload_nodd_file,
-                    all_file[0],  # file_name
-                    all_file[1]  # key
-                ) for all_file in all_files]
-                for future in as_completed(futures):
-                    result = future.result()
-                    if result:
-                        all_uploads.extend(result)
-        except Exception as err:
-            print(err)
-        print('Done uploading files using threading pool.')
-        return all_uploads
-
-    #####################################################################
-    def upload_zarr_files_to_bucket(  # noaa-wcsd-zarr-pds
-        self,
-        local_directory,
-        remote_directory,
-    ):
-        # Right now this is just for uploading a zarr store to s3
-        print('Uploading files to output bucket.')
-        store_name = os.path.basename(local_directory)
-        all_files = []
-        for subdir, dirs, files in os.walk(local_directory):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # s3_key = os.path.join(object_prefix, local_path)
-                s3_key = os.path.join(remote_directory, store_name, subdir.split(store_name)[-1].strip('/'))
-                all_files.append([local_path, s3_key])
-
-        all_uploads = self.upload_files_with_thread_pool_executor(
-            all_files=all_files,
-        )
-        print('Done uploading files to output bucket.')
-        return all_uploads
-
-    #####################################################################
-    # used: raw-to-zarr
-    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
-        self,
-        bucket_name,
-        prefix
-    ):
-        # analog to "find_children_objects"
-        # Returns a list of key strings for each object in bucket defined by prefix
-        s3_client = self.s3_client
-        keys = []
-        paginator = s3_client.get_paginator('list_objects_v2')
-        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
-        for page in page_iterator:
-            if 'Contents' in page.keys():
-                keys.extend([k['Key'] for k in page['Contents']])
-        return keys
-
-    def list_nodd_objects(  # These are used by the geometry_manager for uploading data
-        self,
-        prefix,
-    ):
-        # Returns a list of key strings for each object in bucket defined by prefix
-        keys = []
-        paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
-        for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-            if 'Contents' in page.keys():
-                keys.extend([k['Key'] for k in page['Contents']])
-        return keys
-
-    #####################################################################
-    # TODO: change name to "directory"
-    def folder_exists_and_not_empty(
-        self,
-        bucket_name: str,
-        path: str
-    ) -> bool:
-        if not path.endswith('/'):
-            path = path + '/'
-        s3_client = self.s3_client
-        resp = self.list_objects(bucket_name=bucket_name, prefix=path)  # TODO: this is returning root folder and doesn't include children or hidden folders
-        # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
-        return 'Contents' in resp
-
-    #####################################################################
-    # used
-    def __paginate_child_objects(
-        self,
-        bucket_name: str,
-        sub_prefix: str = None,
-    ) -> list:
-        page_iterator = self.s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name, Prefix=sub_prefix)
-        objects = []
-        for page in page_iterator:
-            if 'Contents' in page.keys():
-                objects.extend(page['Contents'])
-        return objects
-
-    def get_child_objects(
-        self,
-        bucket_name: str,
-        sub_prefix: str,
-        file_suffix: str = None,
-    ) -> list:
-        print('Getting child objects')
-        raw_files = []
-        try:
-            children = self.__paginate_child_objects(
-                bucket_name=bucket_name,
-                sub_prefix=sub_prefix,
-            )
-            if file_suffix is None:
-                raw_files = children
-            else:
-                for child in children:
-                    # Note: Any files with predicate 'NOISE' are to be ignored
-                    # see: "Bell_M._Shimada/SH1507" cruise for more details.
-                    if child['Key'].endswith(file_suffix) and not os.path.basename(child['Key']).startswith(
-                        'NOISE'
-                    ):
-                        raw_files.append(child['Key'])
-            return raw_files
-        except ClientError as err:
-            print(f"Problem was encountered while getting s3 files: {err}")
-            raise
-        print(f"Found {len(raw_files)} files.")
-        return raw_files
-
-    #####################################################################
-    def get_object(  # TODO: Move this to index.py
-        # noaa-wcsd-pds or noaa-wcsd-zarr-pds
-        self,
-        bucket_name,
-        key_name,
-    ):
-        # Meant for getting singular objects from a bucket, used by indexing lambda
-        print(f"Getting object {key_name} from {bucket_name}")
-        try:
-            response = self.s3_client.get_object(
-                Bucket=bucket_name,
-                Key=key_name,
-            )
-            # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
-            # if status == 200:
-        except ClientError as err:
-            print(f"Problem was encountered while getting s3 file: {err}")
-            raise
-        print(f"Done getting object {key_name} from {bucket_name}")
-        return response
-
-    #####################################################################
-    # used raw-to-zarr
-    def download_file(  # TODO: change to download_object
-        # noaa-wcsd-pds or noaa-wcsd-zarr-pds
-        self,
-        bucket_name,
-        key,
-        file_name,
-    ):
-        self.s3_client.download_file(
-            Bucket=bucket_name,
-            Key=key,
-            Filename=file_name
-        )
-        print('downloaded file')
-
-    #####################################################################
-    # not used
-    # def delete_nodd_object(  # noaa-wcsd-zarr-pds
-    #     self,
-    #     bucket_name,
-    #     key
-    # ):  # -> dict:
-    #     # return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
-    #     self.s3_client.delete_object(Bucket=bucket_name, Key=key)
-
-    #####################################################################
-    def delete_nodd_objects(  # nodd-bucket
-        self,
-        objects: list,
-    ):
-        try:
-            print(f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches.")
-            objects_to_delete = []
-            for obj in objects:
-                objects_to_delete.append({'Key': obj['Key']})
-            # Note: request can contain a list of up to 1000 keys
-            for batch in chunked(ll=objects_to_delete, n=1000):
-                self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
-                    Bucket=self.output_bucket_name,
-                    Delete={'Objects': batch}
-                )
-            print(f"Deleted files.")
-        except Exception as err:
-            print(f"Problem was encountered while deleting objects: {err}")
-
-    #####################################################################
-    # not used TODO: remove
-    def put(  # noaa-wcsd-zarr-pds
-        self,
-        bucket_name,
-        key,
-        body
-    ):
-        self.s3_client.put_object(
-            Bucket=bucket_name,
-            Key=key,
-            Body=body
-        )
-
-    #####################################################################
-    def read_s3_json(
-        self,
-        ship_name,
-        cruise_name,
-        sensor_name,
-        file_name_stem,
-    ) -> str:
-        try:
-            content_object = self.s3_resource_noaa_wcsd_zarr_pds.Object(
-                bucket_name=self.output_bucket_name,
-                key=f'spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json'
-            ).get()
-            file_content = content_object['Body'].read().decode('utf-8')
-            json_content = json.loads(file_content)
-            return json_content
-        except Exception as err:  # Failure
-            print(f'Exception encountered reading s3 GeoJSON: {err}')
-            raise
-
-    #####################################################################
-
-#########################################################################
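Note on the deleted code above: the chunked generator exists because the S3 DeleteObjects API accepts at most 1,000 keys per request, so delete_nodd_objects batches its deletions. A minimal standalone sketch of that pattern, with a hypothetical bucket name and key list:

import boto3
from collections.abc import Generator


def chunked(ll: list, n: int) -> Generator:
    # Yield successive n-sized chunks from ll.
    for i in range(0, len(ll), n):
        yield ll[i:i + n]


def delete_keys_in_batches(bucket_name: str, keys: list) -> None:
    # DeleteObjects is limited to 1,000 keys per call, hence the batching.
    s3_client = boto3.client("s3")
    objects_to_delete = [{"Key": key} for key in keys]
    for batch in chunked(ll=objects_to_delete, n=1000):
        s3_client.delete_objects(
            Bucket=bucket_name,
            Delete={"Objects": batch},
        )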
model/aws/s3fs_manager.py DELETED
@@ -1,74 +0,0 @@
-import os
-import s3fs
-
-
-# TODO: S3FS_LOGGING_LEVEL=DEBUG
-
-
-class S3FSManager:
-    #####################################################################
-    def __init__(
-        self,
-    ):
-        self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
-        self.s3fs = s3fs.S3FileSystem(
-            key=os.environ.get('OUTPUT_BUCKET_ACCESS_KEY'),
-            secret=os.environ.get('OUTPUT_BUCKET_SECRET_ACCESS_KEY'),
-            # asynchronous=True
-            # use_ssl=False,
-            # skip_instance_cache=True,
-            # default_block_size='100MB',  # if no specific value is given at all time. The built-in default is 5MB
-            # client_kwargs={
-            #     "region_name": self.__s3_region
-            # }
-        )
-
-    #####################################################################
-    def add_file(
-        self,
-        filename
-    ):
-        full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
-        print(full_path)
-
-        self.s3fs.touch(full_path)
-        ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
-
-        print(ff)
-
-    #####################################################################
-    def upload_data(
-        self,
-        bucket_name,
-        file_path,
-        prefix
-    ):
-        # TODO: this works in theory but use boto3 to upload files
-        s3_path = f"s3://{bucket_name}/{prefix}/"
-        s3_file_system = self.s3fs
-        s3_file_system.put(file_path, s3_path, recursive=True)
-
-    #####################################################################
-    def s3_map(
-        self,
-        s3_zarr_store_path,  # f's3://{bucket}/{input_zarr_path}'
-    ):
-        # The "s3_zarr_store_path" is defined as f's3://{bucket}/{input_zarr_path}'
-        # create=False, not false because will be writing
-        # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
-        return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs)  # create=False, not false because will be writing
-
-    #####################################################################
-    def exists(
-        self,
-        geo_json_s3_path,
-    ):
-        s3_file_system = self.s3fs
-        return s3_file_system.exists(path=geo_json_s3_path)
-
-    #####################################################################
-    # def put(
-    #     self
-    # ):
-    #     s3_file_system = self.s3fs
-    #     return
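The s3_map helper above returns an s3fs.S3Map, a MutableMapping view of a bucket prefix that Zarr-aware tooling can consume directly as a store. A hedged sketch of typical usage, assuming a hypothetical store path and the same credential environment variables as the deleted code:

import os

import s3fs
import xarray as xr

# Hypothetical store path; real stores follow
# s3://<bucket>/level_2/<ship>/<cruise>/<sensor>/<cruise>.zarr
s3_zarr_store_path = "s3://example-bucket/level_2/Ship/CRUISE01/EK60/CRUISE01.zarr"

fs = s3fs.S3FileSystem(
    key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
    secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
)
store = s3fs.S3Map(root=s3_zarr_store_path, s3=fs)

# The mapping plugs straight into Zarr readers such as xarray.
ds = xr.open_zarr(store)
print(ds)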
model/cruise/__init__.py DELETED
File without changes
model/cruise/create_empty_zarr_store.py DELETED
@@ -1,166 +0,0 @@
-import os
-import numcodecs
-import numpy as np
-from ..utility.cleaner import Cleaner
-from ..aws.dynamodb_manager import DynamoDBManager
-from ..aws.s3_manager import S3Manager
-from ..zarr.zarr_manager import ZarrManager
-
-numcodecs.blosc.use_threads = False
-numcodecs.blosc.set_nthreads(1)
-
-TEMPDIR = "/tmp"
-
-# TODO: when ready switch to version 3 of zarr spec
-# ZARR_V3_EXPERIMENTAL_API = 1
-# creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
-
-class CreateEmptyZarrStore:
-    #######################################################
-    def __init__(
-        self,
-    ):
-        self.__overwrite = True
-        # TODO: create output_bucket and input_bucket variables here?
-        self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
-        self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
-
-    #######################################################
-
-    def upload_zarr_store_to_s3(
-        self,
-        local_directory: str,
-        object_prefix: str,
-        cruise_name: str,
-    ) -> None:
-        print('uploading zarr store to s3')
-        s3_manager = S3Manager()
-        #
-        print('Starting upload with thread pool executor.')
-        # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
-        all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
-            for file in files:
-                local_path = os.path.join(subdir, file)
-                # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
-                s3_key = f'{object_prefix}/{cruise_name}.zarr{local_path.split(f"{cruise_name}.zarr")[-1]}'
-                all_files.append([local_path, s3_key])
-        #
-        # print(all_files)
-        s3_manager.upload_files_with_thread_pool_executor(
-            all_files=all_files,
-        )
-        print('Done uploading with thread pool executor.')
-        # TODO: move to common place
-
-    #######################################################
-    def create_cruise_level_zarr_store(
-        self,
-        ship_name: str,
-        cruise_name: str,
-        sensor_name: str,
-        table_name: str
-    ) -> None:
-        try:
-            # HB0806 - 123, HB0903 - 220
-            dynamo_db_manager = DynamoDBManager()
-
-            df = dynamo_db_manager.get_table_as_df(
-                table_name=table_name,
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name
-            )
-
-            # filter the dataframe just for enums >= LEVEL_1_PROCESSING
-            # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
-
-            # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
-
-            print(f"DataFrame shape: {df.shape}")
-            cruise_channels = list(set([i for sublist in df['CHANNELS'].dropna() for i in sublist]))
-            cruise_channels.sort()
-
-            consolidated_zarr_width = np.sum(df['NUM_PING_TIME_DROPNA'].dropna().astype(int))
-
-            # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = float(np.min(df['MIN_ECHO_RANGE'].dropna().astype(float)))
-
-            # [4] calculate the maximum of the max depth values
-            cruise_max_echo_range = float(np.max(df['MAX_ECHO_RANGE'].dropna().astype(float)))
-            print(f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}")
-
-            # [5] get number of channels
-            cruise_frequencies = [float(i) for i in df['FREQUENCIES'].dropna().values.flatten()[0]]
-            print(cruise_frequencies)
-
-            new_width = int(consolidated_zarr_width)
-            print(f"new_width: {new_width}")
-            #################################################################
-            store_name = f"{cruise_name}.zarr"
-            print(store_name)
-            ################################################################
-            # Delete existing zarr store if it exists
-            s3_manager = S3Manager()
-            zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
-            child_objects = s3_manager.get_child_objects(
                bucket_name=self.output_bucket_name,
-                sub_prefix=zarr_prefix,
-            )
-            if len(child_objects) > 0:
-                s3_manager.delete_nodd_objects(
-                    objects=child_objects,
-                )
-            ################################################################
-            # Create new zarr store
-            zarr_manager = ZarrManager()
-            new_height = len(zarr_manager.get_depth_values(
-                min_echo_range=cruise_min_echo_range,
-                max_echo_range=cruise_max_echo_range
-            ))
-            print(f"new_height: {new_height}")
-
-            zarr_manager.create_zarr_store(
-                path=TEMPDIR,
-                ship_name=ship_name,
-                cruise_name=cruise_name,
-                sensor_name=sensor_name,
-                frequencies=cruise_frequencies,
-                width=new_width,
-                min_echo_range=cruise_min_echo_range,
-                max_echo_range=cruise_max_echo_range,
-                calibration_status=True,
-            )
-            #################################################################
-            self.upload_zarr_store_to_s3(
-                local_directory=TEMPDIR,
-                object_prefix=zarr_prefix,
-                cruise_name=cruise_name,
-            )
-            # https://noaa-wcsd-zarr-pds.s3.amazonaws.com/index.html
-            #################################################################
-            # Verify count of the files uploaded
-            # count = self.__get_file_count(store_name=store_name)
-            # #
-            # raw_zarr_files = self.__get_s3_files(  # TODO: just need count
-            #     bucket_name=self.__output_bucket,
-            #     sub_prefix=os.path.join(zarr_prefix, store_name),
-            # )
-            # if len(raw_zarr_files) != count:
-            #     print(f'Problem writing {store_name} with proper count {count}.')
-            #     raise Exception("File count doesnt equal number of s3 Zarr store files.")
-            # else:
-            #     print("File counts match.")
-            #################################################################
-            # Success
-            # TODO: update enum in dynamodb
-            #################################################################
-        except Exception as err:
-            print(f"Problem trying to create new cruise zarr store: {err}")
-        finally:
-            cleaner = Cleaner()
-            cleaner.delete_local_files()
-            print("Done creating cruise level zarr store")
-
-
-###########################################################
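create_cruise_level_zarr_store sizes the empty cruise-level store from per-file metadata in DynamoDB: the width is the sum of the per-file ping counts, and the height is the length of a depth grid spanning the cruise-wide maximum echo range at the finest (minimum) echo-range resolution. A rough sketch of that arithmetic with invented numbers; the real grid comes from ZarrManager.get_depth_values, whose implementation may differ:

import numpy as np

# Hypothetical per-file metadata, standing in for the DynamoDB columns
# NUM_PING_TIME_DROPNA, MIN_ECHO_RANGE, and MAX_ECHO_RANGE.
num_ping_time_dropna = [1200, 980, 1500]  # pings per raw file
min_echo_ranges = [0.25, 0.50, 0.25]      # finest vertical resolution per file (m)
max_echo_ranges = [249.8, 499.7, 249.8]   # deepest sample per file (m)

# Width: one column per ping across the whole cruise.
new_width = int(np.sum(num_ping_time_dropna))  # 3680

# Height: assumed regular grid from the finest resolution down to the
# deepest sample (an approximation of get_depth_values, not its source).
cruise_min_echo_range = float(np.min(min_echo_ranges))
cruise_max_echo_range = float(np.max(max_echo_ranges))
depth_values = np.arange(
    cruise_min_echo_range,
    cruise_max_echo_range + cruise_min_echo_range,
    cruise_min_echo_range,
)
new_height = len(depth_values)

print(f"new_width: {new_width}, new_height: {new_height}")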