water-column-sonar-processing 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: the registry has flagged this version of water-column-sonar-processing for review.

Files changed (30)
  1. water_column_sonar_processing/__init__.py +16 -0
  2. water_column_sonar_processing/aws/__init__.py +7 -4
  3. water_column_sonar_processing/aws/dynamodb_manager.py +70 -49
  4. water_column_sonar_processing/aws/s3_manager.py +112 -122
  5. water_column_sonar_processing/aws/s3fs_manager.py +13 -19
  6. water_column_sonar_processing/aws/sns_manager.py +10 -21
  7. water_column_sonar_processing/aws/sqs_manager.py +10 -18
  8. water_column_sonar_processing/cruise/__init__.py +4 -0
  9. water_column_sonar_processing/cruise/create_empty_zarr_store.py +51 -33
  10. water_column_sonar_processing/cruise/resample_regrid.py +109 -58
  11. water_column_sonar_processing/geometry/__init__.py +5 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +79 -48
  13. water_column_sonar_processing/geometry/geometry_simplification.py +13 -12
  14. water_column_sonar_processing/geometry/pmtile_generation.py +24 -23
  15. water_column_sonar_processing/index/__init__.py +3 -0
  16. water_column_sonar_processing/index/index_manager.py +104 -80
  17. water_column_sonar_processing/model/__init__.py +3 -0
  18. water_column_sonar_processing/model/zarr_manager.py +113 -75
  19. water_column_sonar_processing/process.py +76 -69
  20. water_column_sonar_processing/utility/__init__.py +6 -0
  21. water_column_sonar_processing/utility/cleaner.py +6 -7
  22. water_column_sonar_processing/utility/constants.py +42 -35
  23. water_column_sonar_processing/utility/pipeline_status.py +37 -10
  24. water_column_sonar_processing/utility/timestamp.py +3 -2
  25. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.7.dist-info}/METADATA +32 -1
  26. water_column_sonar_processing-0.0.7.dist-info/RECORD +29 -0
  27. water_column_sonar_processing-0.0.5.dist-info/RECORD +0 -29
  28. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.7.dist-info}/LICENSE +0 -0
  29. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.7.dist-info}/WHEEL +0 -0
  30. {water_column_sonar_processing-0.0.5.dist-info → water_column_sonar_processing-0.0.7.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3fs_manager.py

@@ -1,6 +1,6 @@
 import os
-import s3fs
 
+import s3fs
 
 # TODO: S3FS_LOGGING_LEVEL=DEBUG
 
@@ -8,12 +8,12 @@ import s3fs
 class S3FSManager:
     #####################################################################
     def __init__(
-            self,
+        self,
     ):
         self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3fs = s3fs.S3FileSystem(
-            key=os.environ.get('OUTPUT_BUCKET_ACCESS_KEY'),
-            secret=os.environ.get('OUTPUT_BUCKET_SECRET_ACCESS_KEY'),
+            key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
+            secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
             # asynchronous=True
             # use_ssl=False,
             # skip_instance_cache=True,
@@ -24,10 +24,7 @@ class S3FSManager:
         )
 
     #####################################################################
-    def add_file(
-        self,
-        filename
-    ):
+    def add_file(self, filename):
         full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
         print(full_path)
 
@@ -37,12 +34,7 @@ class S3FSManager:
         print(ff)
 
     #####################################################################
-    def upload_data(
-        self,
-        bucket_name,
-        file_path,
-        prefix
-    ):
+    def upload_data(self, bucket_name, file_path, prefix):
         # TODO: this works in theory but use boto3 to upload files
         s3_path = f"s3://{bucket_name}/{prefix}/"
         s3_file_system = self.s3fs
@@ -50,18 +42,20 @@ class S3FSManager:
 
     #####################################################################
     def s3_map(
-            self,
-            s3_zarr_store_path,  # f's3://{bucket}/{input_zarr_path}'
+        self,
+        s3_zarr_store_path,  # f's3://{bucket}/{input_zarr_path}'
    ):
         # The "s3_zarr_store_path" is defined as f's3://{bucket}/{input_zarr_path}'
         # create=False, not false because will be writing
         # return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs, check=True)
-        return s3fs.S3Map(root=s3_zarr_store_path, s3=self.s3fs)  # create=False, not false because will be writing
+        return s3fs.S3Map(
+            root=s3_zarr_store_path, s3=self.s3fs
+        )  # create=False, not false because will be writing
 
     #####################################################################
     def exists(
-            self,
-            geo_json_s3_path,
+        self,
+        geo_json_s3_path,
     ):
         s3_file_system = self.s3fs
         return s3_file_system.exists(path=geo_json_s3_path)
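The changes to s3fs_manager.py are black-style formatting only: double quotes, collapsed one-line signatures, and the s3fs import moved below os; no behavior changes. For context, s3_map wraps the store path in an s3fs.S3Map so zarr/xarray can read it directly. A minimal usage sketch (the bucket and store path are hypothetical; assumes AWS_REGION, OUTPUT_BUCKET_ACCESS_KEY, and OUTPUT_BUCKET_SECRET_ACCESS_KEY are set):

import xarray as xr

from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

s3fs_manager = S3FSManager()
# s3_map returns an s3fs.S3Map mapping backed by the configured S3FileSystem
store = s3fs_manager.s3_map(
    s3_zarr_store_path="s3://example-bucket/level_2/HB0806.zarr"  # hypothetical path
)
ds = xr.open_zarr(store)  # open the cruise-level zarr store lazily
print(ds)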
water_column_sonar_processing/aws/sns_manager.py

@@ -1,5 +1,5 @@
 import os
-# import json
+
 import boto3
 
 
@@ -7,32 +7,22 @@ import boto3
 class SNSManager:
     #######################################################
     def __init__(
-            self,
+        self,
     ):
         self.__sns_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sns_session = boto3.Session(
-            aws_access_key_id=os.environ.get('ACCESS_KEY_ID'),
-            aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
-            region_name=self.__sns_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sns_region,
         )
         self.__sns_resource = self.__sns_session.resource(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
         self.__sns_client = self.__sns_session.client(
-            service_name="sns",
-            region_name=self.__sns_region
+            service_name="sns", region_name=self.__sns_region
         )
 
     #######################################################
-    # TODO: pick one
-    # def publish_message(self, topic_arn, message):
-    #     response = self.__sns_client.publish(
-    #         TopicArn=topic_arn,
-    #         Message=message
-    #     )
-    #     print(f"Topic Response: {topic_arn} : '{message}' => {response}")
-
     # TODO: pick one
     def publish(self, topic_arn, message):
         response = self.__sns_client.publish(
@@ -55,13 +45,12 @@ class SNSManager:
     #######################################################
     def subscribe(self, topic_arn, endpoint):
         self.__sns_client.subscribe(
-            TopicArn=topic_arn,
-            Protocol='sqs',
-            Endpoint=endpoint
+            TopicArn=topic_arn, Protocol="sqs", Endpoint=endpoint
         )
 
     #######################################################
     def list_topics(self):
         print(self.__sns_client.list_topics())
 
-    ###########################################################
+
+    ###########################################################
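Besides formatting, sns_manager.py drops the commented-out publish_message duplicate, leaving publish as the single entry point. A short sketch (the topic ARN is hypothetical; assumes ACCESS_KEY_ID and SECRET_ACCESS_KEY are set):

from water_column_sonar_processing.aws.sns_manager import SNSManager

sns_manager = SNSManager()
# publish() sends the message and prints the SNS response
sns_manager.publish(
    topic_arn="arn:aws:sns:us-east-1:123456789012:example-topic",  # hypothetical ARN
    message="level_1 processing complete",  # hypothetical payload
)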
water_column_sonar_processing/aws/sqs_manager.py

@@ -1,34 +1,29 @@
 import os
+
 import boto3
-# import time
 
 
 ###########################################################
 class SQSManager:
     #######################################################
     def __init__(
-            self,
+        self,
     ):
         self.__sqs_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.__sqs_session = boto3.Session(
-            aws_access_key_id=os.environ.get('ACCESS_KEY_ID'),
-            aws_secret_access_key=os.environ.get('SECRET_ACCESS_KEY'),
-            region_name=self.__sqs_region
+            aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
+            aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
+            region_name=self.__sqs_region,
         )
         self.__sqs_resource = self.__sqs_session.resource(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
         self.__sqs_client = self.__sqs_session.client(
-            service_name="sqs",
-            region_name=self.__sqs_region
+            service_name="sqs", region_name=self.__sqs_region
         )
 
     #######################################################
-    def create_queue(
-        self,
-        queue_name
-    ):
+    def create_queue(self, queue_name):
         response = self.__sqs_client.create_queue(QueueName=queue_name)
         return response
 
@@ -38,13 +33,10 @@ class SQSManager:
         return sqs_queue
 
     #######################################################
-    def list_queues(
-        self,
-        queue_name_prefix
-    ):
+    def list_queues(self, queue_name_prefix):
         # Note: SQS control plane is eventually consistent, meaning that it
         # takes a while to propagate the data accross the systems.
         response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
         print(response)
 
-    #######################################################
+    #######################################################
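The sqs_manager.py changes are likewise formatting plus removal of the dead `# import time`. A short sketch (the queue names are hypothetical; same credential assumptions as above):

from water_column_sonar_processing.aws.sqs_manager import SQSManager

sqs_manager = SQSManager()
response = sqs_manager.create_queue(queue_name="example-queue")  # hypothetical name
print(response)
# list_queues prints matching queues; note the control plane is eventually consistent
sqs_manager.list_queues(queue_name_prefix="example")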
water_column_sonar_processing/cruise/__init__.py

@@ -0,0 +1,4 @@
+from .create_empty_zarr_store import CreateEmptyZarrStore
+from .resample_regrid import ResampleRegrid
+
+__all__ = ["CreateEmptyZarrStore", "ResampleRegrid"]
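This new __init__.py means both cruise-level classes can now be imported from the subpackage directly:

from water_column_sonar_processing.cruise import CreateEmptyZarrStore, ResampleRegrid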
water_column_sonar_processing/cruise/create_empty_zarr_store.py

@@ -1,46 +1,48 @@
 import os
+
 import numcodecs
 import numpy as np
 
-from water_column_sonar_processing.utility.cleaner import Cleaner
 from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
 from water_column_sonar_processing.aws.s3_manager import S3Manager
 from water_column_sonar_processing.model.zarr_manager import ZarrManager
+from water_column_sonar_processing.utility.cleaner import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
 
-TEMPDIR = "/tmp"
-
+# TEMPDIR = "/tmp"
 # TODO: when ready switch to version 3 of zarr spec
 # ZARR_V3_EXPERIMENTAL_API = 1
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
+
 class CreateEmptyZarrStore:
     #######################################################
     def __init__(
-            self,
+        self,
     ):
         self.__overwrite = True
-        # TODO: create output_bucket and input_bucket variables here?
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
 
     #######################################################
 
     def upload_zarr_store_to_s3(
-            self,
-            local_directory: str,
-            object_prefix: str,
-            cruise_name: str,
+        self,
+        local_directory: str,
+        object_prefix: str,
+        cruise_name: str,
     ) -> None:
-        print('uploading zarr store to s3')
+        print("uploading zarr store to s3")
         s3_manager = S3Manager()
         #
-        print('Starting upload with thread pool executor.')
+        print("Starting upload with thread pool executor.")
         # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
         all_files = []
-        for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+        for subdir, dirs, files in os.walk(
+            f"{local_directory}/{cruise_name}.zarr"
+        ):
             for file in files:
                 local_path = os.path.join(subdir, file)
                 # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
@@ -51,48 +53,62 @@ class CreateEmptyZarrStore:
         s3_manager.upload_files_with_thread_pool_executor(
             all_files=all_files,
         )
-        print('Done uploading with thread pool executor.')
+        print("Done uploading with thread pool executor.")
         # TODO: move to common place
 
     #######################################################
     def create_cruise_level_zarr_store(
-            self,
-            ship_name: str,
-            cruise_name: str,
-            sensor_name: str,
-            table_name: str
+        self,
+        ship_name: str,
+        cruise_name: str,
+        sensor_name: str,
+        table_name: str,
+        tempdir: str,
    ) -> None:
         try:
             # HB0806 - 123, HB0903 - 220
             dynamo_db_manager = DynamoDBManager()
+            s3_manager = S3Manager()
 
             df = dynamo_db_manager.get_table_as_df(
                 table_name=table_name,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
-                sensor_name=sensor_name
+                sensor_name=sensor_name,
             )
 
-            # filter the dataframe just for enums >= LEVEL_1_PROCESSING
+            # TODO: filter the dataframe just for enums >= LEVEL_1_PROCESSING
             # df[df['PIPELINE_STATUS'] < PipelineStatus.LEVEL_1_PROCESSING] = np.nan
 
             # TODO: VERIFY GEOJSON EXISTS as prerequisite!!!
 
             print(f"DataFrame shape: {df.shape}")
-            cruise_channels = list(set([i for sublist in df['CHANNELS'].dropna() for i in sublist]))
+            cruise_channels = list(
+                set([i for sublist in df["CHANNELS"].dropna() for i in sublist])
+            )
             cruise_channels.sort()
 
-            consolidated_zarr_width = np.sum(df['NUM_PING_TIME_DROPNA'].dropna().astype(int))
+            consolidated_zarr_width = np.sum(
+                df["NUM_PING_TIME_DROPNA"].dropna().astype(int)
+            )
 
             # [3] calculate the max/min measurement resolutions for the whole cruise
-            cruise_min_echo_range = float(np.min(df['MIN_ECHO_RANGE'].dropna().astype(float)))
+            cruise_min_echo_range = float(
+                np.min(df["MIN_ECHO_RANGE"].dropna().astype(float))
+            )
 
             # [4] calculate the maximum of the max depth values
-            cruise_max_echo_range = float(np.max(df['MAX_ECHO_RANGE'].dropna().astype(float)))
-            print(f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}")
+            cruise_max_echo_range = float(
+                np.max(df["MAX_ECHO_RANGE"].dropna().astype(float))
+            )
+            print(
+                f"cruise_min_echo_range: {cruise_min_echo_range}, cruise_max_echo_range: {cruise_max_echo_range}"
+            )
 
             # [5] get number of channels
-            cruise_frequencies = [float(i) for i in df['FREQUENCIES'].dropna().values.flatten()[0]]
+            cruise_frequencies = [
+                float(i) for i in df["FREQUENCIES"].dropna().values.flatten()[0]
+            ]
             print(cruise_frequencies)
 
             new_width = int(consolidated_zarr_width)
@@ -102,7 +118,6 @@ class CreateEmptyZarrStore:
             print(store_name)
             ################################################################
             # Delete existing zarr store if it exists
-            s3_manager = S3Manager()
             zarr_prefix = os.path.join("level_2", ship_name, cruise_name, sensor_name)
             child_objects = s3_manager.get_child_objects(
                 bucket_name=self.output_bucket_name,
@@ -115,14 +130,16 @@ class CreateEmptyZarrStore:
             ################################################################
             # Create new zarr store
             zarr_manager = ZarrManager()
-            new_height = len(zarr_manager.get_depth_values(
-                min_echo_range=cruise_min_echo_range,
-                max_echo_range=cruise_max_echo_range
-            ))
+            new_height = len(
+                zarr_manager.get_depth_values(
+                    min_echo_range=cruise_min_echo_range,
+                    max_echo_range=cruise_max_echo_range,
+                )
+            )
             print(f"new_height: {new_height}")
 
             zarr_manager.create_zarr_store(
-                path=TEMPDIR,
+                path=tempdir,
                 ship_name=ship_name,
                 cruise_name=cruise_name,
                 sensor_name=sensor_name,
@@ -134,7 +151,7 @@ class CreateEmptyZarrStore:
             )
             #################################################################
             self.upload_zarr_store_to_s3(
-                local_directory=TEMPDIR,
+                local_directory=tempdir,
                 object_prefix=zarr_prefix,
                 cruise_name=cruise_name,
             )
@@ -161,6 +178,7 @@ class CreateEmptyZarrStore:
         finally:
             cleaner = Cleaner()
             cleaner.delete_local_files()
+            # TODO: should delete zarr store in temp directory too?
             print("Done creating cruise level zarr store")
 
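The substantive API change in create_empty_zarr_store.py: the module-level TEMPDIR = "/tmp" constant is commented out, and create_cruise_level_zarr_store now takes an explicit tempdir argument (S3Manager is also constructed once at the top of the method instead of mid-way through). A minimal call sketch (the table name is hypothetical; the ship/cruise/sensor values are the ones from the comments above):

import tempfile

from water_column_sonar_processing.cruise import CreateEmptyZarrStore

with tempfile.TemporaryDirectory() as tmp:
    CreateEmptyZarrStore().create_cruise_level_zarr_store(
        ship_name="Henry_B._Bigelow",
        cruise_name="HB0806",
        sensor_name="EK60",
        table_name="example-table",  # hypothetical DynamoDB table name
        tempdir=tmp,  # scratch directory, replaces the old TEMPDIR constant
    )

Passing the scratch directory in makes the /tmp assumption explicit at the call site and lets tests substitute a TemporaryDirectory.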