water-column-sonar-processing 0.0.7__py3-none-any.whl → 0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of water-column-sonar-processing might be problematic.

Files changed (21)
  1. water_column_sonar_processing/__init__.py +4 -5
  2. water_column_sonar_processing/aws/dynamodb_manager.py +149 -43
  3. water_column_sonar_processing/aws/s3_manager.py +71 -37
  4. water_column_sonar_processing/cruise/create_empty_zarr_store.py +6 -4
  5. water_column_sonar_processing/cruise/resample_regrid.py +3 -3
  6. water_column_sonar_processing/geometry/geometry_manager.py +21 -6
  7. water_column_sonar_processing/geometry/pmtile_generation.py +202 -13
  8. water_column_sonar_processing/index/index_manager.py +25 -13
  9. water_column_sonar_processing/model/zarr_manager.py +26 -25
  10. water_column_sonar_processing/process.py +4 -4
  11. water_column_sonar_processing/processing/__init__.py +4 -0
  12. water_column_sonar_processing/processing/cruise_sampler.py +342 -0
  13. water_column_sonar_processing/processing/raw_to_zarr.py +349 -0
  14. water_column_sonar_processing/utility/cleaner.py +1 -0
  15. water_column_sonar_processing/utility/constants.py +6 -2
  16. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.8.dist-info}/METADATA +20 -10
  17. water_column_sonar_processing-0.0.8.dist-info/RECORD +32 -0
  18. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.8.dist-info}/WHEEL +1 -1
  19. water_column_sonar_processing-0.0.7.dist-info/RECORD +0 -29
  20. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.8.dist-info}/LICENSE +0 -0
  21. {water_column_sonar_processing-0.0.7.dist-info → water_column_sonar_processing-0.0.8.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/__init__.py
@@ -1,8 +1,8 @@
 from __future__ import absolute_import
 
-from . import aws, cruise, geometry, index, model, utility, process
-from .model import ZarrManager
-from .process import Process
+from . import aws, cruise, geometry, index, model, processing, utility
+# from .model import ZarrManager
+# from .process import Process
 
 __all__ = [
     "aws",
@@ -10,7 +10,6 @@ __all__ = [
     "geometry",
     "index",
     "model",
+    "processing",
     "utility",
-    "process",
-    "Process",
 ]
water_column_sonar_processing/aws/dynamodb_manager.py
@@ -8,7 +8,11 @@ from boto3.dynamodb.types import TypeDeserializer, TypeSerializer
 #########################################################################
 class DynamoDBManager:
     #####################################################################
-    def __init__(self):
+    def __init__(
+        self,
+        # endpoint_url
+    ):
+        # self.endpoint_url = endpoint_url
         self.__dynamodb_session = boto3.Session(
             aws_access_key_id=os.environ.get("ACCESS_KEY_ID"),
             aws_secret_access_key=os.environ.get("SECRET_ACCESS_KEY"),
@@ -16,9 +20,11 @@ class DynamoDBManager:
         )
         self.__dynamodb_resource = self.__dynamodb_session.resource(
             service_name="dynamodb",
+            # endpoint_url=self.endpoint_url
         )
         self.__dynamodb_client = self.__dynamodb_session.client(
             service_name="dynamodb",
+            # endpoint_url=self.endpoint_url
         )
         self.type_serializer = TypeSerializer()  # https://stackoverflow.com/a/46738251
         self.type_deserializer = TypeDeserializer()
@@ -35,31 +41,14 @@ class DynamoDBManager:
         # assert (status_code == 200), "Problem, unable to update dynamodb table."
 
     #####################################################################
-    def create_table(
-        self,
-        table_name,
-        key_schema,
-        attribute_definitions,
-    ):
-        self.__dynamodb_client.create_table(
-            AttributeDefinitions=attribute_definitions,
-            TableName=table_name,
-            KeySchema=key_schema,
-            BillingMode="PAY_PER_REQUEST",  # "PROVISIONED",
-            # ProvisionedThroughput={
-            #     'ReadCapacityUnits': 1_000,
-            #     'WriteCapacityUnits': 1_000
-            # }
-        )
-
     #####################################################################
     def create_water_column_sonar_table(
         self,
         table_name,
     ):
-        self.create_table(
-            table_name=table_name,
-            key_schema=[
+        self.__dynamodb_client.create_table(
+            TableName=table_name,
+            KeySchema=[
                 {
                     "AttributeName": "FILE_NAME",
                     "KeyType": "HASH",
@@ -69,20 +58,50 @@ class DynamoDBManager:
                     "KeyType": "RANGE",
                 },
             ],
-            attribute_definitions=[
+            AttributeDefinitions=[
                 {"AttributeName": "FILE_NAME", "AttributeType": "S"},
                 {"AttributeName": "CRUISE_NAME", "AttributeType": "S"},
            ],
+            BillingMode="PAY_PER_REQUEST"
+            # ProvisionedThroughput={
+            #     'ReadCapacityUnits': 1_000,
+            #     'WriteCapacityUnits': 1_000
+            # }
         )
+        # TODO: after creating status is 'CREATING', wait until 'ACTIVE'
+        response = self.__dynamodb_client.describe_table(TableName=table_name)
+        print(response)  # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb/client/describe_table.html
+        # sleep then response['Table']['TableStatus'] == 'ACTIVE'
+
+    #####################################################################
+    # don't think this is used?
+    # def get_item(
+    #     self,
+    #     table_name,
+    #     key
+    # ):
+    #     response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
+    #     item = None
+    #     if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
+    #         if "Item" in response:
+    #             item = response["Item"]
+    #     return item
 
     #####################################################################
-    def get_item(self, table_name, key):
-        response = self.__dynamodb_client.get_item(TableName=table_name, Key=key)
-        item = None
-        if response["ResponseMetadata"]["HTTPStatusCode"] == 200:
-            if "Item" in response:
-                item = response["Item"]
-        return item
+    def get_table_item(
+        self,
+        table_name,
+        key,
+    ):
+        """
+        Gets a single row from the db.
+        """
+        table = self.__dynamodb_resource.Table(table_name)
+        response = table.get_item(Key=key)
+        # TODO:
+        # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+        #     throw error
+        return response
 
     #####################################################################
     def update_item(
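Note: the TODO in create_water_column_sonar_table says the new table starts in 'CREATING' and should be polled until 'ACTIVE'. A minimal sketch of how that wait could be done with boto3's built-in table_exists waiter, which polls describe_table internally (the table name here is hypothetical):

```python
import boto3

# Sketch only: the "table_exists" waiter polls describe_table until the
# table reaches ACTIVE, replacing the manual sleep/describe_table loop
# hinted at in the TODO above.
client = boto3.client("dynamodb")
client.get_waiter("table_exists").wait(
    TableName="water-column-sonar-table",  # hypothetical table name
    WaiterConfig={"Delay": 5, "MaxAttempts": 24},  # poll every 5 s, up to 2 min
)
```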
@@ -101,17 +120,22 @@ class DynamoDBManager:
             UpdateExpression=update_expression,
         )
         status_code = response["ResponseMetadata"]["HTTPStatusCode"]
-        # TODO: change to exception
+        assert response['ConsumedCapacity']['TableName'] == table_name
         assert status_code == 200, "Problem, unable to update dynamodb table."
 
     #####################################################################
+    # TODO: change to "get_cruise_as_df"
     def get_table_as_df(
         self,
         ship_name,
         cruise_name,
         sensor_name,
         table_name,
-    ):
+    ) -> pd.DataFrame:
+        """
+        To be used to initialize a cruise, deletes all entries associated with that cruise
+        in the database.
+        """
         expression_attribute_values = {
             ":cr": {"S": cruise_name},
             ":se": {"S": sensor_name},
@@ -128,6 +152,9 @@ class DynamoDBManager:
             FilterExpression=filter_expression,
         )
         # Note: table.scan() has 1 MB limit on results so pagination is used
+        if len(response["Items"]) == 0:
+            return pd.DataFrame()  # If no results, return empty dataframe
+
         data = response["Items"]
 
         while "LastEvaluatedKey" in response:
@@ -146,25 +173,104 @@ class DynamoDBManager:
         return df.sort_values(by="START_TIME", ignore_index=True)
 
     #####################################################################
-    # is this used?
-    def get_table_item(
+    # TODO: WIP
+    def delete_item(
         self,
         table_name,
-        key,
+        cruise_name,
+        file_name,
     ):
-        # a bit more high level, uses resource to get table item
-        table = self.__dynamodb_resource.Table(table_name)
-        response = table.get_item(Key=key)
+        """
+        Finds all rows associated with a cruise and deletes them.
+        """
+        response = self.__dynamodb_client.delete_item(
+            Key={
+                "CRUISE_NAME": {
+                    "S": cruise_name
+                },
+                "FILE_NAME": {
+                    "S": file_name
+                }
+            },
+            TableName=table_name,
+            ReturnConsumedCapacity="TOTALS",
+        )
+        # TODO: there should be attributes included in response but they are missing
+        # if response["ResponseMetadata"]["HTTPStatusCode"] != 200:
+        #     throw error
         return response
 
     #####################################################################
-    # TODO: add helper method to delete the data
-    def delete_cruise(
-        self,
-        table_name,
-        cruise_name,
+    def describe_table(
+        self,
+        table_name,
     ):
-        pass
+        """
+        Get a description of the table. Used to verify that records were added/removed.
+        """
+        response = self.__dynamodb_client.describe_table(TableName=table_name)
+        print(response)
+        return response
+
+
 
+    #####################################################################
+    # TODO: from test_raw_to_zarr get enum and use here
+    # def __update_processing_status(
+    #     self,
+    #     file_name: str,
+    #     cruise_name: str,
+    #     pipeline_status: str,
+    #     error_message: str = None,
+    # ):
+    #     print(f"Updating processing status to {pipeline_status}.")
+    #     if error_message:
+    #         print(f"Error message: {error_message}")
+    #         self.__dynamo.update_item(
+    #             table_name=self.__table_name,
+    #             key={
+    #                 'FILE_NAME': {'S': file_name},  # Partition Key
+    #                 'CRUISE_NAME': {'S': cruise_name},  # Sort Key
+    #             },
+    #             attribute_names={
+    #                 '#PT': 'PIPELINE_TIME',
+    #                 '#PS': 'PIPELINE_STATUS',
+    #                 '#EM': 'ERROR_MESSAGE',
+    #             },
+    #             expression='SET #PT = :pt, #PS = :ps, #EM = :em',
+    #             attribute_values={
+    #                 ':pt': {
+    #                     'S': datetime.now().isoformat(timespec="seconds") + "Z"
+    #                 },
+    #                 ':ps': {
+    #                     'S': pipeline_status
+    #                 },
+    #                 ':em': {
+    #                     'S': error_message
+    #                 }
+    #             }
+    #         )
+    #     else:
+    #         self.__dynamo.update_item(
+    #             table_name=self.__table_name,
+    #             key={
+    #                 'FILE_NAME': {'S': file_name},  # Partition Key
+    #                 'CRUISE_NAME': {'S': cruise_name},  # Sort Key
+    #             },
+    #             attribute_names={
+    #                 '#PT': 'PIPELINE_TIME',
+    #                 '#PS': 'PIPELINE_STATUS',
+    #             },
+    #             expression='SET #PT = :pt, #PS = :ps',
+    #             attribute_values={
+    #                 ':pt': {
+    #                     'S': datetime.now().isoformat(timespec="seconds") + "Z"
+    #                 },
+    #                 ':ps': {
+    #                     'S': pipeline_status
+    #                 }
+    #             }
+    #         )
+    #     print("Done updating processing status.")
 
 #########################################################################
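Note: delete_item's docstring says it finds all rows for a cruise, but the call deletes a single (CRUISE_NAME, FILE_NAME) item; deleting a whole cruise would take a scan (or query) plus one delete per key. A hedged sketch of that pattern using the resource-level batch writer, with table and attribute names mirroring the schema above but purely illustrative:

```python
import boto3
from boto3.dynamodb.conditions import Attr

# Sketch: collect the keys for one cruise, then delete them in batches.
# Scan pagination (LastEvaluatedKey) is omitted for brevity.
table = boto3.resource("dynamodb").Table("water-column-sonar-table")
response = table.scan(
    FilterExpression=Attr("CRUISE_NAME").eq("HB1906"),
    ProjectionExpression="FILE_NAME, CRUISE_NAME",
)
with table.batch_writer() as batch:
    for item in response["Items"]:
        batch.delete_item(
            Key={"FILE_NAME": item["FILE_NAME"], "CRUISE_NAME": item["CRUISE_NAME"]}
        )
```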
water_column_sonar_processing/aws/s3_manager.py
@@ -1,9 +1,8 @@
 import json
 import os
+import boto3
 from collections.abc import Generator
 from concurrent.futures import ThreadPoolExecutor, as_completed
-
-import boto3
 from boto3.s3.transfer import TransferConfig
 from botocore.config import Config
 from botocore.exceptions import ClientError
@@ -25,10 +24,16 @@ class S3Manager:
     #####################################################################
     def __init__(
         self,
+        # input_endpoint_url: str,
+        # output_endpoint_url: str,
+        # endpoint_url
         # TODO: Need to allow passing in of credentials when writing to protected bucket
     ):
         self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
         self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+        # self.endpoint_url = endpoint_url
+        # self.input_endpoint_url = input_endpoint_url
+        # self.output_endpoint_url = output_endpoint_url
         self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
         self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
         self.s3_transfer_config = TransferConfig(
@@ -46,6 +51,7 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            # endpoint_url=endpoint_url,  # TODO: temporary
         )
         self.s3_resource = boto3.resource(
             service_name="s3",
@@ -53,7 +59,6 @@ class S3Manager:
             region_name=self.s3_region,
         )
         # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
-        # TODO: create both "s3_client_input" and "s3_client_output" ???
         self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
             aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
             aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -63,19 +68,20 @@ class S3Manager:
             service_name="s3",
             config=self.s3_client_config,
             region_name=self.s3_region,
+            # endpoint_url=endpoint_url,  # TODO: temporary
         )
-        self.s3_resource_noaa_wcsd_zarr_pds = (
-            self.s3_session_noaa_wcsd_zarr_pds.resource(
-                service_name="s3",
-                config=self.s3_client_config,
-                region_name=self.s3_region,
-            )
+        self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
+            service_name="s3",
+            config=self.s3_client_config,
+            region_name=self.s3_region,
         )
+        self.paginator = self.s3_client.get_paginator('list_objects_v2')
+        self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
 
-    def get_client(self):
+    def get_client(self):  # TODO: do i need this?
         return self.s3_session.client(
             service_name="s3",
-            config=self.__s3_client_config,
+            config=self.s3_client_config,
             region_name=self.s3_region,
         )
 
@@ -103,17 +109,18 @@ class S3Manager:
         self,
         file_name: str,
         key: str,
+        output_bucket_name: str,
     ):
-        self.s3_client_noaa_wcsd_zarr_pds.upload_file(
-            Filename=file_name,
-            Bucket=self.output_bucket_name,
-            Key=key,
-        )
+        """
+        Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
+        """
+        self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(Filename=file_name, Key=key)
         return key
 
     #####################################################################
     def upload_files_with_thread_pool_executor(
         self,
+        output_bucket_name: str,
         all_files: list,
     ):
         # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
@@ -122,21 +129,45 @@ class S3Manager:
             with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
                 futures = [
                     executor.submit(
-                        self.upload_nodd_file,
+                        self.upload_nodd_file,  # TODO: verify which one is using this
                        all_file[0],  # file_name
                        all_file[1],  # key
+                        output_bucket_name,  # output_bucket_name
                    )
                    for all_file in all_files
                ]
                for future in as_completed(futures):
                    result = future.result()
                    if result:
-                        all_uploads.extend(result)
+                        all_uploads.extend([result])
        except Exception as err:
            print(err)
        print("Done uploading files using threading pool.")
        return all_uploads
 
+    #####################################################################
+    # def upload_nodd_file2(
+    #     self,
+    #     body: str,
+    #     bucket: str,
+    #     key: str,
+    # ):
+    #     self.s3_client_noaa_wcsd_zarr_pds.put_object(
+    #         Body=body,
+    #         Bucket=bucket,
+    #         Key=key,
+    #     )
+
+    # TODO: this uses resource, try to use client
+    def upload_file(
+        self,
+        filename: str,
+        bucket_name: str,
+        key: str,
+    ):
+        # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
+        self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)
+
     #####################################################################
     def upload_zarr_files_to_bucket(  # noaa-wcsd-model-pds
         self,
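Note: both upload paths now take the destination bucket explicitly instead of reading self.output_bucket_name. A sketch of how the threaded uploader might be called under the new signature; the bucket, local path, and key are illustrative, and the environment variables read in __init__ (INPUT_BUCKET_NAME, OUTPUT_BUCKET_NAME, etc.) are assumed to be set:

```python
from water_column_sonar_processing.aws.s3_manager import S3Manager

# Sketch: the threaded uploader takes [local_path, s3_key] pairs plus the
# destination bucket, which is now an explicit argument.
s3_manager = S3Manager()
uploaded_keys = s3_manager.upload_files_with_thread_pool_executor(
    output_bucket_name="noaa-wcsd-zarr-pds",  # illustrative bucket
    all_files=[
        ["/tmp/HB1906.geojson", "spatial/geojson/Henry_B._Bigelow/HB1906/EK60/HB1906.geojson"],
    ],
)
```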
@@ -165,32 +196,34 @@ class S3Manager:
         return all_uploads
 
     #####################################################################
-    # used: raw-to-model
-    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-model-pds
-        self, bucket_name, prefix
+    # used: raw-to-zarr
+    def list_objects(  # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+        self,
+        bucket_name,
+        prefix
     ):
         # analog to "find_children_objects"
         # Returns a list of key strings for each object in bucket defined by prefix
-        s3_client = self.s3_client
+        # s3_client = self.s3_client
         keys = []
-        paginator = s3_client.get_paginator("list_objects_v2")
-        page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+        # paginator = s3_client.get_paginator("list_objects_v2")
+        page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
         for page in page_iterator:
             if "Contents" in page.keys():
                 keys.extend([k["Key"] for k in page["Contents"]])
         return keys
 
-    def list_nodd_objects(  # These are used by the geometry for uploading data
-        self,
-        prefix,
-    ):
-        # Returns a list of key strings for each object in bucket defined by prefix
-        keys = []
-        paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
-        for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
-            if "Contents" in page.keys():
-                keys.extend([k["Key"] for k in page["Contents"]])
-        return keys
+    # def list_nodd_objects(  # These are used by the geometry for uploading data
+    #     self,
+    #     prefix,
+    # ):
+    #     # Returns a list of key strings for each object in bucket defined by prefix
+    #     keys = []
+    #     page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
+    #     for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
+    #         if "Contents" in page.keys():
+    #             keys.extend([k["Key"] for k in page["Contents"]])
+    #     return keys
 
     #####################################################################
     # TODO: change name to "directory"
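Note: list_objects now reuses the paginator built once in __init__ rather than creating one per call, and the removed list_nodd_objects is folded into it by passing the bucket explicitly. Roughly, a caller that previously used list_nodd_objects would now read like this sketch (bucket and prefix are illustrative):

```python
from water_column_sonar_processing.aws.s3_manager import S3Manager

# Sketch: listing the NODD objects now goes through list_objects with an
# explicit bucket instead of the removed list_nodd_objects helper.
s3_manager = S3Manager()
keys = s3_manager.list_objects(
    bucket_name="noaa-wcsd-zarr-pds",  # illustrative
    prefix="spatial/geojson/Henry_B._Bigelow/HB1906/EK60",
)
print(f"Found {len(keys)} objects.")
```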
@@ -279,9 +312,10 @@ class S3Manager:
         self,
         bucket_name,
         key,
-        file_name,
+        file_name,  # where the file will be saved
     ):
         self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
+        # TODO: if bottom file doesn't exist, don't fail downloader
         print("downloaded file")
 
     #####################################################################
@@ -318,7 +352,7 @@ class S3Manager:
     #####################################################################
     # not used TODO: remove
     def put(self, bucket_name, key, body):  # noaa-wcsd-model-pds
-        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
+        self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)  # "Body" can be a file
 
     #####################################################################
     def read_s3_json(
water_column_sonar_processing/cruise/create_empty_zarr_store.py
@@ -3,10 +3,10 @@ import os
 import numcodecs
 import numpy as np
 
-from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from water_column_sonar_processing.aws.s3_manager import S3Manager
-from water_column_sonar_processing.model.zarr_manager import ZarrManager
-from water_column_sonar_processing.utility.cleaner import Cleaner
+from src.water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
+from src.water_column_sonar_processing.aws.s3_manager import S3Manager
+from src.water_column_sonar_processing.model.zarr_manager import ZarrManager
+from src.water_column_sonar_processing.utility.cleaner import Cleaner
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
@@ -17,6 +17,7 @@ numcodecs.blosc.set_nthreads(1)
 # creates the latlon data: foo = ep.consolidate.add_location(ds_Sv, echodata)
 
 
+# TODO: change name to "CreateLocalEmptyZarrStore"
 class CreateEmptyZarrStore:
     #######################################################
     def __init__(
@@ -28,6 +29,7 @@ class CreateEmptyZarrStore:
 
     #######################################################
 
+    # TODO: move this to the s3_manager
     def upload_zarr_store_to_s3(
         self,
         local_directory: str,
water_column_sonar_processing/cruise/resample_regrid.py
@@ -7,9 +7,9 @@ import numpy as np
 import pandas as pd
 import xarray as xr
 
-from water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
-from water_column_sonar_processing.geometry.geometry_manager import GeometryManager
-from water_column_sonar_processing.model.zarr_manager import ZarrManager
+from src.water_column_sonar_processing.aws.dynamodb_manager import DynamoDBManager
+from src.water_column_sonar_processing.geometry.geometry_manager import GeometryManager
+from src.water_column_sonar_processing.model.zarr_manager import ZarrManager
 
 numcodecs.blosc.use_threads = False
 numcodecs.blosc.set_nthreads(1)
water_column_sonar_processing/geometry/geometry_manager.py
@@ -1,11 +1,12 @@
+import os
 from pathlib import Path
 
 import geopandas
 import numpy as np
 import pandas as pd
 
-from water_column_sonar_processing.aws.s3_manager import S3Manager
-from water_column_sonar_processing.utility.cleaner import Cleaner
+from src.water_column_sonar_processing.aws.s3_manager import S3Manager
+from src.water_column_sonar_processing.utility.cleaner import Cleaner
 
 """
 // [Decimal / Places / Degrees / Object that can be recognized at scale / N/S or E/W at equator, E/W at 23N/S, E/W at 45N/S, E/W at 67N/S]
@@ -26,12 +27,13 @@ class GeometryManager:
         self,
     ):
         self.DECIMAL_PRECISION = 5  # precision for GPS coordinates
-        self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to street level
+        self.SIMPLIFICATION_TOLERANCE = 0.0001  # RDP simplification to "street level"
 
     #######################################################
     def read_echodata_gps_data(
         self,
         echodata,
+        output_bucket_name,
         ship_name,
         cruise_name,
         sensor_name,
@@ -123,12 +125,12 @@ class GeometryManager:
             crs="epsg:4326",
         )
         # Note: We set np.nan to 0,0 so downstream missing values can be omitted
-
+        # TODO: so what ends up here is data with corruption at null island!!!
         geo_json_line = gps_gdf.to_json()
         if write_geojson:
             print("Creating local copy of geojson file.")
             with open(geo_json_name, "w") as write_file:
-                write_file.write(geo_json_line)
+                write_file.write(geo_json_line)  # NOTE: this file can include zeros for lat lon
 
         geo_json_prefix = (
             f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}"
@@ -136,7 +138,8 @@ class GeometryManager:
 
         print("Checking s3 and deleting any existing GeoJSON file.")
         s3_manager = S3Manager()
-        s3_objects = s3_manager.list_nodd_objects(
+        s3_objects = s3_manager.list_objects(
+            bucket_name=output_bucket_name,
             prefix=f"{geo_json_prefix}/{geo_json_name}"
         )
         if len(s3_objects) > 0:
@@ -149,6 +152,7 @@ class GeometryManager:
         s3_manager.upload_nodd_file(
             file_name=geo_json_name,  # file_name
             key=f"{geo_json_prefix}/{geo_json_name}",  # key
+            output_bucket_name=output_bucket_name,
         )
 
         # TODO: delete geo_json file
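Note: with output_bucket_name threaded through read_echodata_gps_data, the GeoJSON check/delete/upload round-trip is parameterized by bucket. A sketch of a call under the new signature; only the parameters visible in this hunk are shown, the remaining arguments (e.g. write_geojson) are omitted, and all values are illustrative:

```python
from water_column_sonar_processing.geometry.geometry_manager import GeometryManager

# Sketch: the caller now supplies the destination bucket explicitly.
geometry_manager = GeometryManager()
geometry_manager.read_echodata_gps_data(
    echodata=echodata,  # an echopype EchoData object, assumed in scope
    output_bucket_name="noaa-wcsd-zarr-pds",  # illustrative
    ship_name="Henry_B._Bigelow",
    cruise_name="HB1906",
    sensor_name="EK60",
)
```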
@@ -221,5 +225,16 @@ class GeometryManager:
             print(f"Exception encountered reading s3 GeoJSON: {err}")
             raise
 
+    ############################################################################
+    # COMES from the raw-to-zarr conversion
+    def __write_geojson_to_file(
+        self,
+        store_name,
+        data
+    ) -> None:
+        print('Writing GeoJSON to file.')
+        with open(os.path.join(store_name, 'geo.json'), "w") as outfile:
+            outfile.write(data)
+
 
 ###########################################################