water-column-sonar-processing 0.0.6-py3-none-any.whl → 26.1.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. water_column_sonar_processing/__init__.py +2 -5
  2. water_column_sonar_processing/aws/__init__.py +2 -2
  3. water_column_sonar_processing/aws/dynamodb_manager.py +257 -72
  4. water_column_sonar_processing/aws/s3_manager.py +184 -112
  5. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  6. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  7. water_column_sonar_processing/cruise/create_empty_zarr_store.py +38 -97
  8. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  9. water_column_sonar_processing/cruise/resample_regrid.py +144 -129
  10. water_column_sonar_processing/geometry/__init__.py +10 -2
  11. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  12. water_column_sonar_processing/geometry/geometry_manager.py +60 -44
  13. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  14. water_column_sonar_processing/geometry/pmtile_generation.py +242 -51
  15. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  16. water_column_sonar_processing/index/index_manager.py +157 -27
  17. water_column_sonar_processing/model/zarr_manager.py +663 -258
  18. water_column_sonar_processing/processing/__init__.py +4 -0
  19. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  20. water_column_sonar_processing/processing/raw_to_zarr.py +341 -0
  21. water_column_sonar_processing/utility/__init__.py +9 -2
  22. water_column_sonar_processing/utility/cleaner.py +1 -0
  23. water_column_sonar_processing/utility/constants.py +69 -14
  24. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  25. water_column_sonar_processing/utility/timestamp.py +3 -4
  26. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  27. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  28. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  29. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  30. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  31. water_column_sonar_processing/process.py +0 -147
  32. water_column_sonar_processing-0.0.6.dist-info/METADATA +0 -123
  33. water_column_sonar_processing-0.0.6.dist-info/RECORD +0 -29
  34. {water_column_sonar_processing-0.0.6.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
water_column_sonar_processing/aws/s3_manager.py
@@ -2,8 +2,11 @@ import json
  import os
  from collections.abc import Generator
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ from time import sleep
+ from typing import Optional

  import boto3
+ import botocore
  from boto3.s3.transfer import TransferConfig
  from botocore.config import Config
  from botocore.exceptions import ClientError
@@ -25,10 +28,9 @@ class S3Manager:
  #####################################################################
  def __init__(
  self,
- # TODO: Need to allow passing in of credentials when writing to protected bucket
+ endpoint_url: Optional[str] = None,
  ):
- self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
- self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+ self.endpoint_url = endpoint_url
  self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
  self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
  self.s3_transfer_config = TransferConfig(
@@ -46,14 +48,14 @@ class S3Manager:
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
  )
  self.s3_resource = boto3.resource(
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
  )
- # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
- # TODO: create both "s3_client_input" and "s3_client_output" ???
  self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
  aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
  aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -63,38 +65,54 @@ class S3Manager:
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
  )
  self.s3_resource_noaa_wcsd_zarr_pds = (
  self.s3_session_noaa_wcsd_zarr_pds.resource(
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
  )
  )
-
- def get_client(self):
- return self.s3_session.client(
- service_name="s3",
- config=self.__s3_client_config,
- region_name=self.s3_region,
+ #
+ self.paginator = self.s3_client.get_paginator("list_objects_v2")
+ self.paginator_noaa_wcsd_zarr_pds = (
+ self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
  )

  #####################################################################
+ # tested
  def create_bucket(
  self,
  bucket_name: str,
  ):
- self.s3_client.create_bucket(
- Bucket=bucket_name,
- # Required when region is different then us-east-1
- #
- # TODO: if region is us-east-1, don't include this line somehow
- # CreateBucketConfiguration={'LocationConstraint': self.__s3_region}
- )
+ """
+ Note: this function is only really meant to be used for creating test
+ buckets. It allows public read of all objects.
+ """
+ # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+ # public_policy = {
+ # "Version": "2012-10-17",
+ # "Statement": [
+ # {
+ # "Effect": "Allow",
+ # "Principal": "*",
+ # "Action": "s3:GetObject",
+ # "Resource": f"arn:aws:s3:::{bucket_name}/*",
+ # }
+ # ],
+ # }
+ response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
+ print(response1)
+ # response = self.s3_client.put_bucket_policy(
+ # Bucket=bucket_name, Policy=json.dumps(public_policy)
+ # )
+ # print(response)

  #####################################################################
+ # tested
  def list_buckets(self):
- # client = self.get_client()
  client = self.s3_client
  return client.list_buckets()

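The constructor now takes an optional endpoint_url instead of reading bucket names from the environment, and that endpoint is threaded through every client, resource, and paginator. A minimal sketch of how the test-oriented pieces above might be exercised against a local S3 stand-in (the endpoint URL, bucket name, and dummy credentials are illustrative, not part of the package):

```python
import os

# Assumes S3Manager is exported from the aws subpackage, as the file layout suggests.
from water_column_sonar_processing.aws import S3Manager

# The NODD session and the default boto3 chain both read credentials from the
# environment; dummy values are fine when pointing at a local S3 stand-in (e.g. moto).
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
os.environ["OUTPUT_BUCKET_ACCESS_KEY"] = "testing"
os.environ["OUTPUT_BUCKET_SECRET_ACCESS_KEY"] = "testing"

# Hypothetical local endpoint; omit endpoint_url to talk to real AWS S3.
s3_manager = S3Manager(endpoint_url="http://127.0.0.1:5555")

s3_manager.create_bucket(bucket_name="example-test-bucket")  # test-only, public-read ACL
print(s3_manager.list_buckets()["Buckets"])
```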
@@ -103,17 +121,20 @@ class S3Manager:
  self,
  file_name: str,
  key: str,
+ output_bucket_name: str,
  ):
- self.s3_client_noaa_wcsd_zarr_pds.upload_file(
- Filename=file_name,
- Bucket=self.output_bucket_name,
- Key=key,
+ """
+ Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
+ """
+ self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+ Filename=file_name, Key=key
  )
  return key

  #####################################################################
  def upload_files_with_thread_pool_executor(
  self,
+ output_bucket_name: str,
  all_files: list,
  ):
  # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
@@ -122,90 +143,118 @@ class S3Manager:
  with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
  futures = [
  executor.submit(
- self.upload_nodd_file,
+ self.upload_nodd_file, # TODO: verify which one is using this
  all_file[0], # file_name
  all_file[1], # key
+ output_bucket_name, # output_bucket_name
  )
  for all_file in all_files
  ]
  for future in as_completed(futures):
  result = future.result()
  if result:
- all_uploads.extend(result)
+ all_uploads.extend([result])
  except Exception as err:
- print(err)
+ raise RuntimeError(f"Problem, {err}")
+
  print("Done uploading files using threading pool.")
  return all_uploads

  #####################################################################
- def upload_zarr_files_to_bucket( # noaa-wcsd-model-pds
+ # tested
+ def upload_zarr_store_to_s3(
+ self,
+ output_bucket_name: str,
+ local_directory: str,
+ object_prefix: str,
+ cruise_name: str,
+ ) -> None:
+ print("uploading model store to s3")
+ try:
+ #
+ print("Starting upload with thread pool executor.")
+ # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+ all_files = []
+ for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+ for file in files:
+ local_path = os.path.join(subdir, file)
+ # TODO: find a better method for splitting strings here:
+ # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+ # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+ s3_key = os.path.join(
+ object_prefix,
+ os.path.join(
+ subdir[subdir.find(f"{cruise_name}.zarr") :], file
+ ),
+ )
+ all_files.append([local_path, s3_key])
+ self.upload_files_with_thread_pool_executor(
+ output_bucket_name=output_bucket_name,
+ all_files=all_files,
+ )
+ print("Done uploading with thread pool executor.")
+ except Exception as err:
+ raise RuntimeError(f"Problem uploading zarr store to s3, {err}")
+
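The new upload_zarr_store_to_s3 walks a local `<cruise_name>.zarr` directory and fans the individual chunk files out through upload_files_with_thread_pool_executor. A rough usage sketch (bucket, local path, and prefix are placeholders; the level_2/... prefix mirrors the example key in the comment above):

```python
from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()

# Assumes /tmp/HB0806/HB0806.zarr was already written locally by an earlier
# processing step; every file under it becomes one S3 object.
s3_manager.upload_zarr_store_to_s3(
    output_bucket_name="example-nodd-bucket",
    local_directory="/tmp/HB0806",
    object_prefix="level_2/Henry_B._Bigelow/HB0806/EK60",
    cruise_name="HB0806",
)
# Keys come out as level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/<relative path>.
```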
+ #####################################################################
+ # tested
+ def upload_file(
  self,
- local_directory,
- remote_directory,
+ filename: str,
+ bucket_name: str,
+ key: str,
  ):
- # Right now this is just for uploading a model store to s3
- print("Uploading files to output bucket.")
- store_name = os.path.basename(local_directory)
- all_files = []
- for subdir, dirs, files in os.walk(local_directory):
- for file in files:
- local_path = os.path.join(subdir, file)
- # s3_key = os.path.join(object_prefix, local_path)
- s3_key = os.path.join(
- remote_directory,
- store_name,
- subdir.split(store_name)[-1].strip("/"),
- )
- all_files.append([local_path, s3_key])
+ self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)

- all_uploads = self.upload_files_with_thread_pool_executor(
- all_files=all_files,
- )
- print("Done uploading files to output bucket.")
- return all_uploads
+ #####################################################################
+ # tested
+ def check_if_object_exists(self, bucket_name, key_name) -> bool:
+ s3_manager2 = S3Manager()
+ s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+ s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+ try:
+ s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+ return True
+ except botocore.exceptions.ClientError as e:
+ if e.response["Error"]["Code"] == "404":
+ # The object does not exist.
+ return False
+ elif e.response["Error"]["Code"] == 403:
+ # Unauthorized, including invalid bucket
+ return False
+ else:
+ # Something else has gone wrong.
+ raise

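upload_file and check_if_object_exists pair naturally: push a single object, then confirm it with a head_object call that maps 404/403 responses to False. A small sketch with placeholder bucket and key names:

```python
from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()

# Placeholder local file, bucket, and key for illustration.
s3_manager.upload_file(
    filename="HB0806_metadata.json",
    bucket_name="example-bucket",
    key="metadata/HB0806_metadata.json",
)

if s3_manager.check_if_object_exists(
    bucket_name="example-bucket", key_name="metadata/HB0806_metadata.json"
):
    print("object landed")
```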
  #####################################################################
- # used: raw-to-model
- def list_objects( # noaa-wcsd-pds and noaa-wcsd-model-pds
- self, bucket_name, prefix
- ):
+ # tested
+ def list_objects(self, bucket_name, prefix): # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+ # TODO: this isn't working for geojson detecting objects!!!!!!!
  # analog to "find_children_objects"
  # Returns a list of key strings for each object in bucket defined by prefix
- s3_client = self.s3_client
+ # s3_client = self.s3_client
  keys = []
- paginator = s3_client.get_paginator("list_objects_v2")
- page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+ # paginator = s3_client.get_paginator("list_objects_v2")
+ page_iterator = self.paginator.paginate(Bucket=bucket_name, Prefix=prefix)
  for page in page_iterator:
  if "Contents" in page.keys():
  keys.extend([k["Key"] for k in page["Contents"]])
  return keys

- def list_nodd_objects( # These are used by the geometry for uploading data
- self,
- prefix,
- ):
- # Returns a list of key strings for each object in bucket defined by prefix
- keys = []
- paginator = self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
- for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
- if "Contents" in page.keys():
- keys.extend([k["Key"] for k in page["Contents"]])
- return keys
-
  #####################################################################
  # TODO: change name to "directory"
- def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
- if not path.endswith("/"):
- path = path + "/"
- s3_client = self.s3_client
- resp = self.list_objects(
- bucket_name=bucket_name, prefix=path
- ) # TODO: this is returning root folder and doesn't include children or hidden folders
- # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
- return "Contents" in resp
+ # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+ # if not path.endswith("/"):
+ # path = path + "/"
+ # # s3_client = self.s3_client
+ # resp = self.list_objects(
+ # bucket_name=bucket_name, prefix=path
+ # ) # TODO: this is returning root folder and doesn't include children or hidden folders
+ # # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+ # return "Contents" in resp

  #####################################################################
- # used
+ # private
  def __paginate_child_objects(
  self,
  bucket_name: str,
@@ -220,6 +269,8 @@ class S3Manager:
  objects.extend(page["Contents"])
  return objects

+ #####################################################################
+ # tested
  def get_child_objects(
  self,
  bucket_name: str,
@@ -251,13 +302,14 @@ class S3Manager:
  return raw_files

  #####################################################################
- def get_object( # TODO: Move this to index.py
- # noaa-wcsd-pds or noaa-wcsd-model-pds
+ # tested
+ def get_object( # noaa-wcsd-pds or noaa-wcsd-zarr-pds
  self,
  bucket_name,
  key_name,
  ):
  # Meant for getting singular objects from a bucket, used by indexing lambda
+ # can also return byte range potentially.
  print(f"Getting object {key_name} from {bucket_name}")
  try:
  response = self.s3_client.get_object(
@@ -266,81 +318,101 @@ class S3Manager:
  )
  # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
  # if status == 200:
+ print(f"Done getting object {key_name} from {bucket_name}")
+ return response
  except ClientError as err:
  print(f"Problem was encountered while getting s3 file: {err}")
  raise
- print(f"Done getting object {key_name} from {bucket_name}")
- return response

  #####################################################################
- # used raw-to-model
- def download_file( # TODO: change to download_object
- # noaa-wcsd-pds or noaa-wcsd-model-pds
+ # tested
+ def download_file(
  self,
  bucket_name,
  key,
- file_name,
+ file_name, # path to where the file will be saved
  ):
- self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
- print("downloaded file")
-
- #####################################################################
- # not used
- # def delete_nodd_object( # noaa-wcsd-model-pds
- # self,
- # bucket_name,
- # key
- # ): # -> dict:
- # #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
- # self.s3_client.delete_object(Bucket=bucket_name, Key=key)
+ try:
+ self.s3_client.download_file(
+ Bucket=bucket_name, Key=key, Filename=file_name
+ )
+ # TODO: if bottom file doesn't exist, don't fail downloader
+ print("downloaded file")
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered while downloading_file, {err}")

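get_object returns the raw boto3 response (so the body can be streamed, or potentially byte-ranged), while download_file writes straight to disk and now wraps failures in a RuntimeError. A brief sketch; the keys below are placeholders, only the noaa-wcsd-pds bucket name comes from the code comments:

```python
from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()

# Stream an object body directly (placeholder key).
response = s3_manager.get_object(bucket_name="noaa-wcsd-pds", key_name="index.html")
print(response["Body"].read()[:100])

# Or save a raw file locally; file_name is the destination path on disk.
s3_manager.download_file(
    bucket_name="noaa-wcsd-pds",
    key="data/raw/Henry_B._Bigelow/HB0806/EK60/example.raw",  # placeholder key
    file_name="/tmp/example.raw",
)
```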
  #####################################################################
+ # tested
  def delete_nodd_objects( # nodd-bucket
  self,
+ bucket_name,
  objects: list,
  ):
  try:
- print(
- f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
- )
+ print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
  objects_to_delete = []
  for obj in objects:
  objects_to_delete.append({"Key": obj["Key"]})
  # Note: request can contain a list of up to 1000 keys
  for batch in chunked(ll=objects_to_delete, n=1000):
+ # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+ # Please reduce your request rate.
+ sleep(0.5)
+ #
  self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
- Bucket=self.output_bucket_name, Delete={"Objects": batch}
+ Bucket=bucket_name, Delete={"Objects": batch}
  )
- print(f"Deleted files.")
+ print("Deleted files.")
  except Exception as err:
- print(f"Problem was encountered while deleting objects: {err}")
+ raise RuntimeError(f"Problem was encountered while deleting objects, {err}")

  #####################################################################
- # not used TODO: remove
+ # tested
+ def delete_nodd_object( # only used to delete geojson it looks like?! Remove.
+ self,
+ bucket_name,
+ key_name,
+ ):
+ try:
+ print(f"Deleting {key_name} objects in {bucket_name}.")
+ self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+ Bucket=bucket_name, Key=key_name
+ )
+ print("Deleted file.")
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+ #####################################################################
+ # tested
  def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
- self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body)
+ try:
+ self.s3_client.put_object(
+ Bucket=bucket_name, Key=key, Body=body
+ ) # "Body" can be a file
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered putting object, {err}")

  #####################################################################
+ # tested
  def read_s3_json(
  self,
  ship_name,
  cruise_name,
  sensor_name,
  file_name_stem,
+ output_bucket_name, # TODO: change to just bucket_name
  ) -> str:
  try:
- content_object = self.s3_resource_noaa_wcsd_zarr_pds.Object(
- bucket_name=self.output_bucket_name,
+ resource = self.s3_resource_noaa_wcsd_zarr_pds
+ content_object = resource.Object(
+ bucket_name=output_bucket_name,
  key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
  ).get()
  file_content = content_object["Body"].read().decode("utf-8")
  json_content = json.loads(file_content)
  return json_content
- except Exception as err: # Failure
- print(f"Exception encountered reading s3 GeoJSON: {err}")
- raise
-
- #####################################################################
+ except Exception as err:
+ raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")


  #########################################################################
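read_s3_json builds the spatial/geojson/{ship}/{cruise}/{sensor}/{stem}.json key itself, and delete_nodd_objects deletes in 1000-key batches with a short sleep to sidestep S3 SlowDown throttling. A sketch of both against a placeholder bucket (the file-name stem is also a placeholder):

```python
from water_column_sonar_processing.aws import S3Manager

s3_manager = S3Manager()
bucket = "example-nodd-bucket"  # placeholder

# Reads spatial/geojson/Henry_B._Bigelow/HB0806/EK60/<stem>.json and returns
# it as parsed JSON (despite the -> str annotation).
geojson = s3_manager.read_s3_json(
    ship_name="Henry_B._Bigelow",
    cruise_name="HB0806",
    sensor_name="EK60",
    file_name_stem="example-file-stem",  # placeholder stem
    output_bucket_name=bucket,
)

# Remove an old Zarr store: list its keys, then delete them in 1000-key batches.
keys = s3_manager.list_objects(bucket_name=bucket, prefix="level_2/Henry_B._Bigelow/HB0806")
s3_manager.delete_nodd_objects(bucket_name=bucket, objects=[{"Key": k} for k in keys])
```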
water_column_sonar_processing/aws/s3fs_manager.py
@@ -1,45 +1,29 @@
  import os
+ from typing import Optional

  import s3fs

+
  # TODO: S3FS_LOGGING_LEVEL=DEBUG
+ # S3FS_LOGGING_LEVEL=DEBUG


  class S3FSManager:
  #####################################################################
  def __init__(
  self,
+ endpoint_url: Optional[str] = None,
  ):
- self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
+ self.endpoint_url = endpoint_url
+ self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+ self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+ self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
  self.s3fs = s3fs.S3FileSystem(
+ endpoint_url=endpoint_url,
  key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
  secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
- # asynchronous=True
- # use_ssl=False,
- # skip_instance_cache=True,
- # default_block_size='100MB', # if no specific value is given at all time. The built-in default is 5MB
- # client_kwargs={
- # "region_name": self.__s3_region
- # }
  )

- #####################################################################
- def add_file(self, filename):
- full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
- print(full_path)
-
- self.s3fs.touch(full_path)
- ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
-
- print(ff)
-
- #####################################################################
- def upload_data(self, bucket_name, file_path, prefix):
- # TODO: this works in theory but use boto3 to upload files
- s3_path = f"s3://{bucket_name}/{prefix}/"
- s3_file_system = self.s3fs
- s3_file_system.put(file_path, s3_path, recursive=True)
-
  #####################################################################
  def s3_map(
  self,
@@ -52,17 +36,29 @@ class S3FSManager:
  root=s3_zarr_store_path, s3=self.s3fs
  ) # create=False, not false because will be writing

+ #####################################################################
+ # def add_file(self, filename):
+ # full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+ # print(full_path)
+ #
+ # self.s3fs.touch(full_path)
+ # ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+ #
+ # print(ff)
+
+ #####################################################################
+ def upload_data(self, bucket_name, file_path, prefix):
+ # TODO: this works in theory but use boto3 to upload files
+ s3_path = f"s3://{bucket_name}/{prefix}/"
+ s3_file_system = self.s3fs
+ s3_file_system.put(file_path, s3_path, recursive=True)
+
  #####################################################################
  def exists(
  self,
- geo_json_s3_path,
+ s3_path,
  ):
- s3_file_system = self.s3fs
- return s3_file_system.exists(path=geo_json_s3_path)
+ # s3_file_system =
+ return self.s3fs.exists(s3_path)

  #####################################################################
- # def put(
- # self
- # ):
- # s3_file_system = self.s3fs
- # return
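S3FSManager mirrors the same change: an optional endpoint_url, credentials from the OUTPUT_BUCKET_* environment variables, and an s3_map helper that wraps a store path in an S3Map for Zarr reads and writes. A sketch, assuming the Zarr path below exists; opening it with xarray is illustrative and not something this module does itself:

```python
import xarray as xr

# Assumes S3FSManager is exported from the aws subpackage.
from water_column_sonar_processing.aws import S3FSManager

s3fs_manager = S3FSManager()  # or S3FSManager(endpoint_url="http://127.0.0.1:5555")

# Placeholder store path.
zarr_path = "s3://example-nodd-bucket/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"

if s3fs_manager.exists(zarr_path):
    store = s3fs_manager.s3_map(zarr_path)          # s3fs.S3Map over the store
    ds = xr.open_zarr(store, consolidated=False)    # illustrative read
    print(ds)
```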
water_column_sonar_processing/aws/sqs_manager.py
@@ -35,7 +35,7 @@ class SQSManager:
  #######################################################
  def list_queues(self, queue_name_prefix):
  # Note: SQS control plane is eventually consistent, meaning that it
- # takes a while to propagate the data accross the systems.
+ # takes a while to propagate the dataset accross the systems.
  response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
  print(response)
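Because the SQS control plane is eventually consistent, a queue created moments ago may not appear in list_queues immediately. A tiny sketch (assuming SQSManager is importable from the aws subpackage and can be constructed without arguments; the prefix is a placeholder):

```python
from water_column_sonar_processing.aws import SQSManager

sqs_manager = SQSManager()
# May lag behind recent CreateQueue calls; retry after a short wait if needed.
sqs_manager.list_queues(queue_name_prefix="example")
```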