water-column-sonar-processing 0.0.9__py3-none-any.whl → 26.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. water_column_sonar_processing/aws/dynamodb_manager.py +138 -59
  2. water_column_sonar_processing/aws/s3_manager.py +179 -141
  3. water_column_sonar_processing/aws/s3fs_manager.py +29 -33
  4. water_column_sonar_processing/aws/sqs_manager.py +1 -1
  5. water_column_sonar_processing/cruise/create_empty_zarr_store.py +35 -96
  6. water_column_sonar_processing/cruise/datatree_manager.py +21 -0
  7. water_column_sonar_processing/cruise/resample_regrid.py +142 -127
  8. water_column_sonar_processing/geometry/__init__.py +10 -2
  9. water_column_sonar_processing/geometry/elevation_manager.py +111 -0
  10. water_column_sonar_processing/geometry/geometry_manager.py +50 -49
  11. water_column_sonar_processing/geometry/line_simplification.py +176 -0
  12. water_column_sonar_processing/geometry/pmtile_generation.py +227 -223
  13. water_column_sonar_processing/geometry/spatiotemporal.py +106 -0
  14. water_column_sonar_processing/index/index_manager.py +151 -33
  15. water_column_sonar_processing/model/zarr_manager.py +665 -262
  16. water_column_sonar_processing/processing/__init__.py +3 -3
  17. water_column_sonar_processing/processing/raw_to_netcdf.py +320 -0
  18. water_column_sonar_processing/processing/raw_to_zarr.py +206 -214
  19. water_column_sonar_processing/utility/__init__.py +9 -2
  20. water_column_sonar_processing/utility/constants.py +69 -18
  21. water_column_sonar_processing/utility/pipeline_status.py +11 -15
  22. water_column_sonar_processing/utility/timestamp.py +3 -4
  23. water_column_sonar_processing-26.1.9.dist-info/METADATA +239 -0
  24. water_column_sonar_processing-26.1.9.dist-info/RECORD +34 -0
  25. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/WHEEL +1 -1
  26. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info/licenses}/LICENSE +1 -1
  27. water_column_sonar_processing/geometry/geometry_simplification.py +0 -82
  28. water_column_sonar_processing/process.py +0 -147
  29. water_column_sonar_processing/processing/cruise_sampler.py +0 -342
  30. water_column_sonar_processing-0.0.9.dist-info/METADATA +0 -134
  31. water_column_sonar_processing-0.0.9.dist-info/RECORD +0 -32
  32. {water_column_sonar_processing-0.0.9.dist-info → water_column_sonar_processing-26.1.9.dist-info}/top_level.txt +0 -0
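Among the changes visible in this diff (see the s3_manager.py and s3fs_manager.py entries above), the most consequential API change appears to be that S3Manager and S3FSManager now accept an optional endpoint_url argument, which is threaded through to every boto3 client, resource, and s3fs filesystem they create, replacing the old reliance on INPUT_BUCKET_NAME/OUTPUT_BUCKET_NAME environment variables in S3Manager's constructor. A minimal usage sketch, assuming the package is installed and that a local S3-compatible endpoint (e.g. moto or LocalStack) is available for tests; the URL below is illustrative, not taken from the package:

    from water_column_sonar_processing.aws.s3_manager import S3Manager
    from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

    # Against real AWS: endpoint_url defaults to None, so boto3 resolves the
    # standard S3 endpoint from AWS_REGION (default "us-east-1").
    s3_manager = S3Manager()

    # Against a local S3-compatible endpoint for tests; the port is a placeholder.
    test_manager = S3Manager(endpoint_url="http://127.0.0.1:5555")
    test_fs = S3FSManager(endpoint_url="http://127.0.0.1:5555")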
water_column_sonar_processing/aws/s3_manager.py
@@ -1,8 +1,12 @@
  import json
  import os
- import boto3
  from collections.abc import Generator
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ from time import sleep
+ from typing import Optional
+
+ import boto3
+ import botocore
  from boto3.s3.transfer import TransferConfig
  from botocore.config import Config
  from botocore.exceptions import ClientError
@@ -24,16 +28,9 @@ class S3Manager:
  #####################################################################
  def __init__(
  self,
- # input_endpoint_url: str,
- # output_endpoint_url: str,
- # endpoint_url
- # TODO: Need to allow passing in of credentials when writing to protected bucket
+ endpoint_url: Optional[str] = None,
  ):
- self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
- self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
- # self.endpoint_url = endpoint_url
- # self.input_endpoint_url = input_endpoint_url
- # self.output_endpoint_url = output_endpoint_url
+ self.endpoint_url = endpoint_url
  self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
  self.s3_client_config = Config(max_pool_connections=MAX_POOL_CONNECTIONS)
  self.s3_transfer_config = TransferConfig(
@@ -51,14 +48,14 @@ class S3Manager:
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
- # endpoint_url=endpoint_url, # TODO: temporary
+ endpoint_url=self.endpoint_url,
  )
  self.s3_resource = boto3.resource(
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
  )
- # self.paginator = self.s3_client.get_paginator(operation_name='list_objects_v2')
  self.s3_session_noaa_wcsd_zarr_pds = boto3.Session(
  aws_access_key_id=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
  aws_secret_access_key=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
@@ -68,39 +65,54 @@ class S3Manager:
  service_name="s3",
  config=self.s3_client_config,
  region_name=self.s3_region,
- # endpoint_url=endpoint_url, # TODO: temporary
+ endpoint_url=self.endpoint_url,
  )
- self.s3_resource_noaa_wcsd_zarr_pds = self.s3_session_noaa_wcsd_zarr_pds.resource(
- service_name="s3",
- config=self.s3_client_config,
- region_name=self.s3_region,
+ self.s3_resource_noaa_wcsd_zarr_pds = (
+ self.s3_session_noaa_wcsd_zarr_pds.resource(
+ service_name="s3",
+ config=self.s3_client_config,
+ region_name=self.s3_region,
+ endpoint_url=self.endpoint_url,
+ )
  )
- self.paginator = self.s3_client.get_paginator('list_objects_v2')
- self.paginator_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds.get_paginator('list_objects_v2')
-
- def get_client(self): # TODO: do i need this?
- return self.s3_session.client(
- service_name="s3",
- config=self.s3_client_config,
- region_name=self.s3_region,
+ #
+ self.paginator = self.s3_client.get_paginator("list_objects_v2")
+ self.paginator_noaa_wcsd_zarr_pds = (
+ self.s3_client_noaa_wcsd_zarr_pds.get_paginator("list_objects_v2")
  )

  #####################################################################
+ # tested
  def create_bucket(
  self,
  bucket_name: str,
  ):
- self.s3_client.create_bucket(
- Bucket=bucket_name,
- # Required when region is different then us-east-1
- #
- # TODO: if region is us-east-1, don't include this line somehow
- # CreateBucketConfiguration={'LocationConstraint': self.__s3_region}
- )
+ """
+ Note: this function is only really meant to be used for creating test
+ buckets. It allows public read of all objects.
+ """
+ # https://github.com/aodn/aodn_cloud_optimised/blob/e5035495e782783cc8b9e58711d63ed466420350/test_aodn_cloud_optimised/test_schema.py#L7
+ # public_policy = {
+ # "Version": "2012-10-17",
+ # "Statement": [
+ # {
+ # "Effect": "Allow",
+ # "Principal": "*",
+ # "Action": "s3:GetObject",
+ # "Resource": f"arn:aws:s3:::{bucket_name}/*",
+ # }
+ # ],
+ # }
+ response1 = self.s3_client.create_bucket(Bucket=bucket_name, ACL="public-read")
+ print(response1)
+ # response = self.s3_client.put_bucket_policy(
+ # Bucket=bucket_name, Policy=json.dumps(public_policy)
+ # )
+ # print(response)

  #####################################################################
+ # tested
  def list_buckets(self):
- # client = self.get_client()
  client = self.s3_client
  return client.list_buckets()

@@ -114,7 +126,9 @@ class S3Manager:
  """
  Used to upload a single file, e.g. the GeoJSON file to the NODD bucket
  """
- self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(Filename=file_name, Key=key)
+ self.s3_resource_noaa_wcsd_zarr_pds.Bucket(output_bucket_name).upload_file(
+ Filename=file_name, Key=key
+ )
  return key

  #####################################################################
@@ -141,67 +155,81 @@ class S3Manager:
  if result:
  all_uploads.extend([result])
  except Exception as err:
- print(err)
+ raise RuntimeError(f"Problem, {err}")
+
  print("Done uploading files using threading pool.")
  return all_uploads

  #####################################################################
- # def upload_nodd_file2(
- # self,
- # body: str,
- # bucket: str,
- # key: str,
- # ):
- # self.s3_client_noaa_wcsd_zarr_pds.put_object(
- # Body=body,
- # Bucket=bucket,
- # Key=key,
- # )
+ # tested
+ def upload_zarr_store_to_s3(
+ self,
+ output_bucket_name: str,
+ local_directory: str,
+ object_prefix: str,
+ cruise_name: str,
+ ) -> None:
+ print("uploading model store to s3")
+ try:
+ #
+ print("Starting upload with thread pool executor.")
+ # # 'all_files' is passed a list of lists: [[local_path, s3_key], [...], ...]
+ all_files = []
+ for subdir, dirs, files in os.walk(f"{local_directory}/{cruise_name}.zarr"):
+ for file in files:
+ local_path = os.path.join(subdir, file)
+ # TODO: find a better method for splitting strings here:
+ # 'level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr/.zattrs'
+ # s3_key = f"{object_prefix}/{cruise_name}.zarr{local_path.split(f'{cruise_name}.zarr')[-1]}"
+ s3_key = os.path.join(
+ object_prefix,
+ os.path.join(
+ subdir[subdir.find(f"{cruise_name}.zarr") :], file
+ ),
+ )
+ all_files.append([local_path, s3_key])
+ self.upload_files_with_thread_pool_executor(
+ output_bucket_name=output_bucket_name,
+ all_files=all_files,
+ )
+ print("Done uploading with thread pool executor.")
+ except Exception as err:
+ raise RuntimeError(f"Problem uploading zarr store to s3, {err}")

- # TODO: this uses resource, try to use client
+ #####################################################################
+ # tested
  def upload_file(
- self,
- filename: str,
- bucket_name: str,
- key: str,
+ self,
+ filename: str,
+ bucket_name: str,
+ key: str,
  ):
- # self.s3_client.upload_file(Filename=filename, Bucket=bucket, Key=key)
  self.s3_resource.Bucket(bucket_name).upload_file(Filename=filename, Key=key)

  #####################################################################
- def upload_zarr_files_to_bucket( # noaa-wcsd-model-pds
- self,
- local_directory,
- remote_directory,
- ):
- # Right now this is just for uploading a model store to s3
- print("Uploading files to output bucket.")
- store_name = os.path.basename(local_directory)
- all_files = []
- for subdir, dirs, files in os.walk(local_directory):
- for file in files:
- local_path = os.path.join(subdir, file)
- # s3_key = os.path.join(object_prefix, local_path)
- s3_key = os.path.join(
- remote_directory,
- store_name,
- subdir.split(store_name)[-1].strip("/"),
- )
- all_files.append([local_path, s3_key])
-
- all_uploads = self.upload_files_with_thread_pool_executor(
- all_files=all_files,
- )
- print("Done uploading files to output bucket.")
- return all_uploads
+ # tested
+ def check_if_object_exists(self, bucket_name, key_name) -> bool:
+ s3_manager2 = S3Manager()
+ s3_manager2.list_objects(bucket_name=bucket_name, prefix=key_name)
+ s3_client_noaa_wcsd_zarr_pds = self.s3_client_noaa_wcsd_zarr_pds
+ try:
+ s3_client_noaa_wcsd_zarr_pds.head_object(Bucket=bucket_name, Key=key_name)
+ return True
+ except botocore.exceptions.ClientError as e:
+ if e.response["Error"]["Code"] == "404":
+ # The object does not exist.
+ return False
+ elif e.response["Error"]["Code"] == 403:
+ # Unauthorized, including invalid bucket
+ return False
+ else:
+ # Something else has gone wrong.
+ raise

  #####################################################################
- # used: raw-to-zarr
- def list_objects( # noaa-wcsd-pds and noaa-wcsd-zarr-pds
- self,
- bucket_name,
- prefix
- ):
+ # tested
+ def list_objects(self, bucket_name, prefix): # noaa-wcsd-pds and noaa-wcsd-zarr-pds
+ # TODO: this isn't working for geojson detecting objects!!!!!!!
  # analog to "find_children_objects"
  # Returns a list of key strings for each object in bucket defined by prefix
  # s3_client = self.s3_client
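The new check_if_object_exists method above probes with head_object and inspects the ClientError response code. A minimal standalone sketch of the same pattern, outside the class and with placeholder bucket/key names; note that boto3 reports error codes as strings, so both "404" and "403" are compared as strings here:

    import boto3
    from botocore.exceptions import ClientError

    def object_exists(bucket_name: str, key_name: str) -> bool:
        # HEAD request: cheap existence check that never downloads the object body.
        s3_client = boto3.client("s3")
        try:
            s3_client.head_object(Bucket=bucket_name, Key=key_name)
            return True
        except ClientError as err:
            code = err.response["Error"]["Code"]  # boto3 returns the code as a string
            if code in ("404", "403", "NotFound"):
                # Missing object, or no permission to see it (also covers a bad bucket).
                return False
            raise  # anything else is a real problem

    # Usage with placeholder names:
    # object_exists("my-test-bucket", "level_2/some/key.zarr/.zattrs")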
@@ -213,32 +241,20 @@ class S3Manager:
  keys.extend([k["Key"] for k in page["Contents"]])
  return keys

- # def list_nodd_objects( # These are used by the geometry for uploading data
- # self,
- # prefix,
- # ):
- # # Returns a list of key strings for each object in bucket defined by prefix
- # keys = []
- # page_iterator = self.paginator_noaa_wcsd_zarr_pds.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
- # for page in paginator.paginate(Bucket=self.output_bucket_name, Prefix=prefix):
- # if "Contents" in page.keys():
- # keys.extend([k["Key"] for k in page["Contents"]])
- # return keys
-
  #####################################################################
  # TODO: change name to "directory"
- def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
- if not path.endswith("/"):
- path = path + "/"
- s3_client = self.s3_client
- resp = self.list_objects(
- bucket_name=bucket_name, prefix=path
- ) # TODO: this is returning root folder and doesn't include children or hidden folders
- # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
- return "Contents" in resp
+ # def folder_exists_and_not_empty(self, bucket_name: str, path: str) -> bool:
+ # if not path.endswith("/"):
+ # path = path + "/"
+ # # s3_client = self.s3_client
+ # resp = self.list_objects(
+ # bucket_name=bucket_name, prefix=path
+ # ) # TODO: this is returning root folder and doesn't include children or hidden folders
+ # # resp = s3_client.list_objects(Bucket=bucket, Prefix=path, Delimiter='/', MaxKeys=1)
+ # return "Contents" in resp

  #####################################################################
- # used
+ # private
  def __paginate_child_objects(
  self,
  bucket_name: str,
@@ -253,6 +269,8 @@ class S3Manager:
  objects.extend(page["Contents"])
  return objects

+ #####################################################################
+ # tested
  def get_child_objects(
  self,
  bucket_name: str,
@@ -284,13 +302,14 @@ class S3Manager:
  return raw_files

  #####################################################################
- def get_object( # TODO: Move this to index.py
- # noaa-wcsd-pds or noaa-wcsd-model-pds
+ # tested
+ def get_object( # noaa-wcsd-pds or noaa-wcsd-zarr-pds
  self,
  bucket_name,
  key_name,
  ):
  # Meant for getting singular objects from a bucket, used by indexing lambda
+ # can also return byte range potentially.
  print(f"Getting object {key_name} from {bucket_name}")
  try:
  response = self.s3_client.get_object(
@@ -299,82 +318,101 @@ class S3Manager:
  )
  # status = response.get("ResponseMetadata", {}).get("HTTPStatusCode")
  # if status == 200:
+ print(f"Done getting object {key_name} from {bucket_name}")
+ return response
  except ClientError as err:
  print(f"Problem was encountered while getting s3 file: {err}")
  raise
- print(f"Done getting object {key_name} from {bucket_name}")
- return response


  #####################################################################
- # used raw-to-model
- def download_file( # TODO: change to download_object
- # noaa-wcsd-pds or noaa-wcsd-model-pds
+ # tested
+ def download_file(
  self,
  bucket_name,
  key,
- file_name, # where the file will be saved
+ file_name, # path to where the file will be saved
  ):
- self.s3_client.download_file(Bucket=bucket_name, Key=key, Filename=file_name)
- # TODO: if bottom file doesn't exist, don't fail downloader
- print("downloaded file")
-
- #####################################################################
- # not used
- # def delete_nodd_object( # noaa-wcsd-model-pds
- # self,
- # bucket_name,
- # key
- # ): # -> dict:
- # #return self.__s3_client.delete_object(Bucket=bucket_name, Key=key)
- # self.s3_client.delete_object(Bucket=bucket_name, Key=key)
+ try:
+ self.s3_client.download_file(
+ Bucket=bucket_name, Key=key, Filename=file_name
+ )
+ # TODO: if bottom file doesn't exist, don't fail downloader
+ print("downloaded file")
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered while downloading_file, {err}")

  #####################################################################
+ # tested
  def delete_nodd_objects( # nodd-bucket
  self,
+ bucket_name,
  objects: list,
  ):
  try:
- print(
- f"Deleting {len(objects)} objects in {self.output_bucket_name} in batches."
- )
+ print(f"Deleting {len(objects)} objects in {bucket_name} in batches.")
  objects_to_delete = []
  for obj in objects:
  objects_to_delete.append({"Key": obj["Key"]})
  # Note: request can contain a list of up to 1000 keys
  for batch in chunked(ll=objects_to_delete, n=1000):
+ # An error occurred (SlowDown) when calling the DeleteObjects operation (reached max retries: 4):
+ # Please reduce your request rate.
+ sleep(0.5)
+ #
  self.s3_client_noaa_wcsd_zarr_pds.delete_objects(
- Bucket=self.output_bucket_name, Delete={"Objects": batch}
+ Bucket=bucket_name, Delete={"Objects": batch}
  )
- print(f"Deleted files.")
+ print("Deleted files.")
  except Exception as err:
- print(f"Problem was encountered while deleting objects: {err}")
+ raise RuntimeError(f"Problem was encountered while deleting objects, {err}")

  #####################################################################
- # not used TODO: remove
+ # tested
+ def delete_nodd_object( # only used to delete geojson it looks like?! Remove.
+ self,
+ bucket_name,
+ key_name,
+ ):
+ try:
+ print(f"Deleting {key_name} objects in {bucket_name}.")
+ self.s3_client_noaa_wcsd_zarr_pds.delete_object(
+ Bucket=bucket_name, Key=key_name
+ )
+ print("Deleted file.")
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered while deleting objects, {err}")
+
+ #####################################################################
+ # tested
  def put(self, bucket_name, key, body): # noaa-wcsd-model-pds
- self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=body) # "Body" can be a file
+ try:
+ self.s3_client.put_object(
+ Bucket=bucket_name, Key=key, Body=body
+ ) # "Body" can be a file
+ except Exception as err:
+ raise RuntimeError(f"Problem was encountered putting object, {err}")

  #####################################################################
+ # tested
  def read_s3_json(
  self,
  ship_name,
  cruise_name,
  sensor_name,
  file_name_stem,
+ output_bucket_name, # TODO: change to just bucket_name
  ) -> str:
  try:
- content_object = self.s3_resource_noaa_wcsd_zarr_pds.Object(
- bucket_name=self.output_bucket_name,
+ resource = self.s3_resource_noaa_wcsd_zarr_pds
+ content_object = resource.Object(
+ bucket_name=output_bucket_name,
  key=f"spatial/geojson/{ship_name}/{cruise_name}/{sensor_name}/{file_name_stem}.json",
  ).get()
  file_content = content_object["Body"].read().decode("utf-8")
  json_content = json.loads(file_content)
  return json_content
- except Exception as err: # Failure
- print(f"Exception encountered reading s3 GeoJSON: {err}")
- raise
-
- #####################################################################
+ except Exception as err:
+ raise RuntimeError(f"Exception encountered reading s3 GeoJSON, {err}")


  #########################################################################
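The reworked delete_nodd_objects now takes the bucket name explicitly, batches keys 1000 at a time (the DeleteObjects request limit), and sleeps between batches to avoid SlowDown throttling. The package's chunked() helper is its own utility; the sketch below uses a plain range-based stand-in to illustrate the same batching pattern:

    from time import sleep

    import boto3

    def delete_keys_in_batches(bucket_name: str, keys: list[str]) -> None:
        s3_client = boto3.client("s3")
        # DeleteObjects accepts at most 1000 keys per request.
        for start in range(0, len(keys), 1000):
            batch = [{"Key": k} for k in keys[start : start + 1000]]
            # Crude pacing to avoid SlowDown throttling on very large deletes.
            sleep(0.5)
            s3_client.delete_objects(Bucket=bucket_name, Delete={"Objects": batch})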
water_column_sonar_processing/aws/s3fs_manager.py
@@ -1,45 +1,29 @@
  import os
+ from typing import Optional

  import s3fs

+
  # TODO: S3FS_LOGGING_LEVEL=DEBUG
+ # S3FS_LOGGING_LEVEL=DEBUG


  class S3FSManager:
  #####################################################################
  def __init__(
  self,
+ endpoint_url: Optional[str] = None,
  ):
- self.__s3_region = os.environ.get("AWS_REGION", default="us-east-1")
+ self.endpoint_url = endpoint_url
+ self.input_bucket_name = os.environ.get("INPUT_BUCKET_NAME")
+ self.output_bucket_name = os.environ.get("OUTPUT_BUCKET_NAME")
+ self.s3_region = os.environ.get("AWS_REGION", default="us-east-1")
  self.s3fs = s3fs.S3FileSystem(
+ endpoint_url=endpoint_url,
  key=os.environ.get("OUTPUT_BUCKET_ACCESS_KEY"),
  secret=os.environ.get("OUTPUT_BUCKET_SECRET_ACCESS_KEY"),
- # asynchronous=True
- # use_ssl=False,
- # skip_instance_cache=True,
- # default_block_size='100MB', # if no specific value is given at all time. The built-in default is 5MB
- # client_kwargs={
- # "region_name": self.__s3_region
- # }
  )

- #####################################################################
- def add_file(self, filename):
- full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
- print(full_path)
-
- self.s3fs.touch(full_path)
- ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
-
- print(ff)
-
- #####################################################################
- def upload_data(self, bucket_name, file_path, prefix):
- # TODO: this works in theory but use boto3 to upload files
- s3_path = f"s3://{bucket_name}/{prefix}/"
- s3_file_system = self.s3fs
- s3_file_system.put(file_path, s3_path, recursive=True)
-
  #####################################################################
  def s3_map(
  self,
@@ -52,17 +36,29 @@ class S3FSManager:
  root=s3_zarr_store_path, s3=self.s3fs
  ) # create=False, not false because will be writing

+ #####################################################################
+ # def add_file(self, filename):
+ # full_path = f"{os.getenv('OUTPUT_BUCKET_NAME')}/testing/{filename}"
+ # print(full_path)
+ #
+ # self.s3fs.touch(full_path)
+ # ff = self.s3fs.ls(f"{os.getenv('OUTPUT_BUCKET_NAME')}/")
+ #
+ # print(ff)
+
+ #####################################################################
+ def upload_data(self, bucket_name, file_path, prefix):
+ # TODO: this works in theory but use boto3 to upload files
+ s3_path = f"s3://{bucket_name}/{prefix}/"
+ s3_file_system = self.s3fs
+ s3_file_system.put(file_path, s3_path, recursive=True)
+
  #####################################################################
  def exists(
  self,
- geo_json_s3_path,
+ s3_path,
  ):
- s3_file_system = self.s3fs
- return s3_file_system.exists(path=geo_json_s3_path)
+ # s3_file_system =
+ return self.s3fs.exists(s3_path)

  #####################################################################
- # def put(
- # self
- # ):
- # s3_file_system = self.s3fs
- # return
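S3FSManager.s3_map wraps s3fs.S3Map so the rest of the pipeline can hand a store mapping directly to zarr or xarray. A sketch of how a consumer might open a cruise store for reading, assuming xarray is installed; the store path below is illustrative only, pieced together from the level_2/<ship>/<cruise>/<sensor>/<cruise>.zarr layout mentioned elsewhere in this diff:

    import xarray as xr

    from water_column_sonar_processing.aws.s3fs_manager import S3FSManager

    # Credentials come from OUTPUT_BUCKET_ACCESS_KEY / OUTPUT_BUCKET_SECRET_ACCESS_KEY
    # environment variables; endpoint_url defaults to None for real AWS.
    s3fs_manager = S3FSManager()

    # Illustrative store path, not a verified key.
    store = s3fs_manager.s3_map(
        s3_zarr_store_path="s3://noaa-wcsd-zarr-pds/level_2/Henry_B._Bigelow/HB0806/EK60/HB0806.zarr"
    )
    ds = xr.open_zarr(store, consolidated=True)
    print(ds)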
water_column_sonar_processing/aws/sqs_manager.py
@@ -35,7 +35,7 @@ class SQSManager:
  #######################################################
  def list_queues(self, queue_name_prefix):
  # Note: SQS control plane is eventually consistent, meaning that it
- # takes a while to propagate the data accross the systems.
+ # takes a while to propagate the dataset accross the systems.
  response = self.__sqs_client.list_queues(QueueNamePrefix=queue_name_prefix)
  print(response)