tilebox-storage 0.41.0__py3-none-any.whl → 0.43.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tilebox/storage/__init__.py +16 -0
- tilebox/storage/aio.py +365 -196
- tilebox/storage/granule.py +72 -5
- tilebox/storage/providers.py +2 -0
- {tilebox_storage-0.41.0.dist-info → tilebox_storage-0.43.0.dist-info}/METADATA +2 -3
- tilebox_storage-0.43.0.dist-info/RECORD +7 -0
- tilebox_storage-0.41.0.dist-info/RECORD +0 -7
- {tilebox_storage-0.41.0.dist-info → tilebox_storage-0.43.0.dist-info}/WHEEL +0 -0
tilebox/storage/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from pathlib import Path
 from tilebox.storage.aio import ASFStorageClient as _ASFStorageClient
 from tilebox.storage.aio import CopernicusStorageClient as _CopernicusStorageClient
 from tilebox.storage.aio import UmbraStorageClient as _UmbraStorageClient
+from tilebox.storage.aio import USGSLandsatStorageClient as _USGSLandsatStorageClient


 class ASFStorageClient(_ASFStorageClient):
@@ -50,3 +51,18 @@ class CopernicusStorageClient(_CopernicusStorageClient):
         """
         super().__init__(access_key, secret_access_key, cache_directory)
         self._syncify()
+
+
+class USGSLandsatStorageClient(_USGSLandsatStorageClient):
+    def __init__(self, cache_directory: Path | None = Path.home() / ".cache" / "tilebox") -> None:
+        """A tilebox storage client that downloads data from the USGS Landsat S3 bucket.
+
+        This client handles the requester-pays nature of the bucket and provides methods for listing and downloading
+        data.
+
+        Args:
+            cache_directory: The directory to store downloaded data in. Defaults to ~/.cache/tilebox. If set to None
+                no cache is used and the `output_dir` parameter will need be set when downloading data.
+        """
+        super().__init__(cache_directory)
+        self._syncify()
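For orientation, here is a minimal usage sketch of the new synchronous USGSLandsatStorageClient wrapper, assuming the syncified client mirrors the async list_objects/download methods defined in aio.py. The granule name and S3 location below are hypothetical placeholders; real values come from a tilebox-datasets query, and the requester-pays bucket needs AWS credentials resolvable by boto3.

```python
from datetime import datetime

from tilebox.storage import USGSLandsatStorageClient
from tilebox.storage.granule import USGSLandsatStorageGranule

# hypothetical granule; in practice this comes from a tilebox-datasets query result
granule = USGSLandsatStorageGranule(
    time=datetime(2024, 1, 1),
    granule_name="LC09_L2SP_001002_20240101_20240102_02_T1",
    location="s3://usgs-landsat/collection02/level-2/standard/oli-tirs/2024/001/002/LC09_L2SP_001002_20240101_20240102_02_T1/",
    thumbnail="LC09_L2SP_001002_20240101_20240102_02_T1_thumb_small.jpeg",
)

client = USGSLandsatStorageClient()  # downloads are cached under ~/.cache/tilebox by default
available = client.list_objects(granule)  # object keys relative to the granule location
data_dir = client.download(granule, max_concurrent_downloads=4)
```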
tilebox/storage/aio.py
CHANGED
@@ -1,26 +1,31 @@
+import asyncio
 import hashlib
 import os
 import shutil
 import tempfile
-import warnings
 import zipfile
-from
+from asyncio import Queue, QueueEmpty
+from collections.abc import AsyncIterator
 from pathlib import Path
-from typing import
+from typing import Any, TypeAlias

 import anyio
-import
+import obstore as obs
+import xarray as xr
 from aiofile import async_open
-from botocore import UNSIGNED
-from botocore.client import Config
 from httpx import AsyncClient
-from
-from
+from obstore.auth.boto3 import Boto3CredentialProvider
+from obstore.store import GCSStore, LocalStore, S3Store
 from tqdm.auto import tqdm

 from _tilebox.grpc.aio.producer_consumer import async_producer_consumer
 from _tilebox.grpc.aio.syncify import Syncifiable
-from tilebox.storage.granule import
+from tilebox.storage.granule import (
+    ASFStorageGranule,
+    CopernicusStorageGranule,
+    UmbraStorageGranule,
+    USGSLandsatStorageGranule,
+)
 from tilebox.storage.providers import login

 try:
@@ -38,7 +43,7 @@ except ImportError:
         raise RuntimeError("IPython is not available. Diagram can only be displayed in a notebook.")


-
+ObjectStore: TypeAlias = S3Store | LocalStore | GCSStore


 class _HttpClient(Syncifiable):
@@ -47,6 +52,10 @@ class _HttpClient(Syncifiable):
         self._clients: dict[str, AsyncClient] = {}
         self._auth = auth

+    def __del__(self) -> None:
+        for client in self._clients.values():
+            asyncio.run(client.aclose())
+
     async def download_quicklook(
         self, datapoint: xr.Dataset | ASFStorageGranule, output_dir: Path | None = None
     ) -> Path:
@@ -94,7 +103,7 @@ class _HttpClient(Syncifiable):
         image_data = await self._download_quicklook(granule)
         assert granule.urls.quicklook is not None  # otherwise _download_quicklook would have raised a ValueError
         image_name = granule.urls.quicklook.rsplit("/", 1)[-1]
-        _display_quicklook(image_data,
+        _display_quicklook(image_data, width, height, f"<code>Image {image_name} © ASF {granule.time.year}</code>")

     async def _download_quicklook(self, granule: ASFStorageGranule) -> bytes:
         """Download a granules quicklook image into a memory buffer."""
@@ -224,10 +233,10 @@ class _HttpClient(Syncifiable):
         return client


-def _display_quicklook(image_data: bytes | Path,
+def _display_quicklook(image_data: bytes | Path, width: int, height: int, image_caption: str | None = None) -> None:
     display(Image(image_data, width=width, height=height))
-
-
+    if image_caption is not None:
+        display(HTML(image_caption))


 class StorageClient(Syncifiable):
@@ -248,6 +257,71 @@ class StorageClient(Syncifiable):
             shutil.rmtree(self._cache)


+async def list_object_paths(store: ObjectStore, prefix: str) -> list[str]:
+    objects = await obs.list(store, prefix).collect_async()
+    prefix_path = Path(prefix)
+    return sorted(str(Path(obj["path"]).relative_to(prefix_path)) for obj in objects)
+
+
+async def download_objects(  # noqa: PLR0913
+    store: ObjectStore,
+    prefix: str,
+    objects: list[str],
+    output_dir: Path,
+    show_progress: bool = True,
+    max_concurrent_downloads: int = 10,
+) -> None:
+    queue = Queue()
+    for obj in objects:
+        await queue.put((prefix, obj))
+
+    max_concurrent_downloads = max(1, min(max_concurrent_downloads, len(objects)))
+    async with anyio.create_task_group() as task_group:
+        for _ in range(max_concurrent_downloads):
+            task_group.start_soon(_download_worker, store, queue, output_dir, show_progress)
+
+
+async def _download_worker(
+    store: ObjectStore,
+    queue: Queue[tuple[str, str]],
+    output_dir: Path,
+    show_progress: bool = True,
+) -> None:
+    while True:
+        try:
+            prefix, obj = queue.get_nowait()
+        except QueueEmpty:
+            break
+
+        await _download_object(store, prefix, obj, output_dir, show_progress)
+
+
+async def _download_object(
+    store: ObjectStore, prefix: str, obj: str, output_dir: Path, show_progress: bool = True
+) -> Path:
+    key = str(Path(prefix) / obj)
+    output_path = output_dir / obj
+    if output_path.exists():  # already cached
+        return output_path
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    download_path = output_path.parent / f"{output_path.name}.part"
+    response = await obs.get_async(store, key)
+    file_size = response.meta["size"]
+    with download_path.open("wb") as f:
+        if show_progress:
+            with tqdm(desc=obj, total=file_size, unit="B", unit_scale=True, unit_divisor=1024) as progress:
+                async for bytes_chunk in response:
+                    f.write(bytes_chunk)
+                    progress.update(len(bytes_chunk))
+        else:
+            async for bytes_chunk in response:
+                f.write(bytes_chunk)
+
+    shutil.move(download_path, output_path)
+    return output_path
+
+
 class ASFStorageClient(StorageClient):
     def __init__(self, user: str, password: str, cache_directory: Path = Path.home() / ".cache" / "tilebox") -> None:
         """A tilebox storage client that downloads data from the Alaska Satellite Facility.
@@ -342,7 +416,7 @@ class ASFStorageClient(StorageClient):
         if Image is None:
             raise ImportError("IPython is not available, please use download_preview instead.")
         quicklook = await self._download_quicklook(datapoint)
-        _display_quicklook(quicklook, quicklook.name
+        _display_quicklook(quicklook, width, height, f"<code>Image {quicklook.name} © ASF {granule.time.year}</code>")

     async def _download_quicklook(self, datapoint: xr.Dataset | ASFStorageGranule) -> Path:
         granule = ASFStorageGranule.from_data(datapoint)
@@ -359,49 +433,15 @@ class ASFStorageClient(StorageClient):
             return await self._client.download_quicklook(datapoint, output_file.parent)


-
-
-
-        self._s3 = s3
-
-    def list_objects(self, prefix: str) -> Iterator[ObjectTypeDef]:
-        """Returns an iterator over the objects in the S3 bucket that starts with the given prefix."""
-        paginator = self._s3.get_paginator("list_objects_v2")
-        for page in paginator.paginate(Bucket=self._bucket, Prefix=prefix):
-            yield from page.get("Contents", [])
-
-    async def download_object(  # noqa: PLR0913
-        self, key: str, name: str, size: int, download_file: IO[Any], verify: bool, show_progress: bool
-    ) -> None:
-        """Download an object from S3 into a file."""
-        progress = None
-        if show_progress:
-            progress = tqdm(
-                desc=name,
-                total=size,
-                unit="B",
-                unit_scale=True,
-                unit_divisor=1024,
-            )
-
-        self._s3.download_fileobj(
-            Bucket=self._bucket,
-            Key=key,
-            Fileobj=download_file,
-            ExtraArgs={"ChecksumMode": "ENABLED"} if verify else None,
-            Callback=progress.update if progress else None,
-        )
-
-        if progress is not None:
-            if progress.total != progress.n:
-                progress.n = progress.total
-                progress.refresh()
-            progress.close()
+def _umbra_s3_prefix(datapoint: xr.Dataset | UmbraStorageGranule) -> str:
+    granule = UmbraStorageGranule.from_data(datapoint)
+    return f"sar-data/tasks/{granule.location}/"


 class UmbraStorageClient(StorageClient):
     _STORAGE_PROVIDER = "Umbra"
     _BUCKET = "umbra-open-data-catalog"
+    _REGION = "us-west-2"

     def __init__(self, cache_directory: Path | None = Path.home() / ".cache" / "tilebox") -> None:
         """A tilebox storage client that downloads data from the Umbra Open Data Catalog.
@@ -412,14 +452,9 @@ class UmbraStorageClient(StorageClient):
         """
         super().__init__(cache_directory)

-
-        # https://github.com/boto/boto3/issues/3889
-        warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*datetime.utcnow.*")
-        boto3_client = boto3.client("s3", config=Config(signature_version=UNSIGNED))
+        self._store: ObjectStore = S3Store(self._BUCKET, region=self._REGION, skip_signature=True)

-
-
-    def list_objects(self, datapoint: xr.Dataset | UmbraStorageGranule) -> list[str]:
+    async def list_objects(self, datapoint: xr.Dataset | UmbraStorageGranule) -> list[str]:
         """List all available objects for a given datapoint.

         Args:
@@ -427,38 +462,36 @@ class UmbraStorageClient(StorageClient):

         Returns:
             List of object keys available for the given datapoint, relative to the granule location."""
-
-        prefix = f"sar-data/tasks/{granule.location}/"
-        keys = [object_metadata.get("Key") for object_metadata in self._s3.list_objects(prefix)]
-        return [k.removeprefix(prefix) for k in keys if k is not None]
+        return await list_object_paths(self._store, _umbra_s3_prefix(datapoint))

     async def download(
         self,
         datapoint: xr.Dataset | UmbraStorageGranule,
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
         """Download the data for a given datapoint.

         Args:
             datapoint: The datapoint to download the data for.
             output_dir: The directory to download the data to. Optional, defaults to the cache directory.
-            verify: Whether to verify the md5sum of the downloaded file. Defaults to True.
             show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.

         Returns:
             The path to the downloaded data directory.
         """
-
+        all_objects = await list_object_paths(self._store, _umbra_s3_prefix(datapoint))
+        return await self._download_objects(datapoint, all_objects, output_dir, show_progress, max_concurrent_downloads)

     async def download_objects(
         self,
         datapoint: xr.Dataset | UmbraStorageGranule,
         objects: list[str],
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
         """Download a subset of the data for a given datapoint.

@@ -470,80 +503,39 @@ class UmbraStorageClient(StorageClient):
                 list_objects to get a list of available objects to filter on. Object names are considered relative
                 to the granule location.
             output_dir: The directory to download the data to. Optional, defaults to the cache directory.
-            verify: Whether to verify the md5sum of the downloaded file. Defaults to True.
             show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.

         Returns:
             The path to the downloaded data directory.
         """
-        return await self.
+        return await self._download_objects(datapoint, objects, output_dir, show_progress, max_concurrent_downloads)

-    async def
+    async def _download_objects(
         self,
         datapoint: xr.Dataset | UmbraStorageGranule,
-
+        objects: list[str],
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
-
+        prefix = _umbra_s3_prefix(datapoint)

         base_folder = output_dir or self._cache
         if base_folder is None:
             raise ValueError("No cache directory or output directory provided.")
-        output_folder = base_folder / self._STORAGE_PROVIDER /
-
-        prefix = f"sar-data/tasks/{granule.location}/"
-
-        objects = self._s3.list_objects(prefix)
-        objects = [obj for obj in objects if "Key" in obj]  # Key is optional, so just in case filter out obj without
-
-        if obj_filter_func is not None:
-            # get object names relative to the granule location, so we can pass it to our filter function
-            object_names = [obj["Key"].removeprefix(prefix) for obj in objects if "Key" in obj]
-            objects = [
-                object_metadata
-                for (object_metadata, object_name) in zip(objects, object_names, strict=True)
-                if obj_filter_func(object_name)
-            ]
+        output_folder = base_folder / self._STORAGE_PROVIDER / Path(prefix)

-
-
-                task_group.start_soon(
-                    self._download_object, object_metadata, prefix, output_folder, verify, show_progress
-                )
+        if len(objects) == 0:
+            return output_folder

+        await download_objects(self._store, prefix, objects, output_folder, show_progress, max_concurrent_downloads)
         return output_folder

-    async def _download_object(
-        self, object_metadata: ObjectTypeDef, prefix: str, output_folder: Path, verify: bool, show_progress: bool
-    ) -> None:
-        key = object_metadata.get("Key", "")
-        relative_path = key.removeprefix(prefix)
-        if relative_path.removeprefix("/") == "":  # skip the root folder if it shows up in the list for some reason
-            return
-        if object_metadata.get("Size", 0) == 0:  # skip empty objects (they are just folder markers)
-            return

-
-
-
-
-        # we download into a temporary file, which we then move to the final location once the download is complete
-        # this way we can be sure that the files in the download location are complete and not partially downloaded
-        with tempfile.NamedTemporaryFile(prefix="tilebox", delete=False) as download_file:
-            await self._s3.download_object(
-                key,
-                # as "name" for the progress bar we display the relative path to the root of the download
-                relative_path,
-                object_metadata.get("Size", 0),
-                download_file,
-                verify,
-                show_progress,
-            )
-
-            output_folder.mkdir(parents=True, exist_ok=True)
-            shutil.move(download_file.name, output_file)
+def _copernicus_s3_prefix(datapoint: xr.Dataset | CopernicusStorageGranule) -> str:
+    granule = CopernicusStorageGranule.from_data(datapoint)
+    return granule.location.removeprefix("/eodata/")


 class CopernicusStorageClient(StorageClient):
@@ -588,22 +580,14 @@ class CopernicusStorageClient(StorageClient):
                 f"To get access to the Copernicus data, please visit: https://documentation.dataspace.copernicus.eu/APIs/S3.html"
             )

-
-        # https://github.com/boto/boto3/issues/3889
-        warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*datetime.utcnow.*")
-        boto3_client = boto3.client(
-            "s3",
-            aws_access_key_id=access_key,
-            aws_secret_access_key=secret_access_key,
-            endpoint_url=self._ENDPOINT_URL,
-        )
-
-        self._s3 = _S3Client(
-            s3=boto3_client,
+        self._store = S3Store(
             bucket=self._BUCKET,
+            endpoint=self._ENDPOINT_URL,
+            access_key_id=access_key,
+            secret_access_key=secret_access_key,
         )

-    def list_objects(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> list[str]:
+    async def list_objects(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> list[str]:
         """List all available objects for a given datapoint.

         Args:
@@ -611,38 +595,54 @@ class CopernicusStorageClient(StorageClient):

         Returns:
             List of object keys available for the given datapoint, relative to the granule location."""
+        return await self._list_objects(datapoint)
+
+    async def _list_objects(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> list[str]:
+        """List all available objects for a given datapoint.
+
+        Args:
+            datapoint: The datapoint to list available objects the data for.
+
+        Returns:
+            List of object keys available for the given datapoint, relative to the granule location."""
+
         granule = CopernicusStorageGranule.from_data(datapoint)
-
-
-
+        # special handling for Sentinel-5P, where the location is not a folder but a single file
+        if granule.location.endswith(".nc"):
+            return [Path(granule.granule_name).name]
+
+        return await list_object_paths(self._store, _copernicus_s3_prefix(granule))

     async def download(
         self,
         datapoint: xr.Dataset | CopernicusStorageGranule,
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
         """Download the data for a given datapoint.

         Args:
             datapoint: The datapoint to download the data for.
             output_dir: The directory to download the data to. Optional, defaults to the cache directory.
-            verify: Whether to verify the md5sum of the downloaded file. Defaults to True.
             show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.

         Returns:
             The path to the downloaded data directory.
         """
-
+        granule = CopernicusStorageGranule.from_data(datapoint)
+
+        all_objects = await self._list_objects(granule)
+        return await self._download_objects(granule, all_objects, output_dir, show_progress, max_concurrent_downloads)

     async def download_objects(
         self,
         datapoint: xr.Dataset | CopernicusStorageGranule,
         objects: list[str],
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
         """Download a subset of the data for a given datapoint.

@@ -654,80 +654,249 @@ class CopernicusStorageClient(StorageClient):
                 list_objects to get a list of available objects to filter on. Object names are considered relative
                 to the granule location.
             output_dir: The directory to download the data to. Optional, defaults to the cache directory.
-            verify: Whether to verify the md5sum of the downloaded file. Defaults to True.
             show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.

         Returns:
             The path to the downloaded data directory.
         """
-        return await self.
+        return await self._download_objects(datapoint, objects, output_dir, show_progress, max_concurrent_downloads)

-    async def
+    async def _download_objects(
         self,
         datapoint: xr.Dataset | CopernicusStorageGranule,
-
+        objects: list[str],
         output_dir: Path | None = None,
-        verify: bool = True,
         show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
     ) -> Path:
         granule = CopernicusStorageGranule.from_data(datapoint)
+        prefix = _copernicus_s3_prefix(granule)
+        single_file = False
+
+        # special handling for Sentinel-5P, where the location is not a folder but a single file
+        if granule.location.endswith(".nc"):
+            single_file = True
+            prefix = str(Path(prefix).parent)

         base_folder = output_dir or self._cache
         if base_folder is None:
             raise ValueError("No cache directory or output directory provided.")
-        output_folder = base_folder / Path(
-
-
-
-        objects = [obj for obj in objects if "Key" in obj]  # Key is optional, so just in case filter out obj without
-
-        if obj_filter_func is not None:
-            # get object names relative to the granule location, so we can pass it to our filter function
-            object_names = [obj["Key"].removeprefix(prefix) for obj in objects if "Key" in obj]
-            objects = [
-                object_metadata
-                for (object_metadata, object_name) in zip(objects, object_names, strict=True)
-                if obj_filter_func(object_name)
-            ]
-
-        async with anyio.create_task_group() as task_group:
-            # even though this is a async task group, the downloads are still synchronous
-            # because the S3 client is synchronous
-            # we could work around this by using anyio.to_thread.run_sync
-            # but then we download all files in parallel, which might be too much
-            for object_metadata in objects:
-                task_group.start_soon(
-                    self._download_object, object_metadata, prefix, output_folder, verify, show_progress
-                )
+        output_folder = base_folder / self._STORAGE_PROVIDER / Path(prefix)
+
+        if len(objects) == 0:
+            return output_folder

+        await download_objects(self._store, prefix, objects, output_folder, show_progress, max_concurrent_downloads)
+        if single_file:
+            return output_folder / objects[0]
         return output_folder

-    async def
-
+    async def download_quicklook(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> Path:
+        """Download the quicklook image for a given datapoint.
+
+        Args:
+            datapoint: The datapoint to download the quicklook for.
+
+        Raises:
+            ValueError: If no quicklook is available for the given datapoint.
+
+        Returns:
+            The path to the downloaded quicklook image.
+        """
+        return await self._download_quicklook(datapoint)
+
+    async def quicklook(
+        self, datapoint: xr.Dataset | CopernicusStorageGranule, width: int = 600, height: int = 600
     ) -> None:
-
-        relative_path = key.removeprefix(prefix)
-        if relative_path.removeprefix("/") == "":  # skip the root folder if it shows up in the list for some reason
-            return
-        if object_metadata.get("Size", 0) == 0:  # skip empty objects (they are just folder markers)
-            return
+        """Display the quicklook image for a given datapoint.

-
-        if output_file.exists():
-            return
+        Requires an IPython kernel to be running. If you are not using IPython, use download_quicklook instead.

-
-
-
-
-
-
-
-
-
-
-
-
+        Args:
+            datapoint: The datapoint to download the quicklook for.
+            width: Display width of the image in pixels. Defaults to 600.
+            height: Display height of the image in pixels. Defaults to 600.
+
+        Raises:
+            ImportError: In case IPython is not available.
+            ValueError: If no quicklook is available for the given datapoint.
+        """
+        if Image is None:
+            raise ImportError("IPython is not available, please use download_preview instead.")
+        granule = CopernicusStorageGranule.from_data(datapoint)
+        quicklook = await self._download_quicklook(granule)
+        _display_quicklook(quicklook, width, height, f"<code>{granule.granule_name} © ESA {granule.time.year}</code>")
+
+    async def _download_quicklook(self, datapoint: xr.Dataset | CopernicusStorageGranule) -> Path:
+        granule = CopernicusStorageGranule.from_data(datapoint)
+        if granule.thumbnail is None:
+            raise ValueError(f"No quicklook available for {granule.granule_name}")
+
+        prefix = _copernicus_s3_prefix(granule)
+        output_folder = (
+            self._cache / self._STORAGE_PROVIDER / Path(prefix)
+            if self._cache is not None
+            else Path.cwd() / self._STORAGE_PROVIDER
+        )
+
+        await download_objects(self._store, prefix, [granule.thumbnail], output_folder, show_progress=False)
+        return output_folder / granule.thumbnail
+
+
+def _landsat_s3_prefix(datapoint: xr.Dataset | USGSLandsatStorageGranule) -> str:
+    granule = USGSLandsatStorageGranule.from_data(datapoint)
+    return granule.location.removeprefix("s3://usgs-landsat/")
+
+
+class USGSLandsatStorageClient(StorageClient):
+    """
+    A client for downloading USGS Landsat data from the usgs-landsat and usgs-landsat-ard S3 bucket.
+
+    This client handles the requester-pays nature of the bucket and provides methods for listing and downloading data.
+    """
+
+    _STORAGE_PROVIDER = "USGSLandsat"
+    _BUCKET = "usgs-landsat"
+    _REGION = "us-west-2"
+
+    def __init__(self, cache_directory: Path | None = Path.home() / ".cache" / "tilebox") -> None:
+        """A tilebox storage client that downloads data from the USGS Landsat S3 bucket.
+
+        Args:
+            cache_directory: The directory to store downloaded data in. Defaults to ~/.cache/tilebox. If set to None
+                no cache is used and the `output_dir` parameter will need be set when downloading data.
+        """
+        super().__init__(cache_directory)
+
+        self._store = S3Store(
+            self._BUCKET, region=self._REGION, request_payer=True, credential_provider=Boto3CredentialProvider()
+        )
+
+    async def list_objects(self, datapoint: xr.Dataset | USGSLandsatStorageGranule) -> list[str]:
+        """List all available objects for a given datapoint.
+
+        Args:
+            datapoint: The datapoint to list available objects the data for.
+
+        Returns:
+            List of object keys available for the given datapoint, relative to the granule location."""
+        return await list_object_paths(self._store, _landsat_s3_prefix(datapoint))
+
+    async def download(
+        self,
+        datapoint: xr.Dataset | USGSLandsatStorageGranule,
+        output_dir: Path | None = None,
+        show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
+    ) -> Path:
+        """Download the data for a given datapoint.
+
+        Args:
+            datapoint: The datapoint to download the data for.
+            output_dir: The directory to download the data to. Optional, defaults to the cache directory.
+            show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.
+
+        Returns:
+            The path to the downloaded data directory.
+        """
+        all_objects = await list_object_paths(self._store, _landsat_s3_prefix(datapoint))
+        return await self._download_objects(datapoint, all_objects, output_dir, show_progress, max_concurrent_downloads)
+
+    async def download_objects(
+        self,
+        datapoint: xr.Dataset | USGSLandsatStorageGranule,
+        objects: list[str],
+        output_dir: Path | None = None,
+        show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
+    ) -> Path:
+        """Download a subset of the data for a given datapoint.
+
+        Typically used in conjunction with list_objects to filter the available objects beforehand.
+
+        Args:
+            datapoint: The datapoint to download the data for.
+            objects: A list of objects to download. Only objects that are in this list will be downloaded. See
+                list_objects to get a list of available objects to filter on. Object names are considered relative
+                to the granule location.
+            output_dir: The directory to download the data to. Optional, defaults to the cache directory.
+            show_progress: Whether to show a progress bar while downloading. Defaults to True.
+            max_concurrent_downloads: The maximum number of concurrent downloads. Defaults to 4.
+
+        Returns:
+            The path to the downloaded data directory.
+        """
+        return await self._download_objects(datapoint, objects, output_dir, show_progress, max_concurrent_downloads)
+
+    async def _download_objects(
+        self,
+        datapoint: xr.Dataset | USGSLandsatStorageGranule,
+        objects: list[str],
+        output_dir: Path | None = None,
+        show_progress: bool = True,
+        max_concurrent_downloads: int = 4,
+    ) -> Path:
+        prefix = _landsat_s3_prefix(datapoint)
+
+        base_folder = output_dir or self._cache
+        if base_folder is None:
+            raise ValueError("No cache directory or output directory provided.")
+        output_folder = base_folder / Path(prefix)
+
+        if len(objects) == 0:
+            return output_folder
+
+        await download_objects(self._store, prefix, objects, output_folder, show_progress, max_concurrent_downloads)
+        return output_folder
+
+    async def download_quicklook(self, datapoint: xr.Dataset | USGSLandsatStorageGranule) -> Path:
+        """Download the quicklook image for a given datapoint.
+
+        Args:
+            datapoint: The datapoint to download the quicklook for.
+
+        Raises:
+            ValueError: If no quicklook is available for the given datapoint.
+
+        Returns:
+            The path to the downloaded quicklook image.
+        """
+        return await self._download_quicklook(datapoint)
+
+    async def quicklook(
+        self, datapoint: xr.Dataset | USGSLandsatStorageGranule, width: int = 600, height: int = 600
+    ) -> None:
+        """Display the quicklook image for a given datapoint.
+
+        Requires an IPython kernel to be running. If you are not using IPython, use download_quicklook instead.
+
+        Args:
+            datapoint: The datapoint to download the quicklook for.
+            width: Display width of the image in pixels. Defaults to 600.
+            height: Display height of the image in pixels. Defaults to 600.
+
+        Raises:
+            ImportError: In case IPython is not available.
+            ValueError: If no quicklook is available for the given datapoint.
+        """
+        if Image is None:
+            raise ImportError("IPython is not available, please use download_preview instead.")
+        quicklook = await self._download_quicklook(datapoint)
+        _display_quicklook(quicklook, width, height, f"<code>Image {quicklook.name} © USGS</code>")
+
+    async def _download_quicklook(self, datapoint: xr.Dataset | USGSLandsatStorageGranule) -> Path:
+        granule = USGSLandsatStorageGranule.from_data(datapoint)
+        if granule.thumbnail is None:
+            raise ValueError(f"No quicklook available for {granule.granule_name}")
+
+        prefix = _landsat_s3_prefix(datapoint)
+        output_folder = (
+            self._cache / self._STORAGE_PROVIDER / Path(prefix)
+            if self._cache is not None
+            else Path.cwd() / self._STORAGE_PROVIDER
+        )

-
-
+        await download_objects(self._store, prefix, [granule.thumbnail], output_folder, show_progress=False)
+        return output_folder / granule.thumbnail
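The download path in aio.py now goes through obstore: list_object_paths collects the keys under a prefix, and download_objects fans the work out to a small pool of workers pulling from an asyncio queue, so at most max_concurrent_downloads transfers run at once. Below is a minimal sketch calling these module-level helpers directly against the public Umbra bucket (anonymous access and us-west-2, both taken from UmbraStorageClient above); the task prefix is a hypothetical placeholder.

```python
import asyncio
from pathlib import Path

from obstore.store import S3Store

from tilebox.storage.aio import download_objects, list_object_paths

# anonymous access to the public Umbra Open Data Catalog bucket, mirroring UmbraStorageClient
store = S3Store("umbra-open-data-catalog", region="us-west-2", skip_signature=True)


async def main() -> None:
    prefix = "sar-data/tasks/some-task-id/"  # hypothetical; real values come from UmbraStorageGranule.location
    objects = await list_object_paths(store, prefix)
    # at most 4 workers download concurrently; each file is written to a .part file and then moved into place
    await download_objects(store, prefix, objects, Path("./downloads"), max_concurrent_downloads=4)


asyncio.run(main())
```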
tilebox/storage/granule.py
CHANGED
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
 from datetime import datetime
+from pathlib import Path

 import xarray as xr

@@ -64,7 +65,6 @@ def _asf_download_urls(granule_name: str) -> StorageURLs:
 class UmbraStorageGranule:
     time: datetime
     granule_name: str
-    processing_level: str
     location: str

     @classmethod
@@ -84,17 +84,34 @@ class UmbraStorageGranule:
         return cls(
             time,
             dataset.granule_name.item(),
-            dataset.processing_level.item(),
             dataset.location.item(),
         )


+def _thumbnail_relative_to_eodata_location(thumbnail_url: str, location: str) -> str:
+    """
+    Returns a thumbnail path from a URL as a path relative to a storage location.
+
+    For example:
+    >>> _thumbnail_relative_to_location(
+    >>>     "https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png",
+    >>>     "/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE"
+    >>> )
+    "preview/thumbnail.png"
+    """
+
+    url_path = thumbnail_url.split("?path=")[-1]
+    url_path = url_path.removeprefix("/")
+    location = location.removeprefix("/eodata/")
+    return str(Path(url_path).relative_to(location))
+
+
 @dataclass
 class CopernicusStorageGranule:
     time: datetime
     granule_name: str
     location: str
-
+    thumbnail: str | None = None

     @classmethod
     def from_data(cls, dataset: "xr.Dataset | CopernicusStorageGranule") -> "CopernicusStorageGranule":
@@ -110,9 +127,59 @@ class CopernicusStorageGranule:

         time = datetime.combine(dataset.time.dt.date.item(), dataset.time.dt.time.item())

+        location = dataset.location.item()
+
+        thumbnail_path = None
+        if "thumbnail" in dataset:
+            thumbnail_path = dataset.thumbnail.item().strip()
+
+        thumbnail = (
+            _thumbnail_relative_to_eodata_location(thumbnail_path, location)
+            if isinstance(thumbnail_path, str) and len(thumbnail_path) > 0
+            else None
+        )
+
         return cls(
             time,
             dataset.granule_name.item(),
-
-
+            location,
+            thumbnail,
+        )
+
+
+@dataclass
+class USGSLandsatStorageGranule:
+    time: datetime
+    granule_name: str
+    location: str
+    thumbnail: str | None = None
+
+    @classmethod
+    def from_data(cls, dataset: "xr.Dataset | USGSLandsatStorageGranule") -> "USGSLandsatStorageGranule":
+        """Extract the granule information from a datapoint given as xarray dataset."""
+        if isinstance(dataset, USGSLandsatStorageGranule):
+            return dataset
+
+        if "time" in dataset.dims:
+            if dataset.sizes["time"] == 1:
+                dataset = dataset.isel(time=0)
+            else:
+                raise ValueError("The given dataset has more than one granule.")
+
+        time = datetime.combine(dataset.time.dt.date.item(), dataset.time.dt.time.item())
+
+        thumbnail_path: str | None = None
+        if "thumbnail" in dataset:
+            thumbnail_path = dataset.thumbnail.item()
+        elif "overview" in dataset:
+            thumbnail_path = dataset.overview.item()
+
+        thumbnail = thumbnail_path.split("/")[-1] if isinstance(thumbnail_path, str) else None
+
+        return cls(
+            time,
+            dataset.granule_name.item(),
+            # Landsat 2 STAC items have an incorrect bucket name set, it should be usgs-landsat as well
+            dataset.location.item().replace("s3://usgs-landsat-ard/", "s3://usgs-landsat/"),
+            thumbnail,
         )
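The Copernicus granule now carries its quicklook thumbnail as a key relative to the granule location. The sketch below replays the computation performed by the new _thumbnail_relative_to_eodata_location helper outside the module, using the example URL and location from its docstring:

```python
from pathlib import Path

thumbnail_url = (
    "https://catalogue.dataspace.copernicus.eu/get-object?path=/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/"
    "S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE/preview/thumbnail.png"
)
location = (
    "/eodata/Sentinel-1/SAR/EW_GRDM_1S/2025/08/07/"
    "S1A_EW_GRDM_1SDH_20250807T111242_20250807T111346_060429_078305_DB6A.SAFE"
)

# the catalogue URL embeds the object path after "?path="; dropping the granule's /eodata/ location
# leaves the key that the storage client downloads from S3
url_path = thumbnail_url.split("?path=")[-1].removeprefix("/")
relative = Path(url_path).relative_to(location.removeprefix("/eodata/"))
print(relative)  # preview/thumbnail.png
```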
tilebox/storage/providers.py
CHANGED
@@ -60,6 +60,8 @@ async def _asf_login(auth: tuple[str, str]) -> AsyncClient:
             "redirect_uri": "https://auth.asf.alaska.edu/login",
         },
     )
+    await response.aclose()
     if response.status_code == 401:
+        await client.aclose()
         raise ValueError("Invalid username or password.")
     return client
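The providers.py change closes the login response once its status code has been read, and closes the client when authentication fails so it is not leaked. A simplified sketch of that pattern, with a placeholder URL and form fields standing in for the real ASF endpoint:

```python
from httpx import AsyncClient


async def login(user: str, password: str) -> AsyncClient:
    client = AsyncClient()
    response = await client.post(
        "https://auth.example.com/login",  # placeholder for the real auth endpoint
        data={"username": user, "password": password},
    )
    await response.aclose()  # release the response as soon as the status code has been read
    if response.status_code == 401:
        await client.aclose()  # the client is never returned on failure, so close it here
        raise ValueError("Invalid username or password.")
    return client
```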
{tilebox_storage-0.41.0.dist-info → tilebox_storage-0.43.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tilebox-storage
-Version: 0.41.0
+Version: 0.43.0
 Summary: Storage client for Tilebox
 Project-URL: Homepage, https://tilebox.com
 Project-URL: Documentation, https://docs.tilebox.com/
@@ -20,10 +20,9 @@ Classifier: Topic :: Scientific/Engineering
 Classifier: Topic :: Software Development
 Requires-Python: >=3.10
 Requires-Dist: aiofile>=3.8
-Requires-Dist: boto3-stubs[essential]>=1.33
-Requires-Dist: boto3>=1.33
 Requires-Dist: folium>=0.15
 Requires-Dist: httpx>=0.27
+Requires-Dist: obstore>=0.8.0
 Requires-Dist: shapely>=2
 Requires-Dist: tilebox-datasets
 Description-Content-Type: text/markdown
tilebox_storage-0.43.0.dist-info/RECORD
ADDED
@@ -0,0 +1,7 @@
+tilebox/storage/__init__.py,sha256=nQYsEKee3lBCDi_rmISGd-kKgqDV75ogiadbpLKLGww,3290
+tilebox/storage/aio.py,sha256=kNahmyUUXeFMgA-XvBXq3MCqBkZw-8BPLr7n2HLf5gA,38383
+tilebox/storage/granule.py,sha256=RPw3UkiIwGwQEqmiuxy2tbWAMrjoMYNNigXimB4jJGI,6179
+tilebox/storage/providers.py,sha256=vOTxSj2VIQhbFyvxu_eOcPmBGETDaijRoCWi9heUwRs,1832
+tilebox_storage-0.43.0.dist-info/METADATA,sha256=HySqn_TW_yk3NuEVAyxVN3SkJoxqC_GiibHXuuXCElw,4103
+tilebox_storage-0.43.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+tilebox_storage-0.43.0.dist-info/RECORD,,
tilebox_storage-0.41.0.dist-info/RECORD
REMOVED
@@ -1,7 +0,0 @@
-tilebox/storage/__init__.py,sha256=_6zp6xwCpkUxdaFblrdz_3vSC_danbTg_MYi30_y8JM,2510
-tilebox/storage/aio.py,sha256=S5vGBQ-rf45yPF3rND_D_HTE0N84iblReNfnnVdxVYw,32181
-tilebox/storage/granule.py,sha256=o3RMnTKPabdLhmWQomjpANNzoD9TkPOSxpljkthEsLc,3648
-tilebox/storage/providers.py,sha256=91CPye5pNdVlbBfYZDcyB9SH7tjtvCWL-vLUp7YvknY,1774
-tilebox_storage-0.41.0.dist-info/METADATA,sha256=lh_YO3KkPWQEAMeF01HamZfChPRZSIZNC17VKJFhQq4,4144
-tilebox_storage-0.41.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-tilebox_storage-0.41.0.dist-info/RECORD,,
{tilebox_storage-0.41.0.dist-info → tilebox_storage-0.43.0.dist-info}/WHEEL
File without changes