sqlspec-0.16.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of sqlspec might be problematic.
- 51ff5a9eadfdefd49f98__mypyc.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/__init__.py +92 -0
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +14 -0
- sqlspec/_serialization.py +77 -0
- sqlspec/_sql.py +1780 -0
- sqlspec/_typing.py +680 -0
- sqlspec/adapters/__init__.py +0 -0
- sqlspec/adapters/adbc/__init__.py +5 -0
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +361 -0
- sqlspec/adapters/adbc/driver.py +512 -0
- sqlspec/adapters/aiosqlite/__init__.py +19 -0
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +253 -0
- sqlspec/adapters/aiosqlite/driver.py +248 -0
- sqlspec/adapters/asyncmy/__init__.py +19 -0
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +180 -0
- sqlspec/adapters/asyncmy/driver.py +274 -0
- sqlspec/adapters/asyncpg/__init__.py +21 -0
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +229 -0
- sqlspec/adapters/asyncpg/driver.py +344 -0
- sqlspec/adapters/bigquery/__init__.py +18 -0
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +298 -0
- sqlspec/adapters/bigquery/driver.py +558 -0
- sqlspec/adapters/duckdb/__init__.py +22 -0
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +504 -0
- sqlspec/adapters/duckdb/driver.py +368 -0
- sqlspec/adapters/oracledb/__init__.py +32 -0
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +317 -0
- sqlspec/adapters/oracledb/driver.py +538 -0
- sqlspec/adapters/psqlpy/__init__.py +16 -0
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +214 -0
- sqlspec/adapters/psqlpy/driver.py +530 -0
- sqlspec/adapters/psycopg/__init__.py +32 -0
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +426 -0
- sqlspec/adapters/psycopg/driver.py +796 -0
- sqlspec/adapters/sqlite/__init__.py +15 -0
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +240 -0
- sqlspec/adapters/sqlite/driver.py +294 -0
- sqlspec/base.py +571 -0
- sqlspec/builder/__init__.py +62 -0
- sqlspec/builder/_base.py +473 -0
- sqlspec/builder/_column.py +320 -0
- sqlspec/builder/_ddl.py +1346 -0
- sqlspec/builder/_ddl_utils.py +103 -0
- sqlspec/builder/_delete.py +76 -0
- sqlspec/builder/_insert.py +256 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/builder/_parsing_utils.py +140 -0
- sqlspec/builder/_select.py +170 -0
- sqlspec/builder/_update.py +188 -0
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/builder/mixins/_delete_operations.py +41 -0
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/builder/mixins/_join_operations.py +122 -0
- sqlspec/builder/mixins/_merge_operations.py +476 -0
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +603 -0
- sqlspec/builder/mixins/_update_operations.py +187 -0
- sqlspec/builder/mixins/_where_clause.py +621 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +395 -0
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/result.py +677 -0
- sqlspec/core/splitter.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/splitter.py +819 -0
- sqlspec/core/statement.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +19 -0
- sqlspec/driver/_async.py +502 -0
- sqlspec/driver/_common.py +631 -0
- sqlspec/driver/_sync.py +503 -0
- sqlspec/driver/mixins/__init__.py +6 -0
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +86 -0
- sqlspec/exceptions.py +193 -0
- sqlspec/extensions/__init__.py +0 -0
- sqlspec/extensions/aiosql/__init__.py +10 -0
- sqlspec/extensions/aiosql/adapter.py +461 -0
- sqlspec/extensions/litestar/__init__.py +6 -0
- sqlspec/extensions/litestar/_utils.py +52 -0
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +92 -0
- sqlspec/extensions/litestar/handlers.py +260 -0
- sqlspec/extensions/litestar/plugin.py +145 -0
- sqlspec/extensions/litestar/providers.py +454 -0
- sqlspec/loader.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/loader.py +760 -0
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +407 -0
- sqlspec/py.typed +0 -0
- sqlspec/storage/__init__.py +23 -0
- sqlspec/storage/backends/__init__.py +0 -0
- sqlspec/storage/backends/base.py +163 -0
- sqlspec/storage/backends/fsspec.py +386 -0
- sqlspec/storage/backends/obstore.py +459 -0
- sqlspec/storage/capabilities.py +102 -0
- sqlspec/storage/registry.py +239 -0
- sqlspec/typing.py +299 -0
- sqlspec/utils/__init__.py +3 -0
- sqlspec/utils/correlation.py +150 -0
- sqlspec/utils/deprecation.py +106 -0
- sqlspec/utils/fixtures.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/fixtures.py +58 -0
- sqlspec/utils/logging.py +127 -0
- sqlspec/utils/module_loader.py +89 -0
- sqlspec/utils/serializers.py +4 -0
- sqlspec/utils/singleton.py +32 -0
- sqlspec/utils/sync_tools.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/sync_tools.py +237 -0
- sqlspec/utils/text.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/text.py +96 -0
- sqlspec/utils/type_guards.cpython-39-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/type_guards.py +1139 -0
- sqlspec-0.16.1.dist-info/METADATA +365 -0
- sqlspec-0.16.1.dist-info/RECORD +148 -0
- sqlspec-0.16.1.dist-info/WHEEL +7 -0
- sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
- sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
- sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0

sqlspec/storage/backends/base.py
@@ -0,0 +1,163 @@
+"""Base class for storage backends."""
+
+from abc import ABC, abstractmethod
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+from mypy_extensions import mypyc_attr
+
+from sqlspec.typing import ArrowRecordBatch, ArrowTable
+
+__all__ = ("ObjectStoreBase",)
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
+class ObjectStoreBase(ABC):
+    """Base class for storage backends."""
+
+    __slots__ = ()
+
+    @abstractmethod
+    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
+        """Read bytes from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
+        """Write bytes to storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text to storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects in storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def exists(self, path: str, **kwargs: Any) -> bool:
+        """Check if object exists in storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def delete(self, path: str, **kwargs: Any) -> None:
+        """Delete object from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
+        """Copy object within storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def move(self, source: str, destination: str, **kwargs: Any) -> None:
+        """Move object within storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
+        """Find objects matching pattern."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
+        """Get object metadata from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def is_object(self, path: str) -> bool:
+        """Check if path points to an object."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def is_path(self, path: str) -> bool:
+        """Check if path points to a directory."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table to storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
+        """Stream Arrow record batches from storage."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
+        """Read bytes from storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
+        """Write bytes to storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text to storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects in storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def exists_async(self, path: str, **kwargs: Any) -> bool:
+        """Check if object exists in storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def delete_async(self, path: str, **kwargs: Any) -> None:
+        """Delete object from storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
+        """Copy object within storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
+        """Move object within storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
+        """Get object metadata from storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table from storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table to storage asynchronously."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
+        """Stream Arrow record batches from storage asynchronously."""
+        raise NotImplementedError
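
ObjectStoreBase is a pure interface: every operation, synchronous and asynchronous, is abstract, so a concrete backend cannot be instantiated until it implements all of them. Code that consumes storage, however, can be written against the base class alone. A minimal sketch of that pattern follows; the mirror_text helper is hypothetical, not part of sqlspec.

# Hypothetical helper, not shipped with sqlspec: consumes the ObjectStoreBase
# interface without knowing which concrete backend sits behind it.
from typing import Any

from sqlspec.storage.backends.base import ObjectStoreBase


def mirror_text(store: ObjectStoreBase, source: str, destination: str, **kwargs: Any) -> bool:
    """Copy a text object to a new path; return True if anything was copied."""
    if not store.exists(source, **kwargs):
        return False
    store.write_text(destination, store.read_text(source, **kwargs), **kwargs)
    return True
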
sqlspec/storage/backends/fsspec.py
@@ -0,0 +1,386 @@
+import logging
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
+
+from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
+from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import StorageCapabilities
+from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
+from sqlspec.utils.sync_tools import async_
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterator
+
+    from fsspec import AbstractFileSystem
+
+    from sqlspec.typing import ArrowRecordBatch, ArrowTable
+
+__all__ = ("FSSpecBackend",)
+
+logger = logging.getLogger(__name__)
+
+
+class _ArrowStreamer:
+    def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
+        self.backend = backend
+        self.pattern = pattern
+        self.kwargs = kwargs
+        self.paths_iterator: Optional[Iterator[str]] = None
+        self.batch_iterator: Optional[Iterator[ArrowRecordBatch]] = None
+
+    def __aiter__(self) -> "_ArrowStreamer":
+        return self
+
+    async def _initialize(self) -> None:
+        """Initialize the paths iterator."""
+        if self.paths_iterator is None:
+            paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
+            self.paths_iterator = iter(paths)
+
+    async def __anext__(self) -> "ArrowRecordBatch":
+        await self._initialize()
+
+        if self.batch_iterator:
+            try:
+                return next(self.batch_iterator)
+            except StopIteration:
+                self.batch_iterator = None
+
+        if self.paths_iterator:
+            try:
+                path = next(self.paths_iterator)
+                self.batch_iterator = await async_(self.backend._stream_file_batches)(path)
+                return await self.__anext__()
+            except StopIteration:
+                raise StopAsyncIteration
+        raise StopAsyncIteration
+
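
_ArrowStreamer flattens a two-level iteration (files matched by a glob pattern, then Arrow record batches within each file) into a single async iterator, pushing each blocking step onto a worker thread through sqlspec's async_ helper. A standalone sketch of the same bridging idea, assuming only the standard library; iterate_in_thread is illustrative, not sqlspec code.

import asyncio
from collections.abc import AsyncIterator, Iterator
from typing import TypeVar

T = TypeVar("T")

_DONE = object()  # sentinel marking exhaustion of the underlying iterator


async def iterate_in_thread(sync_iter: Iterator[T]) -> AsyncIterator[T]:
    """Advance a blocking iterator from async code, one next() call per thread hop."""
    while True:
        item = await asyncio.to_thread(next, sync_iter, _DONE)
        if item is _DONE:
            return
        yield item
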
+class FSSpecBackend(ObjectStoreBase):
+    """Storage backend using fsspec.
+
+    Implements the ObjectStoreProtocol using fsspec,
+    providing support for various protocols including HTTP, HTTPS, FTP,
+    and cloud storage services.
+    """
+
+    _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=PYARROW_INSTALLED,
+        supports_streaming=PYARROW_INSTALLED,
+        supports_async=True,
+        supports_compression=True,
+        is_remote=True,
+        is_cloud_native=False,
+    )
+
+    def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
+        if not FSSPEC_INSTALLED:
+            raise MissingDependencyError(package="fsspec", install_package="fsspec")
+
+        self.base_path = base_path.rstrip("/") if base_path else ""
+
+        if isinstance(fs, str):
+            import fsspec
+
+            self.fs = fsspec.filesystem(fs.split("://")[0])
+            self.protocol = fs.split("://")[0]
+            self._fs_uri = fs
+        else:
+            self.fs = fs
+            self.protocol = getattr(fs, "protocol", "unknown")
+            self._fs_uri = f"{self.protocol}://"
+
+        self._instance_capabilities = self._detect_capabilities()
+
+        super().__init__()
+
+    @classmethod
+    def from_config(cls, config: "dict[str, Any]") -> "FSSpecBackend":
+        protocol = config["protocol"]
+        fs_config = config.get("fs_config", {})
+        base_path = config.get("base_path", "")
+
+        import fsspec
+
+        fs_instance = fsspec.filesystem(protocol, **fs_config)
+
+        return cls(fs=fs_instance, base_path=base_path)
+
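
Both construction paths end in fsspec.filesystem(). A hedged sketch of each follows; the memory and file protocols ship with fsspec itself, while cloud protocols such as s3 or gcs require their optional fsspec packages.

from sqlspec.storage.backends.fsspec import FSSpecBackend

# From a protocol string; options beyond the protocol fall back to fsspec defaults.
local = FSSpecBackend("file", base_path="/tmp/sqlspec-demo")

# From a config mapping, exercising from_config above.
mem = FSSpecBackend.from_config({"protocol": "memory", "base_path": "demo"})

The read/write operations these instances drive are defined below.
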
+    def _resolve_path(self, path: Union[str, Path]) -> str:
+        """Resolve path relative to base_path."""
+        path_str = str(path)
+        if self.base_path:
+            clean_base = self.base_path.rstrip("/")
+            clean_path = path_str.lstrip("/")
+            return f"{clean_base}/{clean_path}"
+        return path_str
+
+    def _detect_capabilities(self) -> StorageCapabilities:
+        """Detect capabilities based on underlying filesystem protocol."""
+        protocol = self.protocol.lower()
+
+        if protocol in {"s3", "s3a", "s3n"}:
+            return StorageCapabilities.s3_compatible()
+        if protocol in {"gcs", "gs"}:
+            return StorageCapabilities.gcs()
+        if protocol in {"abfs", "az", "azure"}:
+            return StorageCapabilities.azure_blob()
+        if protocol in {"file", "local"}:
+            return StorageCapabilities.local_filesystem()
+        return StorageCapabilities(
+            supports_arrow=PYARROW_INSTALLED,
+            supports_streaming=PYARROW_INSTALLED,
+            supports_async=True,
+            supports_compression=True,
+            is_remote=True,
+            is_cloud_native=False,
+        )
+
+    @property
+    def capabilities(self) -> StorageCapabilities:
+        """Return instance-specific capabilities based on detected protocol."""
+        return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls._default_capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls._default_capabilities
+
+    @property
+    def backend_type(self) -> str:
+        return "fsspec"
+
+    @property
+    def base_uri(self) -> str:
+        return self._fs_uri
+
+    def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
+        """Read bytes from an object."""
+        try:
+            resolved_path = self._resolve_path(path)
+            return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
+        """Write bytes to an object."""
+        try:
+            resolved_path = self._resolve_path(path)
+            with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
+                f.write(data)  # pyright: ignore
+        except Exception as exc:
+            msg = f"Failed to write bytes to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from an object."""
+        data = self.read_bytes(path, **kwargs)
+        return data.decode(encoding)
+
+    def write_text(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text to an object."""
+        self.write_bytes(path, data.encode(encoding), **kwargs)
+
+    def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
+        """Check if an object exists."""
+        resolved_path = self._resolve_path(path)
+        return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]
+
+    def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
+        """Delete an object."""
+        try:
+            resolved_path = self._resolve_path(path)
+            self.fs.rm(resolved_path, **kwargs)
+        except Exception as exc:
+            msg = f"Failed to delete {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+        """Copy an object."""
+        try:
+            source_path = self._resolve_path(source)
+            dest_path = self._resolve_path(destination)
+            self.fs.copy(source_path, dest_path, **kwargs)
+        except Exception as exc:
+            msg = f"Failed to copy {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+        """Move an object."""
+        try:
+            source_path = self._resolve_path(source)
+            dest_path = self._resolve_path(destination)
+            self.fs.mv(source_path, dest_path, **kwargs)
+        except Exception as exc:
+            msg = f"Failed to move {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
+        """Read an Arrow table from storage."""
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        try:
+            import pyarrow.parquet as pq
+
+            resolved_path = self._resolve_path(path)
+            with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
+                return pq.read_table(f)
+        except Exception as exc:
+            msg = f"Failed to read Arrow table from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
+        """Write an Arrow table to storage."""
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        try:
+            import pyarrow.parquet as pq
+
+            resolved_path = self._resolve_path(path)
+            with self.fs.open(resolved_path, mode="wb") as f:
+                pq.write_table(table, f, **kwargs)  # pyright: ignore
+        except Exception as exc:
+            msg = f"Failed to write Arrow table to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects with optional prefix."""
+        try:
+            resolved_prefix = self._resolve_path(prefix)
+            if recursive:
+                return sorted(self.fs.find(resolved_prefix, **kwargs))
+            return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
+        """Find objects matching a glob pattern."""
+        try:
+            resolved_pattern = self._resolve_path(pattern)
+            return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore
+        except Exception as exc:
+            msg = f"Failed to glob with pattern '{pattern}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    def is_object(self, path: str) -> bool:
+        """Check if path points to an object."""
+        resolved_path = self._resolve_path(path)
+        return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
+
+    def is_path(self, path: str) -> bool:
+        """Check if path points to a prefix (directory-like)."""
+        resolved_path = self._resolve_path(path)
+        return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]
+
+    def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
+        """Get object metadata."""
+        try:
+            resolved_path = self._resolve_path(path)
+            info = self.fs.info(resolved_path, **kwargs)
+            if isinstance(info, dict):
+                return {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": info.get("size"),
+                    "last_modified": info.get("mtime"),
+                    "type": info.get("type", "file"),
+                }
+
+        except FileNotFoundError:
+            return {"path": self._resolve_path(path), "exists": False}
+        except Exception as exc:
+            msg = f"Failed to get metadata for {path}"
+            raise StorageOperationFailedError(msg) from exc
+        return {
+            "path": resolved_path,
+            "exists": True,
+            "size": info.size,
+            "last_modified": info.mtime,
+            "type": info.type,
+        }
+
+    def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
+        import pyarrow.parquet as pq
+
+        with self.fs.open(obj_path, mode="rb") as f:
+            parquet_file = pq.ParquetFile(f)  # pyright: ignore[reportArgumentType]
+            yield from parquet_file.iter_batches()
+
+    def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
+        if not FSSPEC_INSTALLED:
+            raise MissingDependencyError(package="fsspec", install_package="fsspec")
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+
+        for obj_path in self.glob(pattern, **kwargs):
+            yield from self._stream_file_batches(obj_path)
+
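
The synchronous surface above covers byte, text, and Arrow I/O plus namespace operations, all routed through _resolve_path and normalized to StorageOperationFailedError. A short walkthrough over fsspec's in-memory filesystem, as a hedged sketch; exact path spellings returned by list_objects depend on the protocol.

from sqlspec.storage.backends.fsspec import FSSpecBackend

backend = FSSpecBackend.from_config({"protocol": "memory", "base_path": "demo"})
backend.write_text("a/greeting.txt", "hello")      # stored under demo/a/greeting.txt
assert backend.read_text("a/greeting.txt") == "hello"
backend.copy("a/greeting.txt", "b/greeting.txt")
print(backend.list_objects("b"))                   # listing format depends on the protocol
print(backend.get_metadata("b/greeting.txt")["size"])
print(backend.capabilities.is_remote)              # memory falls through to the generic defaults
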
+    async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
+        """Read bytes from storage asynchronously."""
+        return await async_(self.read_bytes)(path, **kwargs)
+
+    async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
+        """Write bytes to storage asynchronously."""
+        return await async_(self.write_bytes)(path, data, **kwargs)
+
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
+        """Stream Arrow record batches from storage asynchronously.
+
+        Args:
+            pattern: The glob pattern to match.
+            **kwargs: Additional arguments to pass to the glob method.
+
+        Returns:
+            AsyncIterator of Arrow record batches
+        """
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+
+        return _ArrowStreamer(self, pattern, **kwargs)
+
+    async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from storage asynchronously."""
+        return await async_(self.read_text)(path, encoding, **kwargs)
+
+    async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text to storage asynchronously."""
+        await async_(self.write_text)(path, data, encoding, **kwargs)
+
+    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects in storage asynchronously."""
+        return await async_(self.list_objects)(prefix, recursive, **kwargs)
+
+    async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
+        """Check if object exists in storage asynchronously."""
+        return await async_(self.exists)(path, **kwargs)
+
+    async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
+        """Delete object from storage asynchronously."""
+        await async_(self.delete)(path, **kwargs)
+
+    async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+        """Copy object in storage asynchronously."""
+        await async_(self.copy)(source, destination, **kwargs)
+
+    async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+        """Move object in storage asynchronously."""
+        await async_(self.move)(source, destination, **kwargs)
+
+    async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
+        """Get object metadata from storage asynchronously."""
+        return await async_(self.get_metadata)(path, **kwargs)
+
+    async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
+        """Read Arrow table from storage asynchronously."""
+        return await async_(self.read_arrow)(path, **kwargs)
+
+    async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
+        """Write Arrow table to storage asynchronously."""
+        await async_(self.write_arrow)(path, table, **kwargs)
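
Each *_async method above re-runs its synchronous counterpart on a worker thread via async_, keeping the two surfaces behaviorally identical; only stream_arrow_async differs, returning the _ArrowStreamer defined earlier rather than awaiting anything itself. A hedged end-to-end sketch:

import asyncio

from sqlspec.storage.backends.fsspec import FSSpecBackend


async def main() -> None:
    backend = FSSpecBackend("memory")
    await backend.write_bytes_async("blobs/first.bin", b"\x00\x01\x02")
    assert await backend.exists_async("blobs/first.bin")
    print(await backend.read_bytes_async("blobs/first.bin"))

    # Streaming needs pyarrow plus Parquet objects matching the pattern;
    # with no matches the loop body simply never runs.
    async for batch in backend.stream_arrow_async("blobs/*.parquet"):
        print(batch.num_rows)


asyncio.run(main())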