sqlspec 0.16.1__cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic.
- 51ff5a9eadfdefd49f98__mypyc.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/__init__.py +92 -0
- sqlspec/__main__.py +12 -0
- sqlspec/__metadata__.py +14 -0
- sqlspec/_serialization.py +77 -0
- sqlspec/_sql.py +1780 -0
- sqlspec/_typing.py +680 -0
- sqlspec/adapters/__init__.py +0 -0
- sqlspec/adapters/adbc/__init__.py +5 -0
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +361 -0
- sqlspec/adapters/adbc/driver.py +512 -0
- sqlspec/adapters/aiosqlite/__init__.py +19 -0
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +253 -0
- sqlspec/adapters/aiosqlite/driver.py +248 -0
- sqlspec/adapters/asyncmy/__init__.py +19 -0
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +180 -0
- sqlspec/adapters/asyncmy/driver.py +274 -0
- sqlspec/adapters/asyncpg/__init__.py +21 -0
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +229 -0
- sqlspec/adapters/asyncpg/driver.py +344 -0
- sqlspec/adapters/bigquery/__init__.py +18 -0
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +298 -0
- sqlspec/adapters/bigquery/driver.py +558 -0
- sqlspec/adapters/duckdb/__init__.py +22 -0
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +504 -0
- sqlspec/adapters/duckdb/driver.py +368 -0
- sqlspec/adapters/oracledb/__init__.py +32 -0
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +317 -0
- sqlspec/adapters/oracledb/driver.py +538 -0
- sqlspec/adapters/psqlpy/__init__.py +16 -0
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +214 -0
- sqlspec/adapters/psqlpy/driver.py +530 -0
- sqlspec/adapters/psycopg/__init__.py +32 -0
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +426 -0
- sqlspec/adapters/psycopg/driver.py +796 -0
- sqlspec/adapters/sqlite/__init__.py +15 -0
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +240 -0
- sqlspec/adapters/sqlite/driver.py +294 -0
- sqlspec/base.py +571 -0
- sqlspec/builder/__init__.py +62 -0
- sqlspec/builder/_base.py +473 -0
- sqlspec/builder/_column.py +320 -0
- sqlspec/builder/_ddl.py +1346 -0
- sqlspec/builder/_ddl_utils.py +103 -0
- sqlspec/builder/_delete.py +76 -0
- sqlspec/builder/_insert.py +256 -0
- sqlspec/builder/_merge.py +71 -0
- sqlspec/builder/_parsing_utils.py +140 -0
- sqlspec/builder/_select.py +170 -0
- sqlspec/builder/_update.py +188 -0
- sqlspec/builder/mixins/__init__.py +55 -0
- sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
- sqlspec/builder/mixins/_delete_operations.py +41 -0
- sqlspec/builder/mixins/_insert_operations.py +244 -0
- sqlspec/builder/mixins/_join_operations.py +122 -0
- sqlspec/builder/mixins/_merge_operations.py +476 -0
- sqlspec/builder/mixins/_order_limit_operations.py +135 -0
- sqlspec/builder/mixins/_pivot_operations.py +153 -0
- sqlspec/builder/mixins/_select_operations.py +603 -0
- sqlspec/builder/mixins/_update_operations.py +187 -0
- sqlspec/builder/mixins/_where_clause.py +621 -0
- sqlspec/cli.py +247 -0
- sqlspec/config.py +395 -0
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/cache.py +871 -0
- sqlspec/core/compiler.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/compiler.py +417 -0
- sqlspec/core/filters.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/parameters.py +1237 -0
- sqlspec/core/result.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/result.py +677 -0
- sqlspec/core/splitter.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/splitter.py +819 -0
- sqlspec/core/statement.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/core/statement.py +676 -0
- sqlspec/driver/__init__.py +19 -0
- sqlspec/driver/_async.py +502 -0
- sqlspec/driver/_common.py +631 -0
- sqlspec/driver/_sync.py +503 -0
- sqlspec/driver/mixins/__init__.py +6 -0
- sqlspec/driver/mixins/_result_tools.py +193 -0
- sqlspec/driver/mixins/_sql_translator.py +86 -0
- sqlspec/exceptions.py +193 -0
- sqlspec/extensions/__init__.py +0 -0
- sqlspec/extensions/aiosql/__init__.py +10 -0
- sqlspec/extensions/aiosql/adapter.py +461 -0
- sqlspec/extensions/litestar/__init__.py +6 -0
- sqlspec/extensions/litestar/_utils.py +52 -0
- sqlspec/extensions/litestar/cli.py +48 -0
- sqlspec/extensions/litestar/config.py +92 -0
- sqlspec/extensions/litestar/handlers.py +260 -0
- sqlspec/extensions/litestar/plugin.py +145 -0
- sqlspec/extensions/litestar/providers.py +454 -0
- sqlspec/loader.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/loader.py +760 -0
- sqlspec/migrations/__init__.py +35 -0
- sqlspec/migrations/base.py +414 -0
- sqlspec/migrations/commands.py +443 -0
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +213 -0
- sqlspec/migrations/tracker.py +140 -0
- sqlspec/migrations/utils.py +129 -0
- sqlspec/protocols.py +407 -0
- sqlspec/py.typed +0 -0
- sqlspec/storage/__init__.py +23 -0
- sqlspec/storage/backends/__init__.py +0 -0
- sqlspec/storage/backends/base.py +163 -0
- sqlspec/storage/backends/fsspec.py +386 -0
- sqlspec/storage/backends/obstore.py +459 -0
- sqlspec/storage/capabilities.py +102 -0
- sqlspec/storage/registry.py +239 -0
- sqlspec/typing.py +299 -0
- sqlspec/utils/__init__.py +3 -0
- sqlspec/utils/correlation.py +150 -0
- sqlspec/utils/deprecation.py +106 -0
- sqlspec/utils/fixtures.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/fixtures.py +58 -0
- sqlspec/utils/logging.py +127 -0
- sqlspec/utils/module_loader.py +89 -0
- sqlspec/utils/serializers.py +4 -0
- sqlspec/utils/singleton.py +32 -0
- sqlspec/utils/sync_tools.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/sync_tools.py +237 -0
- sqlspec/utils/text.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/text.py +96 -0
- sqlspec/utils/type_guards.cpython-311-aarch64-linux-gnu.so +0 -0
- sqlspec/utils/type_guards.py +1139 -0
- sqlspec-0.16.1.dist-info/METADATA +365 -0
- sqlspec-0.16.1.dist-info/RECORD +148 -0
- sqlspec-0.16.1.dist-info/WHEEL +7 -0
- sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
- sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
- sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0
sqlspec/storage/backends/obstore.py
@@ -0,0 +1,459 @@
+"""Object storage backend using obstore.
+
+Implements the ObjectStoreProtocol using obstore,
+providing native support for S3, GCS, Azure, and local file storage
+with Arrow support.
+"""
+
+from __future__ import annotations
+
+import fnmatch
+import logging
+from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
+
+from mypy_extensions import mypyc_attr
+
+from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
+from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
+from sqlspec.typing import OBSTORE_INSTALLED
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterator
+    from pathlib import Path
+
+    from sqlspec.typing import ArrowRecordBatch, ArrowTable
+
+__all__ = ("ObStoreBackend",)
+
+logger = logging.getLogger(__name__)
+
+
+class _AsyncArrowIterator:
+    """Helper class to work around mypyc's lack of async generator support."""
+
+    def __init__(self, store: Any, pattern: str, **kwargs: Any) -> None:
+        self.store = store
+        self.pattern = pattern
+        self.kwargs = kwargs
+        self._iterator: Any | None = None
+
+    def __aiter__(self) -> _AsyncArrowIterator:
+        return self
+
+    async def __anext__(self) -> ArrowRecordBatch:
+        if self._iterator is None:
+            self._iterator = self.store.stream_arrow_async(self.pattern, **self.kwargs)
+        if self._iterator is not None:
+            return cast("ArrowRecordBatch", await self._iterator.__anext__())
+        raise StopAsyncIteration
+
+
+DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
+class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
+    """Object storage backend using obstore.
+
+    Uses obstore's Rust-based implementation for storage operations,
+    providing native support for AWS S3, Google Cloud Storage, Azure Blob Storage,
+    local filesystem, and HTTP endpoints.
+
+    Includes native Arrow support.
+    """
+
+    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=True,
+        supports_streaming=True,
+        supports_async=True,
+        supports_batch_operations=True,
+        supports_multipart_upload=True,
+        supports_compression=True,
+        is_cloud_native=True,
+        has_low_latency=True,
+    )
+
+    __slots__ = ("_path_cache", "base_path", "protocol", "store", "store_options", "store_uri")
+
+    def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
+        """Initialize obstore backend.
+
+        Args:
+            store_uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
+            base_path: Base path prefix for all operations
+            **store_options: Additional options for obstore configuration
+        """
+
+        if not OBSTORE_INSTALLED:
+            raise MissingDependencyError(package="obstore", install_package="obstore")
+
+        try:
+            self.store_uri = store_uri
+            self.base_path = base_path.rstrip("/") if base_path else ""
+            self.store_options = store_options
+            self.store: Any
+            self._path_cache: dict[str, str] = {}
+            self.protocol = store_uri.split("://", 1)[0] if "://" in store_uri else "file"
+
+            if store_uri.startswith("memory://"):
+                from obstore.store import MemoryStore
+
+                self.store = MemoryStore()
+            elif store_uri.startswith("file://"):
+                from obstore.store import LocalStore
+
+                self.store = LocalStore("/")
+            else:
+                from obstore.store import from_url
+
+                self.store = from_url(store_uri, **store_options)  # pyright: ignore[reportAttributeAccessIssue]
+
+            logger.debug("ObStore backend initialized for %s", store_uri)
+
+        except Exception as exc:
+            msg = f"Failed to initialize obstore backend for {store_uri}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def _resolve_path(self, path: str | Path) -> str:
+        """Resolve path relative to base_path."""
+        path_str = str(path)
+        if path_str.startswith("file://"):
+            path_str = path_str.removeprefix("file://")
+        if self.store_uri.startswith("file://") and path_str.startswith("/"):
+            return path_str.lstrip("/")
+        if self.base_path:
+            clean_base = self.base_path.rstrip("/")
+            clean_path = path_str.lstrip("/")
+            return f"{clean_base}/{clean_path}"
+        return path_str
+
+    @property
+    def backend_type(self) -> str:
+        """Return backend type identifier."""
+        return "obstore"
+
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+        """Read bytes using obstore."""
+        try:
+            result = self.store.get(self._resolve_path(path))
+            return cast("bytes", result.bytes().to_bytes())
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write bytes using obstore."""
+        try:
+            self.store.put(self._resolve_path(path), data)
+        except Exception as exc:
+            msg = f"Failed to write bytes to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text using obstore."""
+        return self.read_bytes(path, **kwargs).decode(encoding)
+
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text using obstore."""
+        self.write_bytes(path, data.encode(encoding), **kwargs)
+
+    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
+        """List objects using obstore."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+            items = (
+                self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
+            )
+            return sorted(str(getattr(item, "path", getattr(item, "key", str(item)))) for item in items)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+        """Check if object exists using obstore."""
+        try:
+            self.store.head(self._resolve_path(path))
+        except Exception:
+            return False
+        return True
+
+    def delete(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Delete object using obstore."""
+        try:
+            self.store.delete(self._resolve_path(path))
+        except Exception as exc:
+            msg = f"Failed to delete {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Copy object using obstore."""
+        try:
+            self.store.copy(self._resolve_path(source), self._resolve_path(destination))
+        except Exception as exc:
+            msg = f"Failed to copy {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Move object using obstore."""
+        try:
+            self.store.rename(self._resolve_path(source), self._resolve_path(destination))
+        except Exception as exc:
+            msg = f"Failed to move {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
+        """Find objects matching pattern using obstore.
+
+        Lists all objects and filters them client-side using the pattern.
+        """
+        from pathlib import PurePosixPath
+
+        resolved_pattern = self._resolve_path(pattern)
+        all_objects = self.list_objects(recursive=True, **kwargs)
+
+        if "**" in pattern:
+            matching_objects = []
+
+            if pattern.startswith("**/"):
+                suffix_pattern = pattern[3:]
+
+                for obj in all_objects:
+                    obj_path = PurePosixPath(obj)
+                    if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
+                        matching_objects.append(obj)
+            else:
+                for obj in all_objects:
+                    obj_path = PurePosixPath(obj)
+                    if obj_path.match(resolved_pattern):
+                        matching_objects.append(obj)
+
+            return matching_objects
+        return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
+
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+        """Get object metadata using obstore."""
+        resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
+        try:
+            metadata = self.store.head(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": getattr(metadata, "size", None),
+                    "last_modified": getattr(metadata, "last_modified", None),
+                    "e_tag": getattr(metadata, "e_tag", None),
+                    "version": getattr(metadata, "version", None),
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
+
+    def is_object(self, path: str | Path) -> bool:
+        """Check if path is an object using obstore."""
+        resolved_path = self._resolve_path(path)
+        return self.exists(path) and not resolved_path.endswith("/")
+
+    def is_path(self, path: str | Path) -> bool:
+        """Check if path is a prefix/directory using obstore."""
+        resolved_path = self._resolve_path(path)
+
+        if resolved_path.endswith("/"):
+            return True
+
+        try:
+            objects = self.list_objects(prefix=str(path), recursive=True)
+            return len(objects) > 0
+        except Exception:
+            return False
+
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table using obstore."""
+        try:
+            resolved_path = self._resolve_path(path)
+            if hasattr(self.store, "read_arrow"):
+                return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+
+            import io
+
+            import pyarrow.parquet as pq
+
+            data = self.read_bytes(resolved_path)
+            buffer = io.BytesIO(data)
+            return pq.read_table(buffer, **kwargs)
+        except Exception as exc:
+            msg = f"Failed to read Arrow table from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table using obstore."""
+        try:
+            resolved_path = self._resolve_path(path)
+            if hasattr(self.store, "write_arrow"):
+                self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+            else:
+                import io
+
+                import pyarrow as pa
+                import pyarrow.parquet as pq
+
+                buffer = io.BytesIO()
+
+                schema = table.schema
+                if any(str(f.type).startswith("decimal64") for f in schema):
+                    new_fields = []
+                    for field in schema:
+                        if str(field.type).startswith("decimal64"):
+                            import re
+
+                            match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
+                            if match:
+                                precision, scale = int(match.group(1)), int(match.group(2))
+                                new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
+                            else:
+                                new_fields.append(field)  # pragma: no cover
+                        else:
+                            new_fields.append(field)
+                    table = table.cast(pa.schema(new_fields))
+
+                pq.write_table(table, buffer, **kwargs)
+                buffer.seek(0)
+                self.write_bytes(resolved_path, buffer.read())
+        except Exception as exc:
+            msg = f"Failed to write Arrow table to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
+        """Stream Arrow record batches using obstore.
+
+        Yields:
+            Iterator of Arrow record batches from matching objects.
+        """
+        try:
+            resolved_pattern = self._resolve_path(pattern)
+            yield from self.store.stream_arrow(resolved_pattern, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to stream Arrow data for pattern {pattern}"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+        """Read bytes from storage asynchronously."""
+        try:
+            resolved_path = self._resolve_path(path)
+            result = await self.store.get_async(resolved_path)
+            bytes_obj = await result.bytes_async()
+            return bytes_obj.to_bytes()  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write bytes to storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        await self.store.put_async(resolved_path, data)
+
+    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
+        """List objects in storage asynchronously."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+
+            objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
+
+            if not recursive and resolved_prefix:
+                base_depth = resolved_prefix.count("/")
+                objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
+
+            return sorted(objects)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from storage asynchronously."""
+        data = await self.read_bytes_async(path, **kwargs)
+        return data.decode(encoding)
+
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write text to storage asynchronously."""
+        encoded_data = data.encode(encoding)
+        await self.write_bytes_async(path, encoded_data, **kwargs)
+
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+        """Check if object exists in storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        try:
+            await self.store.head_async(resolved_path)
+        except Exception:
+            return False
+        return True
+
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Delete object from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        await self.store.delete_async(resolved_path)
+
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Copy object in storage asynchronously."""
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        await self.store.copy_async(source_path, dest_path)
+
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Move object in storage asynchronously."""
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        await self.store.rename_async(source_path, dest_path)
+
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+        """Get object metadata from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
+        try:
+            metadata = await self.store.head_async(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": metadata.size,
+                    "last_modified": metadata.last_modified,
+                    "e_tag": metadata.e_tag,
+                    "version": metadata.version,
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
+
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+
+    async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table to storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        if hasattr(self.store, "write_arrow_async"):
+            await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+        else:
+            import io
+
+            import pyarrow.parquet as pq
+
+            buffer = io.BytesIO()
+            pq.write_table(table, buffer, **kwargs)
+            buffer.seek(0)
+            await self.write_bytes_async(resolved_path, buffer.read())
+
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
+        resolved_pattern = self._resolve_path(pattern)
+        return _AsyncArrowIterator(self.store, resolved_pattern, **kwargs)
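For orientation, here is a minimal usage sketch of the ObStoreBackend added above. It is not part of the diff, and it assumes obstore is installed; it exercises only methods defined in this file, with the "memory://" scheme routed to obstore's MemoryStore by __init__ as shown above. The printed results are illustrative expectations, not guaranteed output.

from sqlspec.storage.backends.obstore import ObStoreBackend

# In-memory store; __init__ maps "memory://" to obstore's MemoryStore.
backend = ObStoreBackend("memory://", base_path="demo")

# Sync round trip; _resolve_path prefixes keys with base_path ("demo/...").
backend.write_text("greeting.txt", "hello")
assert backend.read_bytes("greeting.txt") == b"hello"
assert backend.exists("greeting.txt")

# Listing and glob filter client-side over the resolved prefix.
print(backend.list_objects())    # e.g. ["demo/greeting.txt"]
print(backend.glob("**/*.txt"))  # matched via the "**/" suffix fallback in glob()

Every operation also has an *_async twin (read_bytes_async, delete_async, and so on), and stream_arrow_async returns the _AsyncArrowIterator shim rather than using an async generator, since mypyc-compiled modules cannot define async generators directly.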
sqlspec/storage/capabilities.py
@@ -0,0 +1,102 @@
+"""Storage backend capability system.
+
+This module provides a centralized way to track and query storage backend capabilities.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar
+
+from mypy_extensions import mypyc_attr
+
+__all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+@dataclass
+class StorageCapabilities:
+    """Tracks capabilities of a storage backend."""
+
+    supports_read: bool = True
+    supports_write: bool = True
+    supports_delete: bool = True
+    supports_list: bool = True
+    supports_exists: bool = True
+    supports_copy: bool = True
+    supports_move: bool = True
+    supports_metadata: bool = True
+
+    supports_arrow: bool = False
+    supports_streaming: bool = False
+    supports_async: bool = False
+    supports_batch_operations: bool = False
+    supports_multipart_upload: bool = False
+    supports_compression: bool = False
+
+    supports_s3_select: bool = False
+    supports_gcs_compose: bool = False
+    supports_azure_snapshots: bool = False
+
+    is_remote: bool = True
+    is_cloud_native: bool = False
+    has_low_latency: bool = False
+
+    @classmethod
+    def local_filesystem(cls) -> "StorageCapabilities":
+        """Capabilities for local filesystem backend."""
+        return cls(
+            is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+        )
+
+    @classmethod
+    def s3_compatible(cls) -> "StorageCapabilities":
+        """Capabilities for S3-compatible backends."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_s3_select=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def gcs(cls) -> "StorageCapabilities":
+        """Capabilities for Google Cloud Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_gcs_compose=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def azure_blob(cls) -> "StorageCapabilities":
+        """Capabilities for Azure Blob Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_azure_snapshots=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
+class HasStorageCapabilities:
+    """Mixin for storage backends that expose their capabilities."""
+
+    __slots__ = ()
+
+    capabilities: ClassVar[StorageCapabilities]
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls.capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls.capabilities