sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl
- sqlspec/__init__.py +50 -25
- sqlspec/__main__.py +1 -1
- sqlspec/__metadata__.py +1 -3
- sqlspec/_serialization.py +1 -2
- sqlspec/_sql.py +480 -121
- sqlspec/_typing.py +278 -142
- sqlspec/adapters/adbc/__init__.py +4 -3
- sqlspec/adapters/adbc/_types.py +12 -0
- sqlspec/adapters/adbc/config.py +115 -260
- sqlspec/adapters/adbc/driver.py +462 -367
- sqlspec/adapters/aiosqlite/__init__.py +18 -3
- sqlspec/adapters/aiosqlite/_types.py +13 -0
- sqlspec/adapters/aiosqlite/config.py +199 -129
- sqlspec/adapters/aiosqlite/driver.py +230 -269
- sqlspec/adapters/asyncmy/__init__.py +18 -3
- sqlspec/adapters/asyncmy/_types.py +12 -0
- sqlspec/adapters/asyncmy/config.py +80 -168
- sqlspec/adapters/asyncmy/driver.py +260 -225
- sqlspec/adapters/asyncpg/__init__.py +19 -4
- sqlspec/adapters/asyncpg/_types.py +17 -0
- sqlspec/adapters/asyncpg/config.py +82 -181
- sqlspec/adapters/asyncpg/driver.py +285 -383
- sqlspec/adapters/bigquery/__init__.py +17 -3
- sqlspec/adapters/bigquery/_types.py +12 -0
- sqlspec/adapters/bigquery/config.py +191 -258
- sqlspec/adapters/bigquery/driver.py +474 -646
- sqlspec/adapters/duckdb/__init__.py +14 -3
- sqlspec/adapters/duckdb/_types.py +12 -0
- sqlspec/adapters/duckdb/config.py +415 -351
- sqlspec/adapters/duckdb/driver.py +343 -413
- sqlspec/adapters/oracledb/__init__.py +19 -5
- sqlspec/adapters/oracledb/_types.py +14 -0
- sqlspec/adapters/oracledb/config.py +123 -379
- sqlspec/adapters/oracledb/driver.py +507 -560
- sqlspec/adapters/psqlpy/__init__.py +13 -3
- sqlspec/adapters/psqlpy/_types.py +11 -0
- sqlspec/adapters/psqlpy/config.py +93 -254
- sqlspec/adapters/psqlpy/driver.py +505 -234
- sqlspec/adapters/psycopg/__init__.py +19 -5
- sqlspec/adapters/psycopg/_types.py +17 -0
- sqlspec/adapters/psycopg/config.py +143 -403
- sqlspec/adapters/psycopg/driver.py +706 -872
- sqlspec/adapters/sqlite/__init__.py +14 -3
- sqlspec/adapters/sqlite/_types.py +11 -0
- sqlspec/adapters/sqlite/config.py +202 -118
- sqlspec/adapters/sqlite/driver.py +264 -303
- sqlspec/base.py +105 -9
- sqlspec/{statement/builder → builder}/__init__.py +12 -14
- sqlspec/{statement/builder → builder}/_base.py +120 -55
- sqlspec/{statement/builder → builder}/_column.py +17 -6
- sqlspec/{statement/builder → builder}/_ddl.py +46 -79
- sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
- sqlspec/{statement/builder → builder}/_delete.py +6 -25
- sqlspec/{statement/builder → builder}/_insert.py +18 -65
- sqlspec/builder/_merge.py +56 -0
- sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
- sqlspec/{statement/builder → builder}/_select.py +11 -56
- sqlspec/{statement/builder → builder}/_update.py +12 -18
- sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
- sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
- sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
- sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
- sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
- sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
- sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
- sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
- sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
- sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
- sqlspec/cli.py +4 -5
- sqlspec/config.py +180 -133
- sqlspec/core/__init__.py +63 -0
- sqlspec/core/cache.py +873 -0
- sqlspec/core/compiler.py +396 -0
- sqlspec/core/filters.py +830 -0
- sqlspec/core/hashing.py +310 -0
- sqlspec/core/parameters.py +1209 -0
- sqlspec/core/result.py +664 -0
- sqlspec/{statement → core}/splitter.py +321 -191
- sqlspec/core/statement.py +666 -0
- sqlspec/driver/__init__.py +7 -10
- sqlspec/driver/_async.py +387 -176
- sqlspec/driver/_common.py +527 -289
- sqlspec/driver/_sync.py +390 -172
- sqlspec/driver/mixins/__init__.py +2 -19
- sqlspec/driver/mixins/_result_tools.py +164 -0
- sqlspec/driver/mixins/_sql_translator.py +6 -3
- sqlspec/exceptions.py +5 -252
- sqlspec/extensions/aiosql/adapter.py +93 -96
- sqlspec/extensions/litestar/cli.py +1 -1
- sqlspec/extensions/litestar/config.py +0 -1
- sqlspec/extensions/litestar/handlers.py +15 -26
- sqlspec/extensions/litestar/plugin.py +18 -16
- sqlspec/extensions/litestar/providers.py +17 -52
- sqlspec/loader.py +424 -105
- sqlspec/migrations/__init__.py +12 -0
- sqlspec/migrations/base.py +92 -68
- sqlspec/migrations/commands.py +24 -106
- sqlspec/migrations/loaders.py +402 -0
- sqlspec/migrations/runner.py +49 -51
- sqlspec/migrations/tracker.py +31 -44
- sqlspec/migrations/utils.py +64 -24
- sqlspec/protocols.py +7 -183
- sqlspec/storage/__init__.py +1 -1
- sqlspec/storage/backends/base.py +37 -40
- sqlspec/storage/backends/fsspec.py +136 -112
- sqlspec/storage/backends/obstore.py +138 -160
- sqlspec/storage/capabilities.py +5 -4
- sqlspec/storage/registry.py +57 -106
- sqlspec/typing.py +136 -115
- sqlspec/utils/__init__.py +2 -3
- sqlspec/utils/correlation.py +0 -3
- sqlspec/utils/deprecation.py +6 -6
- sqlspec/utils/fixtures.py +6 -6
- sqlspec/utils/logging.py +0 -2
- sqlspec/utils/module_loader.py +7 -12
- sqlspec/utils/singleton.py +0 -1
- sqlspec/utils/sync_tools.py +17 -38
- sqlspec/utils/text.py +12 -51
- sqlspec/utils/type_guards.py +443 -232
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
- sqlspec-0.16.0.dist-info/RECORD +134 -0
- sqlspec/adapters/adbc/transformers.py +0 -108
- sqlspec/driver/connection.py +0 -207
- sqlspec/driver/mixins/_cache.py +0 -114
- sqlspec/driver/mixins/_csv_writer.py +0 -91
- sqlspec/driver/mixins/_pipeline.py +0 -508
- sqlspec/driver/mixins/_query_tools.py +0 -796
- sqlspec/driver/mixins/_result_utils.py +0 -138
- sqlspec/driver/mixins/_storage.py +0 -912
- sqlspec/driver/mixins/_type_coercion.py +0 -128
- sqlspec/driver/parameters.py +0 -138
- sqlspec/statement/__init__.py +0 -21
- sqlspec/statement/builder/_merge.py +0 -95
- sqlspec/statement/cache.py +0 -50
- sqlspec/statement/filters.py +0 -625
- sqlspec/statement/parameters.py +0 -956
- sqlspec/statement/pipelines/__init__.py +0 -210
- sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
- sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
- sqlspec/statement/pipelines/context.py +0 -109
- sqlspec/statement/pipelines/transformers/__init__.py +0 -7
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
- sqlspec/statement/pipelines/validators/__init__.py +0 -23
- sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
- sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
- sqlspec/statement/pipelines/validators/_performance.py +0 -714
- sqlspec/statement/pipelines/validators/_security.py +0 -967
- sqlspec/statement/result.py +0 -435
- sqlspec/statement/sql.py +0 -1774
- sqlspec/utils/cached_property.py +0 -25
- sqlspec/utils/statement_hashing.py +0 -203
- sqlspec-0.14.1.dist-info/RECORD +0 -145
- /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
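The headline change in this release is structural: `sqlspec/statement/` is split apart, with the builders promoted to `sqlspec/builder/`, the compiler, parameter, filter, result, and statement machinery consolidated under a new `sqlspec/core/` package, and the old pipeline analyzers, transformers, and validators deleted outright. A hypothetical before/after import sketch follows; `Select` and `LimitOffsetFilter` are assumed export names that simply follow the new module paths, so check them against your installed version.

# Hypothetical import migration for the statement/ -> builder/ + core/ split
# listed above; the class names are assumptions, the module paths come from
# the rename entries in the file list.

# 0.14.1
# from sqlspec.statement.builder import Select
# from sqlspec.statement.filters import LimitOffsetFilter

# 0.16.0
from sqlspec.builder import Select
from sqlspec.core.filters import LimitOffsetFilter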
sqlspec/storage/backends/obstore.py
CHANGED

@@ -1,15 +1,17 @@
-"""
+"""Object storage backend using obstore.
 
-
+Implements the ObjectStoreProtocol using obstore,
 providing native support for S3, GCS, Azure, and local file storage
-with
+with Arrow support.
 """
 
 from __future__ import annotations
 
 import fnmatch
 import logging
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
+
+from mypy_extensions import mypyc_attr
 
 from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
 from sqlspec.storage.backends.base import ObjectStoreBase
@@ -27,21 +29,40 @@ __all__ = ("ObStoreBackend",)
 logger = logging.getLogger(__name__)
 
 
+class _AsyncArrowIterator:
+    """Helper class to work around mypyc's lack of async generator support."""
+
+    def __init__(self, store: Any, pattern: str, **kwargs: Any) -> None:
+        self.store = store
+        self.pattern = pattern
+        self.kwargs = kwargs
+        self._iterator: Any | None = None
+
+    def __aiter__(self) -> _AsyncArrowIterator:
+        return self
+
+    async def __anext__(self) -> ArrowRecordBatch:
+        if self._iterator is None:
+            self._iterator = self.store.stream_arrow_async(self.pattern, **self.kwargs)
+        if self._iterator is not None:
+            return cast("ArrowRecordBatch", await self._iterator.__anext__())
+        raise StopAsyncIteration
+
+
+DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
 class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
-    """
+    """Object storage backend using obstore.
 
-
-
-
-    - Google Cloud Storage
-    - Azure Blob Storage
-    - Local filesystem
-    - HTTP endpoints
+    Uses obstore's Rust-based implementation for storage operations,
+    providing native support for AWS S3, Google Cloud Storage, Azure Blob Storage,
+    local filesystem, and HTTP endpoints.
 
-
+    Includes native Arrow support.
     """
 
-    # ObStore has excellent native capabilities
     capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
         supports_arrow=True,
         supports_streaming=True,
@@ -53,6 +74,8 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
         has_low_latency=True,
     )
 
+    __slots__ = ("_path_cache", "base_path", "protocol", "store", "store_options", "store_uri")
+
     def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
         """Initialize obstore backend.
 
@@ -69,26 +92,23 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
             self.store_uri = store_uri
             self.base_path = base_path.rstrip("/") if base_path else ""
             self.store_options = store_options
-            self.store: Any
+            self.store: Any
+            self._path_cache: dict[str, str] = {}
+            self.protocol = store_uri.split("://", 1)[0] if "://" in store_uri else "file"
 
             if store_uri.startswith("memory://"):
-                # MemoryStore doesn't use from_url - create directly
                 from obstore.store import MemoryStore
 
                 self.store = MemoryStore()
             elif store_uri.startswith("file://"):
                 from obstore.store import LocalStore
 
-                # LocalStore works with directory paths, so we use root
                 self.store = LocalStore("/")
-                # The full path will be handled in _resolve_path
             else:
-                # Use obstore's from_url for automatic URI parsing
                 from obstore.store import from_url
 
                 self.store = from_url(store_uri, **store_options)  # pyright: ignore[reportAttributeAccessIssue]
 
-            # Log successful initialization
             logger.debug("ObStore backend initialized for %s", store_uri)
 
         except Exception as exc:
@@ -98,10 +118,10 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     def _resolve_path(self, path: str | Path) -> str:
         """Resolve path relative to base_path."""
         path_str = str(path)
-
+        if path_str.startswith("file://"):
+            path_str = path_str.removeprefix("file://")
         if self.store_uri.startswith("file://") and path_str.startswith("/"):
             return path_str.lstrip("/")
-
         if self.base_path:
             clean_base = self.base_path.rstrip("/")
             clean_path = path_str.lstrip("/")
@@ -113,22 +133,11 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
         """Return backend type identifier."""
         return "obstore"
 
-    # Implementation of abstract methods from ObjectStoreBase
-
     def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Read bytes using obstore."""
         try:
-
-
-            bytes_data = result.bytes()
-            if hasattr(bytes_data, "__bytes__"):
-                return bytes(bytes_data)
-            if hasattr(bytes_data, "tobytes"):
-                return bytes_data.tobytes()  # type: ignore[no-any-return]
-            if isinstance(bytes_data, bytes):
-                return bytes_data
-            # Try to convert to bytes
-            return bytes(bytes_data)
+            result = self.store.get(self._resolve_path(path))
+            return cast("bytes", result.bytes().to_bytes())
         except Exception as exc:
             msg = f"Failed to read bytes from {path}"
             raise StorageOperationFailedError(msg) from exc
@@ -136,41 +145,30 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Write bytes using obstore."""
         try:
-
-            self.store.put(resolved_path, data)
+            self.store.put(self._resolve_path(path), data)
         except Exception as exc:
             msg = f"Failed to write bytes to {path}"
             raise StorageOperationFailedError(msg) from exc
 
     def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text using obstore."""
-
-        return data.decode(encoding)
+        return self.read_bytes(path, **kwargs).decode(encoding)
 
     def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text using obstore."""
-
-        self.write_bytes(path, encoded_data, **kwargs)
+        self.write_bytes(path, data.encode(encoding), **kwargs)
 
     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
         """List objects using obstore."""
-
-
-
-
-
-
-
-
-
-            return str(item)
-
-        if not recursive:
-            objects.extend(_get_item_path(item) for item in self.store.list_with_delimiter(resolved_prefix))  # pyright: ignore
-        else:
-            objects.extend(_get_item_path(item) for item in self.store.list(resolved_prefix))
-
-        return sorted(objects)
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+            items = (
+                self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
+            )
+            return sorted(str(getattr(item, "path", getattr(item, "key", str(item)))) for item in items)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
 
     def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Check if object exists using obstore."""
@@ -207,56 +205,52 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     def glob(self, pattern: str, **kwargs: Any) -> list[str]:
         """Find objects matching pattern using obstore.
 
-
-        lists all objects and filters them client-side, which may be inefficient
-        for large buckets.
+        Lists all objects and filters them client-side using the pattern.
         """
         from pathlib import PurePosixPath
 
-        # List all objects and filter by pattern
        resolved_pattern = self._resolve_path(pattern)
        all_objects = self.list_objects(recursive=True, **kwargs)
 
        if "**" in pattern:
            matching_objects = []
 
-            # Special case: **/*.ext should also match *.ext in root
            if pattern.startswith("**/"):
-                suffix_pattern = pattern[3:]
+                suffix_pattern = pattern[3:]
 
                for obj in all_objects:
                    obj_path = PurePosixPath(obj)
-                    # Try both the full pattern and just the suffix
                    if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
                        matching_objects.append(obj)
            else:
-                # Standard ** pattern matching
                for obj in all_objects:
                    obj_path = PurePosixPath(obj)
                    if obj_path.match(resolved_pattern):
                        matching_objects.append(obj)
 
            return matching_objects
-        # Use standard fnmatch for simple patterns
        return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
 
    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
        """Get object metadata using obstore."""
        resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
        try:
            metadata = self.store.head(resolved_path)
-            result
-
-
-
-
-
-
-
-
-
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": getattr(metadata, "size", None),
+                    "last_modified": getattr(metadata, "last_modified", None),
+                    "e_tag": getattr(metadata, "e_tag", None),
+                    "version": getattr(metadata, "version", None),
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
        except Exception:
-            # Object doesn't exist
            return {"path": resolved_path, "exists": False}
        else:
            return result
@@ -264,19 +258,17 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
    def is_object(self, path: str | Path) -> bool:
        """Check if path is an object using obstore."""
        resolved_path = self._resolve_path(path)
-        # An object exists and doesn't end with /
        return self.exists(path) and not resolved_path.endswith("/")
 
    def is_path(self, path: str | Path) -> bool:
        """Check if path is a prefix/directory using obstore."""
        resolved_path = self._resolve_path(path)
 
-        # A path/prefix either ends with / or has objects under it
        if resolved_path.endswith("/"):
            return True
 
        try:
-            objects = self.list_objects(prefix=str(path), recursive=
+            objects = self.list_objects(prefix=str(path), recursive=True)
            return len(objects) > 0
        except Exception:
            return False
@@ -287,7 +279,7 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
        resolved_path = self._resolve_path(path)
        if hasattr(self.store, "read_arrow"):
            return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
-
+
        import io
 
        import pyarrow.parquet as pq
@@ -306,7 +298,6 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
        if hasattr(self.store, "write_arrow"):
            self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
        else:
-            # Fall back to writing as Parquet via bytes
            import io
 
            import pyarrow as pa
@@ -314,30 +305,22 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
 
            buffer = io.BytesIO()
 
-            # Check for decimal64 columns and convert to decimal128
-            # PyArrow doesn't support decimal64 in Parquet files
            schema = table.schema
-
-
-
-
-
-
-
-
-
-
-
-
-                        needs_conversion = True
+            if any(str(f.type).startswith("decimal64") for f in schema):
+                new_fields = []
+                for field in schema:
+                    if str(field.type).startswith("decimal64"):
+                        import re
+
+                        match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
+                        if match:
+                            precision, scale = int(match.group(1)), int(match.group(2))
+                            new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
+                        else:
+                            new_fields.append(field)  # pragma: no cover
                    else:
                        new_fields.append(field)
-
-                new_fields.append(field)
-
-            if needs_conversion:
-                new_schema = pa.schema(new_fields)
-                table = table.cast(new_schema)
+                table = table.cast(pa.schema(new_fields))
 
            pq.write_table(table, buffer, **kwargs)
            buffer.seek(0)
@@ -359,58 +342,50 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
            msg = f"Failed to stream Arrow data for pattern {pattern}"
            raise StorageOperationFailedError(msg) from exc
 
-    # Private async implementations for instrumentation support
-    # These are called by the base class async methods after instrumentation
-
    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
-        """
-
-
-
-
-            return
-
-
-
-            return bytes_data
-        # Try to convert to bytes
-        return bytes(bytes_data)
+        """Read bytes from storage asynchronously."""
+        try:
+            resolved_path = self._resolve_path(path)
+            result = await self.store.get_async(resolved_path)
+            bytes_obj = await result.bytes_async()
+            return bytes_obj.to_bytes()  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
 
    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Write bytes to storage asynchronously."""
        resolved_path = self._resolve_path(path)
        await self.store.put_async(resolved_path, data)
 
    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
-        """
-
-
-        # Note: store.list_async returns an async iterator
-        objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
+        """List objects in storage asynchronously."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
 
-
-        # async version of list_with_delimiter.
-        if not recursive and resolved_prefix:
-            base_depth = resolved_prefix.count("/")
-            objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
+            objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
 
-
+            if not recursive and resolved_prefix:
+                base_depth = resolved_prefix.count("/")
+                objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
 
-
-
+            return sorted(objects)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
 
    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
-        """
+        """Read text from storage asynchronously."""
        data = await self.read_bytes_async(path, **kwargs)
        return data.decode(encoding)
 
    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Write text to storage asynchronously."""
        encoded_data = data.encode(encoding)
        await self.write_bytes_async(path, encoded_data, **kwargs)
 
    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Check if object exists in storage asynchronously."""
        resolved_path = self._resolve_path(path)
        try:
            await self.store.head_async(resolved_path)
@@ -419,53 +394,57 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
        return True
 
    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Delete object from storage asynchronously."""
        resolved_path = self._resolve_path(path)
        await self.store.delete_async(resolved_path)
 
    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Copy object in storage asynchronously."""
        source_path = self._resolve_path(source)
        dest_path = self._resolve_path(destination)
        await self.store.copy_async(source_path, dest_path)
 
    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Move object in storage asynchronously."""
        source_path = self._resolve_path(source)
        dest_path = self._resolve_path(destination)
        await self.store.rename_async(source_path, dest_path)
 
    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
-        """
+        """Get object metadata from storage asynchronously."""
        resolved_path = self._resolve_path(path)
-
-
-
-
-
-
-
-
-
-
-
-
-
+        result: dict[str, Any] = {}
+        try:
+            metadata = await self.store.head_async(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": metadata.size,
+                    "last_modified": metadata.last_modified,
+                    "e_tag": metadata.e_tag,
+                    "version": metadata.version,
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
 
-
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
 
    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
-        """
+        """Read Arrow table from storage asynchronously."""
        resolved_path = self._resolve_path(path)
        return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
 
    async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
-        """
+        """Write Arrow table to storage asynchronously."""
        resolved_path = self._resolve_path(path)
        if hasattr(self.store, "write_arrow_async"):
            await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
        else:
-            # Fall back to writing as Parquet via bytes
            import io
 
            import pyarrow.parquet as pq
@@ -475,7 +454,6 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
            buffer.seek(0)
            await self.write_bytes_async(resolved_path, buffer.read())
 
-
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
        resolved_pattern = self._resolve_path(pattern)
-
-            yield batch
+        return _AsyncArrowIterator(self.store, resolved_pattern, **kwargs)
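The most interesting rewrite above is stream_arrow_async: it changes from an async generator (the removed yield batch) into a plain method returning _AsyncArrowIterator, because mypyc cannot compile async generators but can compile a class that implements __aiter__/__anext__ by hand. Consumption is unchanged. A minimal sketch, assuming a reachable store; the "memory://" URI and the glob pattern are illustrative only.

import asyncio

from sqlspec.storage.backends.obstore import ObStoreBackend


async def count_rows() -> int:
    backend = ObStoreBackend("memory://")
    rows = 0
    # stream_arrow_async is now a regular method returning _AsyncArrowIterator,
    # but `async for` consumes it exactly as it consumed the old async generator.
    async for batch in backend.stream_arrow_async("data/*.parquet"):
        rows += batch.num_rows
    return rows


print(asyncio.run(count_rows()))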
sqlspec/storage/capabilities.py
CHANGED
@@ -6,6 +6,8 @@ This module provides a centralized way to track and query storage backend capabi
 from dataclasses import dataclass
 from typing import ClassVar
 
+from mypy_extensions import mypyc_attr
+
 __all__ = ("HasStorageCapabilities", "StorageCapabilities")
 
 
@@ -13,7 +15,6 @@ __all__ = ("HasStorageCapabilities", "StorageCapabilities")
 class StorageCapabilities:
     """Tracks capabilities of a storage backend."""
 
-    # Basic operations
     supports_read: bool = True
     supports_write: bool = True
     supports_delete: bool = True
@@ -23,7 +24,6 @@ class StorageCapabilities:
     supports_move: bool = True
     supports_metadata: bool = True
 
-    # Advanced operations
     supports_arrow: bool = False
     supports_streaming: bool = False
     supports_async: bool = False
@@ -31,12 +31,10 @@ class StorageCapabilities:
     supports_multipart_upload: bool = False
     supports_compression: bool = False
 
-    # Protocol-specific features
     supports_s3_select: bool = False
     supports_gcs_compose: bool = False
     supports_azure_snapshots: bool = False
 
-    # Performance characteristics
     is_remote: bool = True
     is_cloud_native: bool = False
     has_low_latency: bool = False
@@ -85,9 +83,12 @@ class StorageCapabilities:
     )
 
 
+@mypyc_attr(allow_interpreted_subclasses=True)
 class HasStorageCapabilities:
     """Mixin for storage backends that expose their capabilities."""
 
+    __slots__ = ()
+
     capabilities: ClassVar[StorageCapabilities]
 
     @classmethod