sqlspec 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlspec/_sql.py +21 -180
- sqlspec/adapters/adbc/config.py +10 -12
- sqlspec/adapters/adbc/driver.py +120 -118
- sqlspec/adapters/aiosqlite/config.py +3 -3
- sqlspec/adapters/aiosqlite/driver.py +116 -141
- sqlspec/adapters/asyncmy/config.py +3 -4
- sqlspec/adapters/asyncmy/driver.py +123 -135
- sqlspec/adapters/asyncpg/config.py +3 -7
- sqlspec/adapters/asyncpg/driver.py +98 -140
- sqlspec/adapters/bigquery/config.py +4 -5
- sqlspec/adapters/bigquery/driver.py +231 -181
- sqlspec/adapters/duckdb/config.py +3 -6
- sqlspec/adapters/duckdb/driver.py +132 -124
- sqlspec/adapters/oracledb/config.py +6 -5
- sqlspec/adapters/oracledb/driver.py +242 -259
- sqlspec/adapters/psqlpy/config.py +3 -7
- sqlspec/adapters/psqlpy/driver.py +118 -93
- sqlspec/adapters/psycopg/config.py +34 -30
- sqlspec/adapters/psycopg/driver.py +342 -214
- sqlspec/adapters/sqlite/config.py +3 -3
- sqlspec/adapters/sqlite/driver.py +150 -104
- sqlspec/config.py +0 -4
- sqlspec/driver/_async.py +89 -98
- sqlspec/driver/_common.py +52 -17
- sqlspec/driver/_sync.py +81 -105
- sqlspec/driver/connection.py +207 -0
- sqlspec/driver/mixins/_csv_writer.py +91 -0
- sqlspec/driver/mixins/_pipeline.py +38 -49
- sqlspec/driver/mixins/_result_utils.py +27 -9
- sqlspec/driver/mixins/_storage.py +149 -216
- sqlspec/driver/mixins/_type_coercion.py +3 -4
- sqlspec/driver/parameters.py +138 -0
- sqlspec/exceptions.py +10 -2
- sqlspec/extensions/aiosql/adapter.py +0 -10
- sqlspec/extensions/litestar/handlers.py +0 -1
- sqlspec/extensions/litestar/plugin.py +0 -3
- sqlspec/extensions/litestar/providers.py +0 -14
- sqlspec/loader.py +31 -118
- sqlspec/protocols.py +542 -0
- sqlspec/service/__init__.py +3 -2
- sqlspec/service/_util.py +147 -0
- sqlspec/service/base.py +1116 -9
- sqlspec/statement/builder/__init__.py +42 -32
- sqlspec/statement/builder/_ddl_utils.py +0 -10
- sqlspec/statement/builder/_parsing_utils.py +10 -4
- sqlspec/statement/builder/base.py +70 -23
- sqlspec/statement/builder/column.py +283 -0
- sqlspec/statement/builder/ddl.py +102 -65
- sqlspec/statement/builder/delete.py +23 -7
- sqlspec/statement/builder/insert.py +29 -15
- sqlspec/statement/builder/merge.py +4 -4
- sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
- sqlspec/statement/builder/mixins/_delete_from.py +1 -1
- sqlspec/statement/builder/mixins/_from.py +10 -8
- sqlspec/statement/builder/mixins/_group_by.py +0 -1
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
- sqlspec/statement/builder/mixins/_insert_values.py +0 -2
- sqlspec/statement/builder/mixins/_join.py +20 -13
- sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
- sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
- sqlspec/statement/builder/mixins/_order_by.py +2 -2
- sqlspec/statement/builder/mixins/_pivot.py +4 -7
- sqlspec/statement/builder/mixins/_select_columns.py +6 -5
- sqlspec/statement/builder/mixins/_unpivot.py +6 -9
- sqlspec/statement/builder/mixins/_update_from.py +2 -1
- sqlspec/statement/builder/mixins/_update_set.py +11 -8
- sqlspec/statement/builder/mixins/_where.py +61 -34
- sqlspec/statement/builder/select.py +32 -17
- sqlspec/statement/builder/update.py +25 -11
- sqlspec/statement/filters.py +39 -14
- sqlspec/statement/parameter_manager.py +220 -0
- sqlspec/statement/parameters.py +210 -79
- sqlspec/statement/pipelines/__init__.py +166 -23
- sqlspec/statement/pipelines/analyzers/_analyzer.py +22 -25
- sqlspec/statement/pipelines/context.py +35 -39
- sqlspec/statement/pipelines/transformers/__init__.py +2 -3
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +667 -43
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
- sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
- sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
- sqlspec/statement/pipelines/validators/_performance.py +38 -23
- sqlspec/statement/pipelines/validators/_security.py +39 -62
- sqlspec/statement/result.py +37 -129
- sqlspec/statement/splitter.py +0 -12
- sqlspec/statement/sql.py +885 -379
- sqlspec/statement/sql_compiler.py +140 -0
- sqlspec/storage/__init__.py +10 -2
- sqlspec/storage/backends/fsspec.py +82 -35
- sqlspec/storage/backends/obstore.py +66 -49
- sqlspec/storage/capabilities.py +101 -0
- sqlspec/storage/registry.py +56 -83
- sqlspec/typing.py +6 -434
- sqlspec/utils/cached_property.py +25 -0
- sqlspec/utils/correlation.py +0 -2
- sqlspec/utils/logging.py +0 -6
- sqlspec/utils/sync_tools.py +0 -4
- sqlspec/utils/text.py +0 -5
- sqlspec/utils/type_guards.py +892 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/METADATA +1 -1
- sqlspec-0.13.0.dist-info/RECORD +150 -0
- sqlspec/statement/builder/protocols.py +0 -20
- sqlspec/statement/pipelines/base.py +0 -315
- sqlspec/statement/pipelines/result_types.py +0 -41
- sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
- sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
- sqlspec/statement/pipelines/validators/base.py +0 -67
- sqlspec/storage/protocol.py +0 -170
- sqlspec-0.12.1.dist-info/RECORD +0 -145
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/storage/backends/obstore.py

@@ -9,14 +9,16 @@ from __future__ import annotations
 
 import fnmatch
 import logging
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
 from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
 from sqlspec.typing import OBSTORE_INSTALLED
 
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator, Iterator
+    from pathlib import Path
 
     from sqlspec.typing import ArrowRecordBatch, ArrowTable
 
@@ -25,7 +27,7 @@ __all__ = ("ObStoreBackend",)
 logger = logging.getLogger(__name__)
 
 
-class ObStoreBackend(ObjectStoreBase):
+class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     """High-performance object storage backend using obstore.
 
     This backend leverages obstore's Rust-based implementation for maximum
@@ -39,6 +41,18 @@ class ObStoreBackend(ObjectStoreBase):
     Features native Arrow support and ~9x better performance than fsspec.
     """
 
+    # ObStore has excellent native capabilities
+    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=True,
+        supports_streaming=True,
+        supports_async=True,
+        supports_batch_operations=True,
+        supports_multipart_upload=True,
+        supports_compression=True,
+        is_cloud_native=True,
+        has_low_latency=True,
+    )
+
     def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
         """Initialize obstore backend.
 
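With the class-level capabilities declaration above, callers can inspect what the obstore backend supports without opening a store, using the helpers defined in sqlspec/storage/capabilities.py (shown later in this diff). A minimal sketch, assuming the 0.13.0 layout in this diff and that the module imports cleanly:

from sqlspec.storage.backends.obstore import ObStoreBackend

# Class-level checks; no store connection is needed.
caps = ObStoreBackend.get_capabilities()
assert caps.supports_arrow and caps.supports_async
assert ObStoreBackend.has_capability("supports_multipart_upload")
assert not ObStoreBackend.has_capability("supports_s3_select")  # not set above, defaults to False
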
@@ -57,14 +71,12 @@ class ObStoreBackend(ObjectStoreBase):
         self.store_options = store_options
         self.store: Any  # Will be set based on store_uri
 
-        # Initialize obstore instance
         if store_uri.startswith("memory://"):
             # MemoryStore doesn't use from_url - create directly
             from obstore.store import MemoryStore
 
             self.store = MemoryStore()
         elif store_uri.startswith("file://"):
-            # For file:// URIs, use LocalStore with root directory
            from obstore.store import LocalStore
 
             # LocalStore works with directory paths, so we use root
@@ -83,19 +95,18 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to initialize obstore backend for {store_uri}"
             raise StorageOperationFailedError(msg) from exc
 
-    def _resolve_path(self, path: str) -> str:
+    def _resolve_path(self, path: str | Path) -> str:
         """Resolve path relative to base_path."""
+        path_str = str(path)
         # For file:// URIs, the path passed in is already absolute
-        if self.store_uri.startswith("file://") and path.startswith("/"):
-
-            return path.lstrip("/")
+        if self.store_uri.startswith("file://") and path_str.startswith("/"):
+            return path_str.lstrip("/")
 
         if self.base_path:
-            # Ensure no double slashes by stripping trailing slash from base_path
             clean_base = self.base_path.rstrip("/")
-            clean_path = path.lstrip("/")
+            clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
-        return path
+        return path_str
 
     @property
     def backend_type(self) -> str:
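For reference, the updated _resolve_path logic above can be read as the standalone sketch below; the function name and free-standing parameters are illustrative stand-ins for the instance attributes:

from __future__ import annotations

from pathlib import Path


def resolve_path(path: str | Path, store_uri: str, base_path: str = "") -> str:
    """Mirror of the updated _resolve_path logic: Path or str in, string key out."""
    path_str = str(path)
    # file:// stores receive absolute paths; LocalStore wants them relative to its root
    if store_uri.startswith("file://") and path_str.startswith("/"):
        return path_str.lstrip("/")
    if base_path:
        clean_base = base_path.rstrip("/")
        clean_path = path_str.lstrip("/")
        return f"{clean_base}/{clean_path}"
    return path_str


assert resolve_path(Path("reports/2024.parquet"), "s3://bucket", "data") == "data/reports/2024.parquet"
assert resolve_path("/tmp/out.parquet", "file:///tmp") == "tmp/out.parquet"
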
@@ -104,17 +115,25 @@ class ObStoreBackend(ObjectStoreBase):
 
     # Implementation of abstract methods from ObjectStoreBase
 
-    def read_bytes(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Read bytes using obstore."""
         try:
             resolved_path = self._resolve_path(path)
             result = self.store.get(resolved_path)
-            return result.bytes()
+            bytes_data = result.bytes()
+            if hasattr(bytes_data, "__bytes__"):
+                return bytes(bytes_data)
+            if hasattr(bytes_data, "tobytes"):
+                return bytes_data.tobytes()  # type: ignore[no-any-return]
+            if isinstance(bytes_data, bytes):
+                return bytes_data
+            # Try to convert to bytes
+            return bytes(bytes_data)
         except Exception as exc:
             msg = f"Failed to read bytes from {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Write bytes using obstore."""
         try:
             resolved_path = self._resolve_path(path)
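The new conversion chain in read_bytes normalizes whatever buffer-like value result.bytes() yields (obstore's Rust-backed bytes view, a memoryview-style object, or plain bytes) into Python bytes. Pulled out of the method, the same logic looks like this; the helper name is hypothetical:

from typing import Any


def coerce_to_bytes(bytes_data: Any) -> bytes:
    """Normalize a buffer-like return value into plain Python bytes."""
    if hasattr(bytes_data, "__bytes__"):
        return bytes(bytes_data)
    if hasattr(bytes_data, "tobytes"):
        return bytes_data.tobytes()
    if isinstance(bytes_data, bytes):
        return bytes_data
    return bytes(bytes_data)


assert coerce_to_bytes(memoryview(b"abc")) == b"abc"
assert coerce_to_bytes(bytearray(b"abc")) == b"abc"
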
@@ -123,12 +142,12 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to write bytes to {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text using obstore."""
         data = self.read_bytes(path, **kwargs)
         return data.decode(encoding)
 
-    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text using obstore."""
         encoded_data = data.encode(encoding)
         self.write_bytes(path, encoded_data, **kwargs)
@@ -153,7 +172,7 @@ class ObStoreBackend(ObjectStoreBase):
 
         return sorted(objects)
 
-    def exists(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Check if object exists using obstore."""
         try:
             self.store.head(self._resolve_path(path))
@@ -161,7 +180,7 @@ class ObStoreBackend(ObjectStoreBase):
             return False
         return True
 
-    def delete(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def delete(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Delete object using obstore."""
         try:
             self.store.delete(self._resolve_path(path))
@@ -169,7 +188,7 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to delete {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def copy(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Copy object using obstore."""
         try:
             self.store.copy(self._resolve_path(source), self._resolve_path(destination))
@@ -177,7 +196,7 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to copy {source} to {destination}"
             raise StorageOperationFailedError(msg) from exc
 
-    def move(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Move object using obstore."""
         try:
             self.store.rename(self._resolve_path(source), self._resolve_path(destination))
@@ -198,17 +217,14 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_pattern = self._resolve_path(pattern)
         all_objects = self.list_objects(recursive=True, **kwargs)
 
-        # For complex patterns with **, use PurePosixPath
         if "**" in pattern:
             matching_objects = []
 
             # Special case: **/*.ext should also match *.ext in root
             if pattern.startswith("**/"):
-                # Get the suffix pattern
                 suffix_pattern = pattern[3:]  # Remove **/
 
                 for obj in all_objects:
-                    # Check if object ends with the suffix pattern
                     obj_path = PurePosixPath(obj)
                     # Try both the full pattern and just the suffix
                     if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
@@ -224,7 +240,7 @@ class ObStoreBackend(ObjectStoreBase):
         # Use standard fnmatch for simple patterns
         return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
 
-    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
         """Get object metadata using obstore."""
         resolved_path = self._resolve_path(path)
         try:
@@ -245,13 +261,13 @@ class ObStoreBackend(ObjectStoreBase):
         else:
             return result
 
-    def is_object(self, path: str) -> bool:
+    def is_object(self, path: str | Path) -> bool:
         """Check if path is an object using obstore."""
         resolved_path = self._resolve_path(path)
         # An object exists and doesn't end with /
         return self.exists(path) and not resolved_path.endswith("/")
 
-    def is_path(self, path: str) -> bool:
+    def is_path(self, path: str | Path) -> bool:
         """Check if path is a prefix/directory using obstore."""
         resolved_path = self._resolve_path(path)
 
@@ -259,18 +275,16 @@ class ObStoreBackend(ObjectStoreBase):
         if resolved_path.endswith("/"):
             return True
 
-        # Check if there are any objects with this prefix
         try:
-            objects = self.list_objects(prefix=path, recursive=False)
+            objects = self.list_objects(prefix=str(path), recursive=False)
             return len(objects) > 0
         except Exception:
             return False
 
-    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
         """Read Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "read_arrow"):
                 return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore[reportAttributeAccessIssue]
             # Fall back to reading as Parquet via bytes
@@ -285,11 +299,10 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to read Arrow table from {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+    def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
         """Write Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "write_arrow"):
                 self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
             else:
@@ -309,7 +322,6 @@ class ObStoreBackend(ObjectStoreBase):
 
         for field in schema:
             if str(field.type).startswith("decimal64"):
-                # Convert decimal64 to decimal128
                 import re
 
                 match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
@@ -350,13 +362,21 @@ class ObStoreBackend(ObjectStoreBase):
     # Private async implementations for instrumentation support
     # These are called by the base class async methods after instrumentation
 
-    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Private async read bytes using native obstore async if available."""
         resolved_path = self._resolve_path(path)
         result = await self.store.get_async(resolved_path)
-        return result.bytes()
-
-    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        bytes_data = result.bytes()
+        if hasattr(bytes_data, "__bytes__"):
+            return bytes(bytes_data)
+        if hasattr(bytes_data, "tobytes"):
+            return bytes_data.tobytes()  # type: ignore[no-any-return]
+        if isinstance(bytes_data, bytes):
+            return bytes_data
+        # Try to convert to bytes
+        return bytes(bytes_data)
+
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Private async write bytes using native obstore async."""
         resolved_path = self._resolve_path(path)
         await self.store.put_async(resolved_path, data)
@@ -379,17 +399,17 @@ class ObStoreBackend(ObjectStoreBase):
     # Implement all other required abstract async methods
     # ObStore provides native async for most operations
 
-    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Async read text using native obstore async."""
         data = await self.read_bytes_async(path, **kwargs)
         return data.decode(encoding)
 
-    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async write text using native obstore async."""
         encoded_data = data.encode(encoding)
         await self.write_bytes_async(path, encoded_data, **kwargs)
 
-    async def exists_async(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Async check if object exists using native obstore async."""
         resolved_path = self._resolve_path(path)
         try:
@@ -398,32 +418,30 @@ class ObStoreBackend(ObjectStoreBase):
             return False
         return True
 
-    async def delete_async(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async delete object using native obstore async."""
         resolved_path = self._resolve_path(path)
         await self.store.delete_async(resolved_path)
 
-    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async copy object using native obstore async."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.copy_async(source_path, dest_path)
 
-    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async move object using native obstore async."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.rename_async(source_path, dest_path)
 
-    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
         """Async get object metadata using native obstore async."""
         resolved_path = self._resolve_path(path)
         metadata = await self.store.head_async(resolved_path)
 
-        # Convert obstore ObjectMeta to dict
         result = {"path": resolved_path, "exists": True}
 
-        # Extract metadata attributes if available
         for attr in ["size", "last_modified", "e_tag", "version"]:
             if hasattr(metadata, attr):
                 result[attr] = getattr(metadata, attr)
@@ -436,15 +454,14 @@ class ObStoreBackend(ObjectStoreBase):
 
         return result
 
-    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
         """Async read Arrow table using native obstore async."""
         resolved_path = self._resolve_path(path)
         return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore[reportAttributeAccessIssue]
 
-    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+    async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
         """Async write Arrow table using native obstore async."""
         resolved_path = self._resolve_path(path)
-        # Check if the store has native async Arrow support
         if hasattr(self.store, "write_arrow_async"):
             await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
         else:
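The async surface mirrors the sync API and delegates to obstore's native async calls (get_async, put_async, head_async, delete_async, copy_async, rename_async). A hedged round-trip sketch, assuming obstore is installed; the async implementations are called directly here purely for illustration:

import asyncio

from sqlspec.storage.backends.obstore import ObStoreBackend


async def round_trip() -> bytes:
    backend = ObStoreBackend("memory://")  # in-memory store special-cased by __init__
    await backend.write_bytes_async("greeting.txt", b"hello")
    return await backend.read_bytes_async("greeting.txt")


print(asyncio.run(round_trip()))  # b'hello' if the round trip succeeds
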
sqlspec/storage/capabilities.py

@@ -0,0 +1,101 @@
+"""Storage backend capability system.
+
+This module provides a centralized way to track and query storage backend capabilities.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar
+
+__all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+@dataclass
+class StorageCapabilities:
+    """Tracks capabilities of a storage backend."""
+
+    # Basic operations
+    supports_read: bool = True
+    supports_write: bool = True
+    supports_delete: bool = True
+    supports_list: bool = True
+    supports_exists: bool = True
+    supports_copy: bool = True
+    supports_move: bool = True
+    supports_metadata: bool = True
+
+    # Advanced operations
+    supports_arrow: bool = False
+    supports_streaming: bool = False
+    supports_async: bool = False
+    supports_batch_operations: bool = False
+    supports_multipart_upload: bool = False
+    supports_compression: bool = False
+
+    # Protocol-specific features
+    supports_s3_select: bool = False
+    supports_gcs_compose: bool = False
+    supports_azure_snapshots: bool = False
+
+    # Performance characteristics
+    is_remote: bool = True
+    is_cloud_native: bool = False
+    has_low_latency: bool = False
+
+    @classmethod
+    def local_filesystem(cls) -> "StorageCapabilities":
+        """Capabilities for local filesystem backend."""
+        return cls(
+            is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+        )
+
+    @classmethod
+    def s3_compatible(cls) -> "StorageCapabilities":
+        """Capabilities for S3-compatible backends."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_s3_select=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def gcs(cls) -> "StorageCapabilities":
+        """Capabilities for Google Cloud Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_gcs_compose=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def azure_blob(cls) -> "StorageCapabilities":
+        """Capabilities for Azure Blob Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_azure_snapshots=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+
+class HasStorageCapabilities:
+    """Mixin for storage backends that expose their capabilities."""
+
+    capabilities: ClassVar[StorageCapabilities]
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls.capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls.capabilities