sqlspec 0.12.1__py3-none-any.whl → 0.12.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/adapters/aiosqlite/driver.py +16 -11
- sqlspec/adapters/bigquery/driver.py +113 -21
- sqlspec/adapters/duckdb/driver.py +18 -13
- sqlspec/adapters/psycopg/config.py +20 -3
- sqlspec/adapters/psycopg/driver.py +82 -1
- sqlspec/adapters/sqlite/driver.py +50 -10
- sqlspec/driver/mixins/_storage.py +83 -36
- sqlspec/loader.py +8 -30
- sqlspec/statement/builder/base.py +3 -1
- sqlspec/statement/builder/ddl.py +14 -1
- sqlspec/statement/pipelines/analyzers/_analyzer.py +1 -5
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +56 -2
- sqlspec/statement/sql.py +40 -6
- sqlspec/storage/backends/fsspec.py +29 -27
- sqlspec/storage/backends/obstore.py +55 -34
- sqlspec/storage/protocol.py +28 -25
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/METADATA +1 -1
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/RECORD +21 -21
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.12.2.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# pyright: ignore=reportUnknownVariableType
|
|
2
2
|
import logging
|
|
3
3
|
from io import BytesIO
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
from typing import TYPE_CHECKING, Any, Union
|
|
5
6
|
|
|
6
7
|
from sqlspec.exceptions import MissingDependencyError
|
|
@@ -77,14 +78,15 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
77
78
|
|
|
78
79
|
return cls(fs=fs_instance, base_path=base_path)
|
|
79
80
|
|
|
80
|
-
def _resolve_path(self, path: str) -> str:
|
|
81
|
+
def _resolve_path(self, path: Union[str, Path]) -> str:
|
|
81
82
|
"""Resolve path relative to base_path."""
|
|
83
|
+
path_str = str(path)
|
|
82
84
|
if self.base_path:
|
|
83
85
|
# Ensure no double slashes
|
|
84
86
|
clean_base = self.base_path.rstrip("/")
|
|
85
|
-
clean_path =
|
|
87
|
+
clean_path = path_str.lstrip("/")
|
|
86
88
|
return f"{clean_base}/{clean_path}"
|
|
87
|
-
return
|
|
89
|
+
return path_str
|
|
88
90
|
|
|
89
91
|
@property
|
|
90
92
|
def backend_type(self) -> str:
|
|
@@ -95,51 +97,51 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
95
97
|
return self._fs_uri
|
|
96
98
|
|
|
97
99
|
# Core Operations (sync)
|
|
98
|
-
def read_bytes(self, path: str, **kwargs: Any) -> bytes:
|
|
100
|
+
def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
|
|
99
101
|
"""Read bytes from an object."""
|
|
100
102
|
resolved_path = self._resolve_path(path)
|
|
101
103
|
return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
|
|
102
104
|
|
|
103
|
-
def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
|
|
105
|
+
def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
|
|
104
106
|
"""Write bytes to an object."""
|
|
105
107
|
resolved_path = self._resolve_path(path)
|
|
106
108
|
with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
|
|
107
109
|
f.write(data) # pyright: ignore
|
|
108
110
|
|
|
109
|
-
def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
111
|
+
def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
110
112
|
"""Read text from an object."""
|
|
111
113
|
data = self.read_bytes(path, **kwargs)
|
|
112
114
|
return data.decode(encoding)
|
|
113
115
|
|
|
114
|
-
def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
116
|
+
def write_text(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
115
117
|
"""Write text to an object."""
|
|
116
118
|
self.write_bytes(path, data.encode(encoding), **kwargs)
|
|
117
119
|
|
|
118
120
|
# Object Operations
|
|
119
|
-
def exists(self, path: str, **kwargs: Any) -> bool:
|
|
121
|
+
def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
|
|
120
122
|
"""Check if an object exists."""
|
|
121
123
|
resolved_path = self._resolve_path(path)
|
|
122
124
|
return self.fs.exists(resolved_path, **kwargs) # type: ignore[no-any-return]
|
|
123
125
|
|
|
124
|
-
def delete(self, path: str, **kwargs: Any) -> None:
|
|
126
|
+
def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
|
|
125
127
|
"""Delete an object."""
|
|
126
128
|
resolved_path = self._resolve_path(path)
|
|
127
129
|
self.fs.rm(resolved_path, **kwargs)
|
|
128
130
|
|
|
129
|
-
def copy(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
131
|
+
def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
|
|
130
132
|
"""Copy an object."""
|
|
131
133
|
source_path = self._resolve_path(source)
|
|
132
134
|
dest_path = self._resolve_path(destination)
|
|
133
135
|
self.fs.copy(source_path, dest_path, **kwargs)
|
|
134
136
|
|
|
135
|
-
def move(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
137
|
+
def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
|
|
136
138
|
"""Move an object."""
|
|
137
139
|
source_path = self._resolve_path(source)
|
|
138
140
|
dest_path = self._resolve_path(destination)
|
|
139
141
|
self.fs.mv(source_path, dest_path, **kwargs)
|
|
140
142
|
|
|
141
143
|
# Arrow Operations
|
|
142
|
-
def read_arrow(self, path: str, **kwargs: Any) -> "ArrowTable":
|
|
144
|
+
def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
|
|
143
145
|
"""Read an Arrow table from storage."""
|
|
144
146
|
if not PYARROW_INSTALLED:
|
|
145
147
|
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
|
|
@@ -150,7 +152,7 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
150
152
|
with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
|
|
151
153
|
return pq.read_table(f)
|
|
152
154
|
|
|
153
|
-
def write_arrow(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
|
|
155
|
+
def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
|
|
154
156
|
"""Write an Arrow table to storage."""
|
|
155
157
|
if not PYARROW_INSTALLED:
|
|
156
158
|
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
|
|
@@ -194,7 +196,7 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
194
196
|
resolved_path = self._resolve_path(path)
|
|
195
197
|
return self.fs.isdir(resolved_path) # type: ignore[no-any-return]
|
|
196
198
|
|
|
197
|
-
def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
|
|
199
|
+
def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
|
|
198
200
|
"""Get object metadata."""
|
|
199
201
|
info = self.fs.info(self._resolve_path(path), **kwargs)
|
|
200
202
|
|
|
@@ -217,7 +219,7 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
217
219
|
"type": getattr(info, "type", "file"),
|
|
218
220
|
}
|
|
219
221
|
|
|
220
|
-
def _stream_file_batches(self, obj_path: str) -> "Iterator[ArrowRecordBatch]":
|
|
222
|
+
def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
|
|
221
223
|
import pyarrow.parquet as pq
|
|
222
224
|
|
|
223
225
|
with self.fs.open(obj_path, mode="rb") as f:
|
|
@@ -234,15 +236,15 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
234
236
|
for obj_path in self.glob(pattern, **kwargs):
|
|
235
237
|
yield from self._stream_file_batches(obj_path)
|
|
236
238
|
|
|
237
|
-
async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
|
|
239
|
+
async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
|
|
238
240
|
"""Async read bytes. Wraps the sync implementation."""
|
|
239
241
|
return await async_(self.read_bytes)(path, **kwargs)
|
|
240
242
|
|
|
241
|
-
async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
|
|
243
|
+
async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
|
|
242
244
|
"""Async write bytes. Wraps the sync implementation."""
|
|
243
245
|
return await async_(self.write_bytes)(path, data, **kwargs)
|
|
244
246
|
|
|
245
|
-
async def _stream_file_batches_async(self, obj_path: str) -> "AsyncIterator[ArrowRecordBatch]":
|
|
247
|
+
async def _stream_file_batches_async(self, obj_path: Union[str, Path]) -> "AsyncIterator[ArrowRecordBatch]":
|
|
246
248
|
import pyarrow.parquet as pq
|
|
247
249
|
|
|
248
250
|
data = await self.read_bytes_async(obj_path)
|
|
@@ -274,11 +276,11 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
274
276
|
async for batch in self._stream_file_batches_async(path):
|
|
275
277
|
yield batch
|
|
276
278
|
|
|
277
|
-
async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
279
|
+
async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
278
280
|
"""Async read text. Wraps the sync implementation."""
|
|
279
281
|
return await async_(self.read_text)(path, encoding, **kwargs)
|
|
280
282
|
|
|
281
|
-
async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
283
|
+
async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
282
284
|
"""Async write text. Wraps the sync implementation."""
|
|
283
285
|
await async_(self.write_text)(path, data, encoding, **kwargs)
|
|
284
286
|
|
|
@@ -286,30 +288,30 @@ class FSSpecBackend(ObjectStoreBase):
|
|
|
286
288
|
"""Async list objects. Wraps the sync implementation."""
|
|
287
289
|
return await async_(self.list_objects)(prefix, recursive, **kwargs)
|
|
288
290
|
|
|
289
|
-
async def exists_async(self, path: str, **kwargs: Any) -> bool:
|
|
291
|
+
async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
|
|
290
292
|
"""Async exists check. Wraps the sync implementation."""
|
|
291
293
|
return await async_(self.exists)(path, **kwargs)
|
|
292
294
|
|
|
293
|
-
async def delete_async(self, path: str, **kwargs: Any) -> None:
|
|
295
|
+
async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
|
|
294
296
|
"""Async delete. Wraps the sync implementation."""
|
|
295
297
|
await async_(self.delete)(path, **kwargs)
|
|
296
298
|
|
|
297
|
-
async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
299
|
+
async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
|
|
298
300
|
"""Async copy. Wraps the sync implementation."""
|
|
299
301
|
await async_(self.copy)(source, destination, **kwargs)
|
|
300
302
|
|
|
301
|
-
async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
303
|
+
async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
|
|
302
304
|
"""Async move. Wraps the sync implementation."""
|
|
303
305
|
await async_(self.move)(source, destination, **kwargs)
|
|
304
306
|
|
|
305
|
-
async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
|
|
307
|
+
async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
|
|
306
308
|
"""Async get metadata. Wraps the sync implementation."""
|
|
307
309
|
return await async_(self.get_metadata)(path, **kwargs)
|
|
308
310
|
|
|
309
|
-
async def read_arrow_async(self, path: str, **kwargs: Any) -> "ArrowTable":
|
|
311
|
+
async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
|
|
310
312
|
"""Async read Arrow. Wraps the sync implementation."""
|
|
311
313
|
return await async_(self.read_arrow)(path, **kwargs)
|
|
312
314
|
|
|
313
|
-
async def write_arrow_async(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
|
|
315
|
+
async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
|
|
314
316
|
"""Async write Arrow. Wraps the sync implementation."""
|
|
315
317
|
await async_(self.write_arrow)(path, table, **kwargs)
|
|
@@ -9,7 +9,7 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
import fnmatch
|
|
11
11
|
import logging
|
|
12
|
-
from typing import TYPE_CHECKING, Any
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
13
|
|
|
14
14
|
from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
|
|
15
15
|
from sqlspec.storage.backends.base import ObjectStoreBase
|
|
@@ -17,6 +17,7 @@ from sqlspec.typing import OBSTORE_INSTALLED
|
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from collections.abc import AsyncIterator, Iterator
|
|
20
|
+
from pathlib import Path
|
|
20
21
|
|
|
21
22
|
from sqlspec.typing import ArrowRecordBatch, ArrowTable
|
|
22
23
|
|
|
@@ -83,19 +84,21 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
83
84
|
msg = f"Failed to initialize obstore backend for {store_uri}"
|
|
84
85
|
raise StorageOperationFailedError(msg) from exc
|
|
85
86
|
|
|
86
|
-
def _resolve_path(self, path: str) -> str:
|
|
87
|
+
def _resolve_path(self, path: str | Path) -> str:
|
|
87
88
|
"""Resolve path relative to base_path."""
|
|
89
|
+
# Convert Path to string
|
|
90
|
+
path_str = str(path)
|
|
88
91
|
# For file:// URIs, the path passed in is already absolute
|
|
89
|
-
if self.store_uri.startswith("file://") and
|
|
92
|
+
if self.store_uri.startswith("file://") and path_str.startswith("/"):
|
|
90
93
|
# Remove leading slash for LocalStore (it's relative to its root)
|
|
91
|
-
return
|
|
94
|
+
return path_str.lstrip("/")
|
|
92
95
|
|
|
93
96
|
if self.base_path:
|
|
94
97
|
# Ensure no double slashes by stripping trailing slash from base_path
|
|
95
98
|
clean_base = self.base_path.rstrip("/")
|
|
96
|
-
clean_path =
|
|
99
|
+
clean_path = path_str.lstrip("/")
|
|
97
100
|
return f"{clean_base}/{clean_path}"
|
|
98
|
-
return
|
|
101
|
+
return path_str
|
|
99
102
|
|
|
100
103
|
@property
|
|
101
104
|
def backend_type(self) -> str:
|
|
@@ -104,17 +107,26 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
104
107
|
|
|
105
108
|
# Implementation of abstract methods from ObjectStoreBase
|
|
106
109
|
|
|
107
|
-
def read_bytes(self, path: str, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
110
|
+
def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
108
111
|
"""Read bytes using obstore."""
|
|
109
112
|
try:
|
|
110
113
|
resolved_path = self._resolve_path(path)
|
|
111
114
|
result = self.store.get(resolved_path)
|
|
112
|
-
|
|
115
|
+
bytes_data = result.bytes()
|
|
116
|
+
# Handle obstore's Bytes type - it might have a method to get raw bytes
|
|
117
|
+
if hasattr(bytes_data, "__bytes__"):
|
|
118
|
+
return bytes(bytes_data)
|
|
119
|
+
if hasattr(bytes_data, "tobytes"):
|
|
120
|
+
return bytes_data.tobytes() # type: ignore[no-any-return]
|
|
121
|
+
if isinstance(bytes_data, bytes):
|
|
122
|
+
return bytes_data
|
|
123
|
+
# Try to convert to bytes
|
|
124
|
+
return bytes(bytes_data)
|
|
113
125
|
except Exception as exc:
|
|
114
126
|
msg = f"Failed to read bytes from {path}"
|
|
115
127
|
raise StorageOperationFailedError(msg) from exc
|
|
116
128
|
|
|
117
|
-
def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
129
|
+
def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
118
130
|
"""Write bytes using obstore."""
|
|
119
131
|
try:
|
|
120
132
|
resolved_path = self._resolve_path(path)
|
|
@@ -123,12 +135,12 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
123
135
|
msg = f"Failed to write bytes to {path}"
|
|
124
136
|
raise StorageOperationFailedError(msg) from exc
|
|
125
137
|
|
|
126
|
-
def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
138
|
+
def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
127
139
|
"""Read text using obstore."""
|
|
128
140
|
data = self.read_bytes(path, **kwargs)
|
|
129
141
|
return data.decode(encoding)
|
|
130
142
|
|
|
131
|
-
def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
143
|
+
def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
132
144
|
"""Write text using obstore."""
|
|
133
145
|
encoded_data = data.encode(encoding)
|
|
134
146
|
self.write_bytes(path, encoded_data, **kwargs)
|
|
@@ -153,7 +165,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
153
165
|
|
|
154
166
|
return sorted(objects)
|
|
155
167
|
|
|
156
|
-
def exists(self, path: str, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
168
|
+
def exists(self, path: str | Path, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
157
169
|
"""Check if object exists using obstore."""
|
|
158
170
|
try:
|
|
159
171
|
self.store.head(self._resolve_path(path))
|
|
@@ -161,7 +173,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
161
173
|
return False
|
|
162
174
|
return True
|
|
163
175
|
|
|
164
|
-
def delete(self, path: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
176
|
+
def delete(self, path: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
165
177
|
"""Delete object using obstore."""
|
|
166
178
|
try:
|
|
167
179
|
self.store.delete(self._resolve_path(path))
|
|
@@ -169,7 +181,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
169
181
|
msg = f"Failed to delete {path}"
|
|
170
182
|
raise StorageOperationFailedError(msg) from exc
|
|
171
183
|
|
|
172
|
-
def copy(self, source: str, destination: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
184
|
+
def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
173
185
|
"""Copy object using obstore."""
|
|
174
186
|
try:
|
|
175
187
|
self.store.copy(self._resolve_path(source), self._resolve_path(destination))
|
|
@@ -177,7 +189,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
177
189
|
msg = f"Failed to copy {source} to {destination}"
|
|
178
190
|
raise StorageOperationFailedError(msg) from exc
|
|
179
191
|
|
|
180
|
-
def move(self, source: str, destination: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
192
|
+
def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
181
193
|
"""Move object using obstore."""
|
|
182
194
|
try:
|
|
183
195
|
self.store.rename(self._resolve_path(source), self._resolve_path(destination))
|
|
@@ -224,7 +236,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
224
236
|
# Use standard fnmatch for simple patterns
|
|
225
237
|
return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
|
|
226
238
|
|
|
227
|
-
def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
239
|
+
def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
228
240
|
"""Get object metadata using obstore."""
|
|
229
241
|
resolved_path = self._resolve_path(path)
|
|
230
242
|
try:
|
|
@@ -245,13 +257,13 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
245
257
|
else:
|
|
246
258
|
return result
|
|
247
259
|
|
|
248
|
-
def is_object(self, path: str) -> bool:
|
|
260
|
+
def is_object(self, path: str | Path) -> bool:
|
|
249
261
|
"""Check if path is an object using obstore."""
|
|
250
262
|
resolved_path = self._resolve_path(path)
|
|
251
263
|
# An object exists and doesn't end with /
|
|
252
264
|
return self.exists(path) and not resolved_path.endswith("/")
|
|
253
265
|
|
|
254
|
-
def is_path(self, path: str) -> bool:
|
|
266
|
+
def is_path(self, path: str | Path) -> bool:
|
|
255
267
|
"""Check if path is a prefix/directory using obstore."""
|
|
256
268
|
resolved_path = self._resolve_path(path)
|
|
257
269
|
|
|
@@ -261,12 +273,12 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
261
273
|
|
|
262
274
|
# Check if there are any objects with this prefix
|
|
263
275
|
try:
|
|
264
|
-
objects = self.list_objects(prefix=path, recursive=False)
|
|
276
|
+
objects = self.list_objects(prefix=str(path), recursive=False)
|
|
265
277
|
return len(objects) > 0
|
|
266
278
|
except Exception:
|
|
267
279
|
return False
|
|
268
280
|
|
|
269
|
-
def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
|
|
281
|
+
def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
|
|
270
282
|
"""Read Arrow table using obstore."""
|
|
271
283
|
try:
|
|
272
284
|
resolved_path = self._resolve_path(path)
|
|
@@ -285,7 +297,7 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
285
297
|
msg = f"Failed to read Arrow table from {path}"
|
|
286
298
|
raise StorageOperationFailedError(msg) from exc
|
|
287
299
|
|
|
288
|
-
def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
|
|
300
|
+
def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
|
|
289
301
|
"""Write Arrow table using obstore."""
|
|
290
302
|
try:
|
|
291
303
|
resolved_path = self._resolve_path(path)
|
|
@@ -350,13 +362,22 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
350
362
|
# Private async implementations for instrumentation support
|
|
351
363
|
# These are called by the base class async methods after instrumentation
|
|
352
364
|
|
|
353
|
-
async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
365
|
+
async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
354
366
|
"""Private async read bytes using native obstore async if available."""
|
|
355
367
|
resolved_path = self._resolve_path(path)
|
|
356
368
|
result = await self.store.get_async(resolved_path)
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
369
|
+
bytes_data = result.bytes()
|
|
370
|
+
# Handle obstore's Bytes type - it might have a method to get raw bytes
|
|
371
|
+
if hasattr(bytes_data, "__bytes__"):
|
|
372
|
+
return bytes(bytes_data)
|
|
373
|
+
if hasattr(bytes_data, "tobytes"):
|
|
374
|
+
return bytes_data.tobytes() # type: ignore[no-any-return]
|
|
375
|
+
if isinstance(bytes_data, bytes):
|
|
376
|
+
return bytes_data
|
|
377
|
+
# Try to convert to bytes
|
|
378
|
+
return bytes(bytes_data)
|
|
379
|
+
|
|
380
|
+
async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
360
381
|
"""Private async write bytes using native obstore async."""
|
|
361
382
|
resolved_path = self._resolve_path(path)
|
|
362
383
|
await self.store.put_async(resolved_path, data)
|
|
@@ -379,17 +400,17 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
379
400
|
# Implement all other required abstract async methods
|
|
380
401
|
# ObStore provides native async for most operations
|
|
381
402
|
|
|
382
|
-
async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
403
|
+
async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
383
404
|
"""Async read text using native obstore async."""
|
|
384
405
|
data = await self.read_bytes_async(path, **kwargs)
|
|
385
406
|
return data.decode(encoding)
|
|
386
407
|
|
|
387
|
-
async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
408
|
+
async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
388
409
|
"""Async write text using native obstore async."""
|
|
389
410
|
encoded_data = data.encode(encoding)
|
|
390
411
|
await self.write_bytes_async(path, encoded_data, **kwargs)
|
|
391
412
|
|
|
392
|
-
async def exists_async(self, path: str, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
413
|
+
async def exists_async(self, path: str | Path, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
393
414
|
"""Async check if object exists using native obstore async."""
|
|
394
415
|
resolved_path = self._resolve_path(path)
|
|
395
416
|
try:
|
|
@@ -398,24 +419,24 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
398
419
|
return False
|
|
399
420
|
return True
|
|
400
421
|
|
|
401
|
-
async def delete_async(self, path: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
422
|
+
async def delete_async(self, path: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
402
423
|
"""Async delete object using native obstore async."""
|
|
403
424
|
resolved_path = self._resolve_path(path)
|
|
404
425
|
await self.store.delete_async(resolved_path)
|
|
405
426
|
|
|
406
|
-
async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
427
|
+
async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
407
428
|
"""Async copy object using native obstore async."""
|
|
408
429
|
source_path = self._resolve_path(source)
|
|
409
430
|
dest_path = self._resolve_path(destination)
|
|
410
431
|
await self.store.copy_async(source_path, dest_path)
|
|
411
432
|
|
|
412
|
-
async def move_async(self, source: str, destination: str, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
433
|
+
async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
413
434
|
"""Async move object using native obstore async."""
|
|
414
435
|
source_path = self._resolve_path(source)
|
|
415
436
|
dest_path = self._resolve_path(destination)
|
|
416
437
|
await self.store.rename_async(source_path, dest_path)
|
|
417
438
|
|
|
418
|
-
async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
439
|
+
async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
419
440
|
"""Async get object metadata using native obstore async."""
|
|
420
441
|
resolved_path = self._resolve_path(path)
|
|
421
442
|
metadata = await self.store.head_async(resolved_path)
|
|
@@ -436,12 +457,12 @@ class ObStoreBackend(ObjectStoreBase):
|
|
|
436
457
|
|
|
437
458
|
return result
|
|
438
459
|
|
|
439
|
-
async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
|
|
460
|
+
async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
|
|
440
461
|
"""Async read Arrow table using native obstore async."""
|
|
441
462
|
resolved_path = self._resolve_path(path)
|
|
442
463
|
return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
443
464
|
|
|
444
|
-
async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
|
|
465
|
+
async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
|
|
445
466
|
"""Async write Arrow table using native obstore async."""
|
|
446
467
|
resolved_path = self._resolve_path(path)
|
|
447
468
|
# Check if the store has native async Arrow support
|
sqlspec/storage/protocol.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Protocol, Union, runtime_checkable
|
|
2
2
|
|
|
3
3
|
if TYPE_CHECKING:
|
|
4
4
|
from collections.abc import AsyncIterator, Iterator
|
|
5
|
+
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
from sqlspec.typing import ArrowRecordBatch, ArrowTable
|
|
7
8
|
|
|
@@ -23,36 +24,36 @@ class ObjectStoreProtocol(Protocol):
|
|
|
23
24
|
return
|
|
24
25
|
|
|
25
26
|
# Core Operations (sync)
|
|
26
|
-
def read_bytes(self, path: str, **kwargs: Any) -> bytes:
|
|
27
|
+
def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
|
|
27
28
|
"""Read bytes from an object."""
|
|
28
29
|
return b""
|
|
29
30
|
|
|
30
|
-
def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
|
|
31
|
+
def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
|
|
31
32
|
"""Write bytes to an object."""
|
|
32
33
|
return
|
|
33
34
|
|
|
34
|
-
def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
35
|
+
def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
35
36
|
"""Read text from an object."""
|
|
36
37
|
return ""
|
|
37
38
|
|
|
38
|
-
def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
39
|
+
def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
39
40
|
"""Write text to an object."""
|
|
40
41
|
return
|
|
41
42
|
|
|
42
43
|
# Object Operations
|
|
43
|
-
def exists(self, path: str, **kwargs: Any) -> bool:
|
|
44
|
+
def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
|
|
44
45
|
"""Check if an object exists."""
|
|
45
46
|
return False
|
|
46
47
|
|
|
47
|
-
def delete(self, path: str, **kwargs: Any) -> None:
|
|
48
|
+
def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None:
|
|
48
49
|
"""Delete an object."""
|
|
49
50
|
return
|
|
50
51
|
|
|
51
|
-
def copy(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
52
|
+
def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
|
|
52
53
|
"""Copy an object."""
|
|
53
54
|
return
|
|
54
55
|
|
|
55
|
-
def move(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
56
|
+
def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
|
|
56
57
|
"""Move an object."""
|
|
57
58
|
return
|
|
58
59
|
|
|
@@ -66,20 +67,20 @@ class ObjectStoreProtocol(Protocol):
|
|
|
66
67
|
return []
|
|
67
68
|
|
|
68
69
|
# Path Operations
|
|
69
|
-
def is_object(self, path: str) -> bool:
|
|
70
|
+
def is_object(self, path: "Union[str, Path]") -> bool:
|
|
70
71
|
"""Check if path points to an object."""
|
|
71
72
|
return False
|
|
72
73
|
|
|
73
|
-
def is_path(self, path: str) -> bool:
|
|
74
|
+
def is_path(self, path: "Union[str, Path]") -> bool:
|
|
74
75
|
"""Check if path points to a prefix (directory-like)."""
|
|
75
76
|
return False
|
|
76
77
|
|
|
77
|
-
def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
|
|
78
|
+
def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
|
|
78
79
|
"""Get object metadata."""
|
|
79
80
|
return {}
|
|
80
81
|
|
|
81
82
|
# Arrow Operations
|
|
82
|
-
def read_arrow(self, path: str, **kwargs: Any) -> "ArrowTable":
|
|
83
|
+
def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
|
|
83
84
|
"""Read an Arrow table from storage.
|
|
84
85
|
|
|
85
86
|
For obstore backend, this should use native arrow operations when available.
|
|
@@ -87,7 +88,7 @@ class ObjectStoreProtocol(Protocol):
|
|
|
87
88
|
msg = "Arrow reading not implemented"
|
|
88
89
|
raise NotImplementedError(msg)
|
|
89
90
|
|
|
90
|
-
def write_arrow(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
|
|
91
|
+
def write_arrow(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
|
|
91
92
|
"""Write an Arrow table to storage.
|
|
92
93
|
|
|
93
94
|
For obstore backend, this should use native arrow operations when available.
|
|
@@ -104,32 +105,34 @@ class ObjectStoreProtocol(Protocol):
|
|
|
104
105
|
raise NotImplementedError(msg)
|
|
105
106
|
|
|
106
107
|
# Async versions
|
|
107
|
-
async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
|
|
108
|
+
async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
|
|
108
109
|
"""Async read bytes from an object."""
|
|
109
110
|
msg = "Async operations not implemented"
|
|
110
111
|
raise NotImplementedError(msg)
|
|
111
112
|
|
|
112
|
-
async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
|
|
113
|
+
async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
|
|
113
114
|
"""Async write bytes to an object."""
|
|
114
115
|
msg = "Async operations not implemented"
|
|
115
116
|
raise NotImplementedError(msg)
|
|
116
117
|
|
|
117
|
-
async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
118
|
+
async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
118
119
|
"""Async read text from an object."""
|
|
119
120
|
msg = "Async operations not implemented"
|
|
120
121
|
raise NotImplementedError(msg)
|
|
121
122
|
|
|
122
|
-
async def write_text_async(
|
|
123
|
+
async def write_text_async(
|
|
124
|
+
self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
|
|
125
|
+
) -> None:
|
|
123
126
|
"""Async write text to an object."""
|
|
124
127
|
msg = "Async operations not implemented"
|
|
125
128
|
raise NotImplementedError(msg)
|
|
126
129
|
|
|
127
|
-
async def exists_async(self, path: str, **kwargs: Any) -> bool:
|
|
130
|
+
async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
|
|
128
131
|
"""Async check if an object exists."""
|
|
129
132
|
msg = "Async operations not implemented"
|
|
130
133
|
raise NotImplementedError(msg)
|
|
131
134
|
|
|
132
|
-
async def delete_async(self, path: str, **kwargs: Any) -> None:
|
|
135
|
+
async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None:
|
|
133
136
|
"""Async delete an object."""
|
|
134
137
|
msg = "Async operations not implemented"
|
|
135
138
|
raise NotImplementedError(msg)
|
|
@@ -139,27 +142,27 @@ class ObjectStoreProtocol(Protocol):
|
|
|
139
142
|
msg = "Async operations not implemented"
|
|
140
143
|
raise NotImplementedError(msg)
|
|
141
144
|
|
|
142
|
-
async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
145
|
+
async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
|
|
143
146
|
"""Async copy an object."""
|
|
144
147
|
msg = "Async operations not implemented"
|
|
145
148
|
raise NotImplementedError(msg)
|
|
146
149
|
|
|
147
|
-
async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
|
|
150
|
+
async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
|
|
148
151
|
"""Async move an object."""
|
|
149
152
|
msg = "Async operations not implemented"
|
|
150
153
|
raise NotImplementedError(msg)
|
|
151
154
|
|
|
152
|
-
async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
|
|
155
|
+
async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
|
|
153
156
|
"""Async get object metadata."""
|
|
154
157
|
msg = "Async operations not implemented"
|
|
155
158
|
raise NotImplementedError(msg)
|
|
156
159
|
|
|
157
|
-
async def read_arrow_async(self, path: str, **kwargs: Any) -> "ArrowTable":
|
|
160
|
+
async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
|
|
158
161
|
"""Async read an Arrow table from storage."""
|
|
159
162
|
msg = "Async arrow reading not implemented"
|
|
160
163
|
raise NotImplementedError(msg)
|
|
161
164
|
|
|
162
|
-
async def write_arrow_async(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
|
|
165
|
+
async def write_arrow_async(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
|
|
163
166
|
"""Async write an Arrow table to storage."""
|
|
164
167
|
msg = "Async arrow writing not implemented"
|
|
165
168
|
raise NotImplementedError(msg)
|