sqlspec 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +7 -15
- sqlspec/_serialization.py +55 -25
- sqlspec/_typing.py +155 -52
- sqlspec/adapters/adbc/_types.py +1 -1
- sqlspec/adapters/adbc/adk/__init__.py +5 -0
- sqlspec/adapters/adbc/adk/store.py +880 -0
- sqlspec/adapters/adbc/config.py +62 -12
- sqlspec/adapters/adbc/data_dictionary.py +74 -2
- sqlspec/adapters/adbc/driver.py +226 -58
- sqlspec/adapters/adbc/litestar/__init__.py +5 -0
- sqlspec/adapters/adbc/litestar/store.py +504 -0
- sqlspec/adapters/adbc/type_converter.py +44 -50
- sqlspec/adapters/aiosqlite/_types.py +1 -1
- sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/adk/store.py +536 -0
- sqlspec/adapters/aiosqlite/config.py +86 -16
- sqlspec/adapters/aiosqlite/data_dictionary.py +34 -2
- sqlspec/adapters/aiosqlite/driver.py +127 -38
- sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
- sqlspec/adapters/aiosqlite/pool.py +7 -7
- sqlspec/adapters/asyncmy/__init__.py +7 -1
- sqlspec/adapters/asyncmy/_types.py +1 -1
- sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
- sqlspec/adapters/asyncmy/adk/store.py +503 -0
- sqlspec/adapters/asyncmy/config.py +59 -17
- sqlspec/adapters/asyncmy/data_dictionary.py +41 -2
- sqlspec/adapters/asyncmy/driver.py +293 -62
- sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncmy/litestar/store.py +296 -0
- sqlspec/adapters/asyncpg/__init__.py +2 -1
- sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
- sqlspec/adapters/asyncpg/_types.py +11 -7
- sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
- sqlspec/adapters/asyncpg/adk/store.py +460 -0
- sqlspec/adapters/asyncpg/config.py +57 -36
- sqlspec/adapters/asyncpg/data_dictionary.py +48 -2
- sqlspec/adapters/asyncpg/driver.py +153 -23
- sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncpg/litestar/store.py +253 -0
- sqlspec/adapters/bigquery/_types.py +1 -1
- sqlspec/adapters/bigquery/adk/__init__.py +5 -0
- sqlspec/adapters/bigquery/adk/store.py +585 -0
- sqlspec/adapters/bigquery/config.py +36 -11
- sqlspec/adapters/bigquery/data_dictionary.py +42 -2
- sqlspec/adapters/bigquery/driver.py +489 -144
- sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
- sqlspec/adapters/bigquery/litestar/store.py +327 -0
- sqlspec/adapters/bigquery/type_converter.py +55 -23
- sqlspec/adapters/duckdb/_types.py +2 -2
- sqlspec/adapters/duckdb/adk/__init__.py +14 -0
- sqlspec/adapters/duckdb/adk/store.py +563 -0
- sqlspec/adapters/duckdb/config.py +79 -21
- sqlspec/adapters/duckdb/data_dictionary.py +41 -2
- sqlspec/adapters/duckdb/driver.py +225 -44
- sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
- sqlspec/adapters/duckdb/litestar/store.py +332 -0
- sqlspec/adapters/duckdb/pool.py +5 -5
- sqlspec/adapters/duckdb/type_converter.py +51 -21
- sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
- sqlspec/adapters/oracledb/_types.py +20 -2
- sqlspec/adapters/oracledb/adk/__init__.py +5 -0
- sqlspec/adapters/oracledb/adk/store.py +1628 -0
- sqlspec/adapters/oracledb/config.py +120 -36
- sqlspec/adapters/oracledb/data_dictionary.py +87 -20
- sqlspec/adapters/oracledb/driver.py +475 -86
- sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
- sqlspec/adapters/oracledb/litestar/store.py +765 -0
- sqlspec/adapters/oracledb/migrations.py +316 -25
- sqlspec/adapters/oracledb/type_converter.py +91 -16
- sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
- sqlspec/adapters/psqlpy/_types.py +2 -1
- sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
- sqlspec/adapters/psqlpy/adk/store.py +483 -0
- sqlspec/adapters/psqlpy/config.py +45 -19
- sqlspec/adapters/psqlpy/data_dictionary.py +48 -2
- sqlspec/adapters/psqlpy/driver.py +108 -41
- sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
- sqlspec/adapters/psqlpy/litestar/store.py +272 -0
- sqlspec/adapters/psqlpy/type_converter.py +40 -11
- sqlspec/adapters/psycopg/_type_handlers.py +80 -0
- sqlspec/adapters/psycopg/_types.py +2 -1
- sqlspec/adapters/psycopg/adk/__init__.py +5 -0
- sqlspec/adapters/psycopg/adk/store.py +962 -0
- sqlspec/adapters/psycopg/config.py +65 -37
- sqlspec/adapters/psycopg/data_dictionary.py +91 -3
- sqlspec/adapters/psycopg/driver.py +200 -78
- sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
- sqlspec/adapters/psycopg/litestar/store.py +554 -0
- sqlspec/adapters/sqlite/__init__.py +2 -1
- sqlspec/adapters/sqlite/_type_handlers.py +86 -0
- sqlspec/adapters/sqlite/_types.py +1 -1
- sqlspec/adapters/sqlite/adk/__init__.py +5 -0
- sqlspec/adapters/sqlite/adk/store.py +582 -0
- sqlspec/adapters/sqlite/config.py +85 -16
- sqlspec/adapters/sqlite/data_dictionary.py +34 -2
- sqlspec/adapters/sqlite/driver.py +120 -52
- sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/sqlite/litestar/store.py +318 -0
- sqlspec/adapters/sqlite/pool.py +5 -5
- sqlspec/base.py +45 -26
- sqlspec/builder/__init__.py +73 -4
- sqlspec/builder/_base.py +91 -58
- sqlspec/builder/_column.py +5 -5
- sqlspec/builder/_ddl.py +98 -89
- sqlspec/builder/_delete.py +5 -4
- sqlspec/builder/_dml.py +388 -0
- sqlspec/{_sql.py → builder/_factory.py} +41 -44
- sqlspec/builder/_insert.py +5 -82
- sqlspec/builder/{mixins/_join_operations.py → _join.py} +145 -143
- sqlspec/builder/_merge.py +446 -11
- sqlspec/builder/_parsing_utils.py +9 -11
- sqlspec/builder/_select.py +1313 -25
- sqlspec/builder/_update.py +11 -42
- sqlspec/cli.py +76 -69
- sqlspec/config.py +331 -62
- sqlspec/core/__init__.py +5 -4
- sqlspec/core/cache.py +18 -18
- sqlspec/core/compiler.py +6 -8
- sqlspec/core/filters.py +55 -47
- sqlspec/core/hashing.py +9 -9
- sqlspec/core/parameters.py +76 -45
- sqlspec/core/result.py +234 -47
- sqlspec/core/splitter.py +16 -17
- sqlspec/core/statement.py +32 -31
- sqlspec/core/type_conversion.py +3 -2
- sqlspec/driver/__init__.py +1 -3
- sqlspec/driver/_async.py +183 -160
- sqlspec/driver/_common.py +197 -109
- sqlspec/driver/_sync.py +189 -161
- sqlspec/driver/mixins/_result_tools.py +20 -236
- sqlspec/driver/mixins/_sql_translator.py +4 -4
- sqlspec/exceptions.py +70 -7
- sqlspec/extensions/adk/__init__.py +53 -0
- sqlspec/extensions/adk/_types.py +51 -0
- sqlspec/extensions/adk/converters.py +172 -0
- sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
- sqlspec/extensions/adk/migrations/__init__.py +0 -0
- sqlspec/extensions/adk/service.py +181 -0
- sqlspec/extensions/adk/store.py +536 -0
- sqlspec/extensions/aiosql/adapter.py +69 -61
- sqlspec/extensions/fastapi/__init__.py +21 -0
- sqlspec/extensions/fastapi/extension.py +331 -0
- sqlspec/extensions/fastapi/providers.py +543 -0
- sqlspec/extensions/flask/__init__.py +36 -0
- sqlspec/extensions/flask/_state.py +71 -0
- sqlspec/extensions/flask/_utils.py +40 -0
- sqlspec/extensions/flask/extension.py +389 -0
- sqlspec/extensions/litestar/__init__.py +21 -4
- sqlspec/extensions/litestar/cli.py +54 -10
- sqlspec/extensions/litestar/config.py +56 -266
- sqlspec/extensions/litestar/handlers.py +46 -17
- sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
- sqlspec/extensions/litestar/migrations/__init__.py +3 -0
- sqlspec/extensions/litestar/plugin.py +349 -224
- sqlspec/extensions/litestar/providers.py +25 -25
- sqlspec/extensions/litestar/store.py +265 -0
- sqlspec/extensions/starlette/__init__.py +10 -0
- sqlspec/extensions/starlette/_state.py +25 -0
- sqlspec/extensions/starlette/_utils.py +52 -0
- sqlspec/extensions/starlette/extension.py +254 -0
- sqlspec/extensions/starlette/middleware.py +154 -0
- sqlspec/loader.py +30 -49
- sqlspec/migrations/base.py +200 -76
- sqlspec/migrations/commands.py +591 -62
- sqlspec/migrations/context.py +6 -9
- sqlspec/migrations/fix.py +199 -0
- sqlspec/migrations/loaders.py +47 -19
- sqlspec/migrations/runner.py +241 -75
- sqlspec/migrations/tracker.py +237 -21
- sqlspec/migrations/utils.py +51 -3
- sqlspec/migrations/validation.py +177 -0
- sqlspec/protocols.py +106 -36
- sqlspec/storage/_utils.py +85 -0
- sqlspec/storage/backends/fsspec.py +133 -107
- sqlspec/storage/backends/local.py +78 -51
- sqlspec/storage/backends/obstore.py +276 -168
- sqlspec/storage/registry.py +75 -39
- sqlspec/typing.py +30 -84
- sqlspec/utils/__init__.py +25 -4
- sqlspec/utils/arrow_helpers.py +81 -0
- sqlspec/utils/config_resolver.py +6 -6
- sqlspec/utils/correlation.py +4 -5
- sqlspec/utils/data_transformation.py +3 -2
- sqlspec/utils/deprecation.py +9 -8
- sqlspec/utils/fixtures.py +4 -4
- sqlspec/utils/logging.py +46 -6
- sqlspec/utils/module_loader.py +205 -5
- sqlspec/utils/portal.py +311 -0
- sqlspec/utils/schema.py +288 -0
- sqlspec/utils/serializers.py +113 -4
- sqlspec/utils/sync_tools.py +36 -22
- sqlspec/utils/text.py +1 -2
- sqlspec/utils/type_guards.py +136 -20
- sqlspec/utils/version.py +433 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/METADATA +41 -22
- sqlspec-0.28.0.dist-info/RECORD +221 -0
- sqlspec/builder/mixins/__init__.py +0 -55
- sqlspec/builder/mixins/_cte_and_set_ops.py +0 -253
- sqlspec/builder/mixins/_delete_operations.py +0 -50
- sqlspec/builder/mixins/_insert_operations.py +0 -282
- sqlspec/builder/mixins/_merge_operations.py +0 -698
- sqlspec/builder/mixins/_order_limit_operations.py +0 -145
- sqlspec/builder/mixins/_pivot_operations.py +0 -157
- sqlspec/builder/mixins/_select_operations.py +0 -930
- sqlspec/builder/mixins/_update_operations.py +0 -199
- sqlspec/builder/mixins/_where_clause.py +0 -1298
- sqlspec-0.26.0.dist-info/RECORD +0 -157
- sqlspec-0.26.0.dist-info/licenses/NOTICE +0 -29
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,18 +5,20 @@ and local file storage.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import fnmatch
|
|
8
|
+
import io
|
|
8
9
|
import logging
|
|
9
10
|
from collections.abc import AsyncIterator, Iterator
|
|
10
|
-
from
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Final, cast
|
|
11
13
|
from urllib.parse import urlparse
|
|
12
14
|
|
|
13
|
-
if TYPE_CHECKING:
|
|
14
|
-
from pathlib import Path
|
|
15
|
-
|
|
16
15
|
from mypy_extensions import mypyc_attr
|
|
17
16
|
|
|
18
|
-
from sqlspec.exceptions import
|
|
19
|
-
from sqlspec.
|
|
17
|
+
from sqlspec.exceptions import StorageOperationFailedError
|
|
18
|
+
from sqlspec.storage._utils import resolve_storage_path
|
|
19
|
+
from sqlspec.typing import ArrowRecordBatch, ArrowTable
|
|
20
|
+
from sqlspec.utils.module_loader import ensure_obstore, ensure_pyarrow
|
|
21
|
+
from sqlspec.utils.sync_tools import async_
|
|
20
22
|
|
|
21
23
|
__all__ = ("ObStoreBackend",)
|
|
22
24
|
|
|
@@ -24,37 +26,64 @@ logger = logging.getLogger(__name__)
|
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
class _AsyncArrowIterator:
|
|
27
|
-
"""Helper class to work around mypyc's lack of async generator support.
|
|
29
|
+
"""Helper class to work around mypyc's lack of async generator support.
|
|
30
|
+
|
|
31
|
+
Uses hybrid async/sync pattern:
|
|
32
|
+
- Native async I/O for network operations (S3, GCS, Azure)
|
|
33
|
+
- Thread pool for CPU-bound PyArrow parsing
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
__slots__ = ("_current_file_iterator", "_files_iterator", "backend", "kwargs", "pattern")
|
|
28
37
|
|
|
29
38
|
def __init__(self, backend: "ObStoreBackend", pattern: str, **kwargs: Any) -> None:
|
|
30
39
|
self.backend = backend
|
|
31
40
|
self.pattern = pattern
|
|
32
41
|
self.kwargs = kwargs
|
|
33
|
-
self._files_iterator:
|
|
34
|
-
self._current_file_iterator:
|
|
42
|
+
self._files_iterator: Iterator[str] | None = None
|
|
43
|
+
self._current_file_iterator: Iterator[ArrowRecordBatch] | None = None
|
|
35
44
|
|
|
36
45
|
def __aiter__(self) -> "_AsyncArrowIterator":
|
|
37
46
|
return self
|
|
38
47
|
|
|
39
48
|
async def __anext__(self) -> ArrowRecordBatch:
|
|
49
|
+
import pyarrow.parquet as pq
|
|
50
|
+
|
|
40
51
|
if self._files_iterator is None:
|
|
41
52
|
files = self.backend.glob(self.pattern, **self.kwargs)
|
|
42
53
|
self._files_iterator = iter(files)
|
|
43
54
|
|
|
44
55
|
while True:
|
|
45
56
|
if self._current_file_iterator is not None:
|
|
57
|
+
|
|
58
|
+
def _safe_next_batch() -> ArrowRecordBatch:
|
|
59
|
+
try:
|
|
60
|
+
return next(self._current_file_iterator) # type: ignore[arg-type]
|
|
61
|
+
except StopIteration as e:
|
|
62
|
+
raise StopAsyncIteration from e
|
|
63
|
+
|
|
46
64
|
try:
|
|
47
|
-
return
|
|
48
|
-
except
|
|
65
|
+
return await async_(_safe_next_batch)()
|
|
66
|
+
except StopAsyncIteration:
|
|
49
67
|
self._current_file_iterator = None
|
|
68
|
+
continue
|
|
50
69
|
|
|
51
70
|
try:
|
|
52
71
|
next_file = next(self._files_iterator)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
72
|
+
except StopIteration as e:
|
|
73
|
+
raise StopAsyncIteration from e
|
|
74
|
+
|
|
75
|
+
data = await self.backend.read_bytes_async(next_file)
|
|
76
|
+
parquet_file = pq.ParquetFile(io.BytesIO(data))
|
|
77
|
+
self._current_file_iterator = parquet_file.iter_batches()
|
|
78
|
+
|
|
79
|
+
async def aclose(self) -> None:
|
|
80
|
+
"""Close underlying file iterator."""
|
|
81
|
+
if self._current_file_iterator is not None:
|
|
82
|
+
try:
|
|
83
|
+
close_method = self._current_file_iterator.close # type: ignore[attr-defined]
|
|
84
|
+
await async_(close_method)() # pyright: ignore
|
|
85
|
+
except AttributeError:
|
|
86
|
+
pass
|
|
58
87
|
|
|
59
88
|
|
|
60
89
|
DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
|
|
@@ -69,17 +98,17 @@ class ObStoreBackend:
|
|
|
69
98
|
local filesystem, and HTTP endpoints.
|
|
70
99
|
"""
|
|
71
100
|
|
|
72
|
-
__slots__ = (
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
""
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
""
|
|
81
|
-
|
|
82
|
-
|
|
101
|
+
__slots__ = (
|
|
102
|
+
"_is_local_store",
|
|
103
|
+
"_local_store_root",
|
|
104
|
+
"_path_cache",
|
|
105
|
+
"backend_type",
|
|
106
|
+
"base_path",
|
|
107
|
+
"protocol",
|
|
108
|
+
"store",
|
|
109
|
+
"store_options",
|
|
110
|
+
"store_uri",
|
|
111
|
+
)
|
|
83
112
|
|
|
84
113
|
def __init__(self, uri: str, **kwargs: Any) -> None:
|
|
85
114
|
"""Initialize obstore backend.
|
|
@@ -87,9 +116,9 @@ class ObStoreBackend:
|
|
|
87
116
|
Args:
|
|
88
117
|
uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
|
|
89
118
|
**kwargs: Additional options including base_path and obstore configuration
|
|
90
|
-
"""
|
|
91
119
|
|
|
92
|
-
|
|
120
|
+
"""
|
|
121
|
+
ensure_obstore()
|
|
93
122
|
|
|
94
123
|
try:
|
|
95
124
|
# Extract base_path from kwargs
|
|
@@ -100,6 +129,8 @@ class ObStoreBackend:
|
|
|
100
129
|
self.store_options = kwargs
|
|
101
130
|
self.store: Any
|
|
102
131
|
self._path_cache: dict[str, str] = {}
|
|
132
|
+
self._is_local_store = False
|
|
133
|
+
self._local_store_root = ""
|
|
103
134
|
self.protocol = uri.split("://", 1)[0] if "://" in uri else "file"
|
|
104
135
|
self.backend_type = "obstore"
|
|
105
136
|
|
|
@@ -108,15 +139,28 @@ class ObStoreBackend:
|
|
|
108
139
|
|
|
109
140
|
self.store = MemoryStore()
|
|
110
141
|
elif uri.startswith("file://"):
|
|
111
|
-
from pathlib import Path as PathlibPath
|
|
112
|
-
|
|
113
142
|
from obstore.store import LocalStore
|
|
114
143
|
|
|
144
|
+
# Parse URI to extract path
|
|
145
|
+
# Note: urlparse splits on '#', so we need to reconstruct the full path
|
|
115
146
|
parsed = urlparse(uri)
|
|
116
|
-
|
|
117
|
-
#
|
|
118
|
-
|
|
119
|
-
|
|
147
|
+
path_str = parsed.path or "/"
|
|
148
|
+
# Append fragment if present (handles paths with '#' character)
|
|
149
|
+
if parsed.fragment:
|
|
150
|
+
path_str = f"{path_str}#{parsed.fragment}"
|
|
151
|
+
path_obj = Path(path_str)
|
|
152
|
+
|
|
153
|
+
# If path points to a file, use its parent as the base directory
|
|
154
|
+
if path_obj.is_file():
|
|
155
|
+
path_str = str(path_obj.parent)
|
|
156
|
+
|
|
157
|
+
# If base_path provided via kwargs, use it as LocalStore root
|
|
158
|
+
# Otherwise use the URI path
|
|
159
|
+
local_store_root = self.base_path or path_str
|
|
160
|
+
|
|
161
|
+
self._is_local_store = True
|
|
162
|
+
self._local_store_root = local_store_root
|
|
163
|
+
self.store = LocalStore(local_store_root, mkdir=True)
|
|
120
164
|
else:
|
|
121
165
|
from obstore.store import from_url
|
|
122
166
|
|
|
@@ -141,64 +185,90 @@ class ObStoreBackend:
|
|
|
141
185
|
|
|
142
186
|
return cls(uri=store_uri, **kwargs)
|
|
143
187
|
|
|
144
|
-
def
|
|
145
|
-
"""Resolve path relative
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
188
|
+
def _resolve_path_for_local_store(self, path: "str | Path") -> str:
|
|
189
|
+
"""Resolve path for LocalStore which expects relative paths from its root."""
|
|
190
|
+
|
|
191
|
+
path_obj = Path(str(path))
|
|
192
|
+
|
|
193
|
+
# If absolute path, try to make it relative to LocalStore root
|
|
194
|
+
if path_obj.is_absolute() and self._local_store_root:
|
|
195
|
+
try:
|
|
196
|
+
return str(path_obj.relative_to(self._local_store_root))
|
|
197
|
+
except ValueError:
|
|
198
|
+
# Path is outside LocalStore root - strip leading / as fallback
|
|
199
|
+
return str(path).lstrip("/")
|
|
200
|
+
|
|
201
|
+
# Relative path - return as-is (already relative to LocalStore root)
|
|
202
|
+
return str(path)
|
|
203
|
+
|
|
204
|
+
def read_bytes(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
158
205
|
"""Read bytes using obstore."""
|
|
159
|
-
|
|
206
|
+
# For LocalStore, use special path resolution (relative to LocalStore root)
|
|
207
|
+
if self._is_local_store:
|
|
208
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
209
|
+
else:
|
|
210
|
+
# For cloud storage, use standard resolution
|
|
211
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
212
|
+
|
|
213
|
+
result = self.store.get(resolved_path)
|
|
160
214
|
return cast("bytes", result.bytes().to_bytes())
|
|
161
215
|
|
|
162
|
-
def write_bytes(self, path: "
|
|
216
|
+
def write_bytes(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
163
217
|
"""Write bytes using obstore."""
|
|
164
|
-
|
|
218
|
+
# For LocalStore, use special path resolution (relative to LocalStore root)
|
|
219
|
+
if self._is_local_store:
|
|
220
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
221
|
+
else:
|
|
222
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
223
|
+
|
|
224
|
+
self.store.put(resolved_path, data)
|
|
165
225
|
|
|
166
|
-
def read_text(self, path: "
|
|
226
|
+
def read_text(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
167
227
|
"""Read text using obstore."""
|
|
168
228
|
return self.read_bytes(path, **kwargs).decode(encoding)
|
|
169
229
|
|
|
170
|
-
def write_text(self, path: "
|
|
230
|
+
def write_text(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
171
231
|
"""Write text using obstore."""
|
|
172
232
|
self.write_bytes(path, data.encode(encoding), **kwargs)
|
|
173
233
|
|
|
174
234
|
def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
|
|
175
235
|
"""List objects using obstore."""
|
|
176
|
-
resolved_prefix =
|
|
236
|
+
resolved_prefix = (
|
|
237
|
+
resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
|
|
238
|
+
if prefix
|
|
239
|
+
else self.base_path or ""
|
|
240
|
+
)
|
|
177
241
|
items = self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
|
|
178
242
|
paths: list[str] = []
|
|
179
243
|
for batch in items:
|
|
180
244
|
paths.extend(item["path"] for item in batch)
|
|
181
245
|
return sorted(paths)
|
|
182
246
|
|
|
183
|
-
def exists(self, path: "
|
|
247
|
+
def exists(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
184
248
|
"""Check if object exists using obstore."""
|
|
185
249
|
try:
|
|
186
|
-
self.
|
|
250
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
251
|
+
self.store.head(resolved_path)
|
|
187
252
|
except Exception:
|
|
188
253
|
return False
|
|
189
254
|
return True
|
|
190
255
|
|
|
191
|
-
def delete(self, path: "
|
|
256
|
+
def delete(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
192
257
|
"""Delete object using obstore."""
|
|
193
|
-
self.
|
|
258
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
259
|
+
self.store.delete(resolved_path)
|
|
194
260
|
|
|
195
|
-
def copy(self, source: "
|
|
261
|
+
def copy(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
196
262
|
"""Copy object using obstore."""
|
|
197
|
-
|
|
263
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
264
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
265
|
+
self.store.copy(source_path, dest_path)
|
|
198
266
|
|
|
199
|
-
def move(self, source: "
|
|
267
|
+
def move(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
200
268
|
"""Move object using obstore."""
|
|
201
|
-
|
|
269
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
270
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
271
|
+
self.store.rename(source_path, dest_path)
|
|
202
272
|
|
|
203
273
|
def glob(self, pattern: str, **kwargs: Any) -> list[str]:
|
|
204
274
|
"""Find objects matching pattern.
|
|
@@ -207,7 +277,7 @@ class ObStoreBackend:
|
|
|
207
277
|
"""
|
|
208
278
|
from pathlib import PurePosixPath
|
|
209
279
|
|
|
210
|
-
resolved_pattern = self.
|
|
280
|
+
resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
|
|
211
281
|
all_objects = self.list_objects(recursive=True, **kwargs)
|
|
212
282
|
|
|
213
283
|
if "**" in pattern:
|
|
@@ -229,38 +299,50 @@ class ObStoreBackend:
|
|
|
229
299
|
return matching_objects
|
|
230
300
|
return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
|
|
231
301
|
|
|
232
|
-
def get_metadata(self, path: "
|
|
302
|
+
def get_metadata(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
233
303
|
"""Get object metadata using obstore."""
|
|
234
|
-
resolved_path = self.
|
|
235
|
-
|
|
304
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
305
|
+
|
|
236
306
|
try:
|
|
237
307
|
metadata = self.store.head(resolved_path)
|
|
238
|
-
|
|
239
|
-
|
|
308
|
+
except Exception:
|
|
309
|
+
return {"path": resolved_path, "exists": False}
|
|
310
|
+
else:
|
|
311
|
+
if isinstance(metadata, dict):
|
|
312
|
+
result = {
|
|
240
313
|
"path": resolved_path,
|
|
241
314
|
"exists": True,
|
|
242
|
-
"size":
|
|
243
|
-
"last_modified":
|
|
244
|
-
"e_tag":
|
|
245
|
-
"version":
|
|
315
|
+
"size": metadata.get("size"),
|
|
316
|
+
"last_modified": metadata.get("last_modified"),
|
|
317
|
+
"e_tag": metadata.get("e_tag"),
|
|
318
|
+
"version": metadata.get("version"),
|
|
246
319
|
}
|
|
247
|
-
|
|
248
|
-
|
|
320
|
+
if metadata.get("metadata"):
|
|
321
|
+
result["custom_metadata"] = metadata["metadata"]
|
|
322
|
+
return result
|
|
323
|
+
|
|
324
|
+
result = {
|
|
325
|
+
"path": resolved_path,
|
|
326
|
+
"exists": True,
|
|
327
|
+
"size": metadata.size,
|
|
328
|
+
"last_modified": metadata.last_modified,
|
|
329
|
+
"e_tag": metadata.e_tag,
|
|
330
|
+
"version": metadata.version,
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
if metadata.metadata:
|
|
249
334
|
result["custom_metadata"] = metadata.metadata
|
|
250
335
|
|
|
251
|
-
except Exception:
|
|
252
|
-
return {"path": resolved_path, "exists": False}
|
|
253
|
-
else:
|
|
254
336
|
return result
|
|
255
337
|
|
|
256
|
-
def is_object(self, path: "
|
|
338
|
+
def is_object(self, path: "str | Path") -> bool:
|
|
257
339
|
"""Check if path is an object using obstore."""
|
|
258
|
-
resolved_path = self.
|
|
340
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
259
341
|
return self.exists(path) and not resolved_path.endswith("/")
|
|
260
342
|
|
|
261
|
-
def is_path(self, path: "
|
|
343
|
+
def is_path(self, path: "str | Path") -> bool:
|
|
262
344
|
"""Check if path is a prefix/directory using obstore."""
|
|
263
|
-
resolved_path = self.
|
|
345
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
264
346
|
|
|
265
347
|
if resolved_path.endswith("/"):
|
|
266
348
|
return True
|
|
@@ -271,53 +353,48 @@ class ObStoreBackend:
|
|
|
271
353
|
except Exception:
|
|
272
354
|
return False
|
|
273
355
|
|
|
274
|
-
def read_arrow(self, path: "
|
|
356
|
+
def read_arrow(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
|
|
275
357
|
"""Read Arrow table using obstore."""
|
|
276
|
-
|
|
277
|
-
if hasattr(self.store, "read_arrow"):
|
|
278
|
-
return self.store.read_arrow(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
279
|
-
|
|
280
|
-
self._ensure_pyarrow()
|
|
358
|
+
ensure_pyarrow()
|
|
281
359
|
import io
|
|
282
360
|
|
|
283
361
|
import pyarrow.parquet as pq
|
|
284
362
|
|
|
285
|
-
|
|
363
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
364
|
+
data = self.read_bytes(resolved_path)
|
|
365
|
+
return pq.read_table(io.BytesIO(data), **kwargs)
|
|
286
366
|
|
|
287
|
-
def write_arrow(self, path: "
|
|
367
|
+
def write_arrow(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
|
|
288
368
|
"""Write Arrow table using obstore."""
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
if
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
|
|
309
|
-
if match:
|
|
310
|
-
precision, scale = int(match.group(1)), int(match.group(2))
|
|
311
|
-
new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
|
|
312
|
-
else:
|
|
313
|
-
new_fields.append(field) # pragma: no cover
|
|
369
|
+
ensure_pyarrow()
|
|
370
|
+
import io
|
|
371
|
+
|
|
372
|
+
import pyarrow as pa
|
|
373
|
+
import pyarrow.parquet as pq
|
|
374
|
+
|
|
375
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
376
|
+
|
|
377
|
+
schema = table.schema
|
|
378
|
+
if any(str(f.type).startswith("decimal64") for f in schema):
|
|
379
|
+
new_fields = []
|
|
380
|
+
for field in schema:
|
|
381
|
+
if str(field.type).startswith("decimal64"):
|
|
382
|
+
import re
|
|
383
|
+
|
|
384
|
+
match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
|
|
385
|
+
if match:
|
|
386
|
+
precision, scale = int(match.group(1)), int(match.group(2))
|
|
387
|
+
new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
|
|
314
388
|
else:
|
|
315
389
|
new_fields.append(field)
|
|
316
|
-
|
|
390
|
+
else:
|
|
391
|
+
new_fields.append(field)
|
|
392
|
+
table = table.cast(pa.schema(new_fields))
|
|
317
393
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
394
|
+
buffer = io.BytesIO()
|
|
395
|
+
pq.write_table(table, buffer, **kwargs)
|
|
396
|
+
buffer.seek(0)
|
|
397
|
+
self.write_bytes(resolved_path, buffer.read())
|
|
321
398
|
|
|
322
399
|
def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
|
|
323
400
|
"""Stream Arrow record batches.
|
|
@@ -325,13 +402,14 @@ class ObStoreBackend:
|
|
|
325
402
|
Yields:
|
|
326
403
|
Iterator of Arrow record batches from matching objects.
|
|
327
404
|
"""
|
|
328
|
-
|
|
405
|
+
ensure_pyarrow()
|
|
329
406
|
from io import BytesIO
|
|
330
407
|
|
|
331
408
|
import pyarrow.parquet as pq
|
|
332
409
|
|
|
333
410
|
for obj_path in self.glob(pattern, **kwargs):
|
|
334
|
-
|
|
411
|
+
resolved_path = resolve_storage_path(obj_path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
412
|
+
result = self.store.get(resolved_path)
|
|
335
413
|
bytes_obj = result.bytes()
|
|
336
414
|
data = bytes_obj.to_bytes()
|
|
337
415
|
buffer = BytesIO(data)
|
|
@@ -340,26 +418,38 @@ class ObStoreBackend:
|
|
|
340
418
|
|
|
341
419
|
def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
|
|
342
420
|
"""Generate a signed URL for the object."""
|
|
343
|
-
resolved_path = self.
|
|
344
|
-
if hasattr(self.store, "sign_url") and callable(self.store.sign_url):
|
|
345
|
-
return self.store.sign_url(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
|
|
421
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
346
422
|
return f"{self.store_uri}/{resolved_path}"
|
|
347
423
|
|
|
348
|
-
async def read_bytes_async(self, path: "
|
|
424
|
+
async def read_bytes_async(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
349
425
|
"""Read bytes from storage asynchronously."""
|
|
350
|
-
|
|
426
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
427
|
+
if self._is_local_store:
|
|
428
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
429
|
+
else:
|
|
430
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
431
|
+
|
|
351
432
|
result = await self.store.get_async(resolved_path)
|
|
352
433
|
bytes_obj = await result.bytes_async()
|
|
353
434
|
return bytes_obj.to_bytes() # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
354
435
|
|
|
355
|
-
async def write_bytes_async(self, path: "
|
|
436
|
+
async def write_bytes_async(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
356
437
|
"""Write bytes to storage asynchronously."""
|
|
357
|
-
|
|
438
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
439
|
+
if self._is_local_store:
|
|
440
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
441
|
+
else:
|
|
442
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
443
|
+
|
|
358
444
|
await self.store.put_async(resolved_path, data)
|
|
359
445
|
|
|
360
446
|
async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
|
|
361
447
|
"""List objects in storage asynchronously."""
|
|
362
|
-
resolved_prefix =
|
|
448
|
+
resolved_prefix = (
|
|
449
|
+
resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
|
|
450
|
+
if prefix
|
|
451
|
+
else self.base_path or ""
|
|
452
|
+
)
|
|
363
453
|
|
|
364
454
|
objects: list[str] = []
|
|
365
455
|
async for batch in self.store.list_async(resolved_prefix): # pyright: ignore[reportAttributeAccessIssue]
|
|
@@ -371,47 +461,72 @@ class ObStoreBackend:
|
|
|
371
461
|
|
|
372
462
|
return sorted(objects)
|
|
373
463
|
|
|
374
|
-
async def read_text_async(self, path: "
|
|
464
|
+
async def read_text_async(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
375
465
|
"""Read text from storage asynchronously."""
|
|
376
466
|
data = await self.read_bytes_async(path, **kwargs)
|
|
377
467
|
return data.decode(encoding)
|
|
378
468
|
|
|
379
|
-
async def write_text_async(
|
|
380
|
-
self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
|
|
381
|
-
) -> None: # pyright: ignore[reportUnusedParameter]
|
|
469
|
+
async def write_text_async(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
382
470
|
"""Write text to storage asynchronously."""
|
|
383
471
|
encoded_data = data.encode(encoding)
|
|
384
472
|
await self.write_bytes_async(path, encoded_data, **kwargs)
|
|
385
473
|
|
|
386
|
-
async def exists_async(self, path: "
|
|
474
|
+
async def exists_async(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
387
475
|
"""Check if object exists in storage asynchronously."""
|
|
388
|
-
|
|
476
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
477
|
+
if self._is_local_store:
|
|
478
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
479
|
+
else:
|
|
480
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
481
|
+
|
|
389
482
|
try:
|
|
390
483
|
await self.store.head_async(resolved_path)
|
|
391
484
|
except Exception:
|
|
392
485
|
return False
|
|
393
486
|
return True
|
|
394
487
|
|
|
395
|
-
async def delete_async(self, path: "
|
|
488
|
+
async def delete_async(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
396
489
|
"""Delete object from storage asynchronously."""
|
|
397
|
-
|
|
490
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
491
|
+
if self._is_local_store:
|
|
492
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
493
|
+
else:
|
|
494
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
495
|
+
|
|
398
496
|
await self.store.delete_async(resolved_path)
|
|
399
497
|
|
|
400
|
-
async def copy_async(self, source: "
|
|
498
|
+
async def copy_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
401
499
|
"""Copy object in storage asynchronously."""
|
|
402
|
-
|
|
403
|
-
|
|
500
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
501
|
+
if self._is_local_store:
|
|
502
|
+
source_path = self._resolve_path_for_local_store(source)
|
|
503
|
+
dest_path = self._resolve_path_for_local_store(destination)
|
|
504
|
+
else:
|
|
505
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
506
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
507
|
+
|
|
404
508
|
await self.store.copy_async(source_path, dest_path)
|
|
405
509
|
|
|
406
|
-
async def move_async(self, source: "
|
|
510
|
+
async def move_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
407
511
|
"""Move object in storage asynchronously."""
|
|
408
|
-
|
|
409
|
-
|
|
512
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
513
|
+
if self._is_local_store:
|
|
514
|
+
source_path = self._resolve_path_for_local_store(source)
|
|
515
|
+
dest_path = self._resolve_path_for_local_store(destination)
|
|
516
|
+
else:
|
|
517
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
518
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
519
|
+
|
|
410
520
|
await self.store.rename_async(source_path, dest_path)
|
|
411
521
|
|
|
412
|
-
async def get_metadata_async(self, path: "
|
|
522
|
+
async def get_metadata_async(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
413
523
|
"""Get object metadata from storage asynchronously."""
|
|
414
|
-
|
|
524
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
525
|
+
if self._is_local_store:
|
|
526
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
527
|
+
else:
|
|
528
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
529
|
+
|
|
415
530
|
result: dict[str, Any] = {}
|
|
416
531
|
try:
|
|
417
532
|
metadata = await self.store.head_async(resolved_path)
|
|
@@ -433,42 +548,35 @@ class ObStoreBackend:
|
|
|
433
548
|
else:
|
|
434
549
|
return result
|
|
435
550
|
|
|
436
|
-
async def read_arrow_async(self, path: "
|
|
551
|
+
async def read_arrow_async(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
|
|
437
552
|
"""Read Arrow table from storage asynchronously."""
|
|
438
|
-
|
|
439
|
-
if hasattr(self.store, "read_arrow_async"):
|
|
440
|
-
return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
441
|
-
|
|
442
|
-
self._ensure_pyarrow()
|
|
553
|
+
ensure_pyarrow()
|
|
443
554
|
import io
|
|
444
555
|
|
|
445
556
|
import pyarrow.parquet as pq
|
|
446
557
|
|
|
447
|
-
|
|
558
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
559
|
+
data = await self.read_bytes_async(resolved_path)
|
|
560
|
+
return pq.read_table(io.BytesIO(data), **kwargs)
|
|
448
561
|
|
|
449
|
-
async def write_arrow_async(self, path: "
|
|
562
|
+
async def write_arrow_async(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
|
|
450
563
|
"""Write Arrow table to storage asynchronously."""
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
await self.store.write_arrow_async(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
|
|
454
|
-
else:
|
|
455
|
-
self._ensure_pyarrow()
|
|
456
|
-
import io
|
|
564
|
+
ensure_pyarrow()
|
|
565
|
+
import io
|
|
457
566
|
|
|
458
|
-
|
|
567
|
+
import pyarrow.parquet as pq
|
|
459
568
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
569
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
570
|
+
buffer = io.BytesIO()
|
|
571
|
+
pq.write_table(table, buffer, **kwargs)
|
|
572
|
+
buffer.seek(0)
|
|
573
|
+
await self.write_bytes_async(resolved_path, buffer.read())
|
|
464
574
|
|
|
465
575
|
def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
|
|
466
|
-
resolved_pattern = self.
|
|
576
|
+
resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
|
|
467
577
|
return _AsyncArrowIterator(self, resolved_pattern, **kwargs)
|
|
468
578
|
|
|
469
579
|
async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
|
|
470
580
|
"""Generate a signed URL asynchronously."""
|
|
471
|
-
resolved_path = self.
|
|
472
|
-
if hasattr(self.store, "sign_url_async") and callable(self.store.sign_url_async):
|
|
473
|
-
return await self.store.sign_url_async(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
|
|
581
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
474
582
|
return f"{self.store_uri}/{resolved_path}"
|