sqlspec 0.26.0__py3-none-any.whl → 0.27.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +7 -15
- sqlspec/_serialization.py +55 -25
- sqlspec/_typing.py +62 -52
- sqlspec/adapters/adbc/_types.py +1 -1
- sqlspec/adapters/adbc/adk/__init__.py +5 -0
- sqlspec/adapters/adbc/adk/store.py +870 -0
- sqlspec/adapters/adbc/config.py +62 -12
- sqlspec/adapters/adbc/data_dictionary.py +52 -2
- sqlspec/adapters/adbc/driver.py +144 -45
- sqlspec/adapters/adbc/litestar/__init__.py +5 -0
- sqlspec/adapters/adbc/litestar/store.py +504 -0
- sqlspec/adapters/adbc/type_converter.py +44 -50
- sqlspec/adapters/aiosqlite/_types.py +1 -1
- sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/adk/store.py +527 -0
- sqlspec/adapters/aiosqlite/config.py +86 -16
- sqlspec/adapters/aiosqlite/data_dictionary.py +34 -2
- sqlspec/adapters/aiosqlite/driver.py +127 -38
- sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
- sqlspec/adapters/aiosqlite/pool.py +7 -7
- sqlspec/adapters/asyncmy/__init__.py +7 -1
- sqlspec/adapters/asyncmy/_types.py +1 -1
- sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
- sqlspec/adapters/asyncmy/adk/store.py +493 -0
- sqlspec/adapters/asyncmy/config.py +59 -17
- sqlspec/adapters/asyncmy/data_dictionary.py +41 -2
- sqlspec/adapters/asyncmy/driver.py +293 -62
- sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncmy/litestar/store.py +296 -0
- sqlspec/adapters/asyncpg/__init__.py +2 -1
- sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
- sqlspec/adapters/asyncpg/_types.py +11 -7
- sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
- sqlspec/adapters/asyncpg/adk/store.py +450 -0
- sqlspec/adapters/asyncpg/config.py +57 -36
- sqlspec/adapters/asyncpg/data_dictionary.py +41 -2
- sqlspec/adapters/asyncpg/driver.py +153 -23
- sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncpg/litestar/store.py +253 -0
- sqlspec/adapters/bigquery/_types.py +1 -1
- sqlspec/adapters/bigquery/adk/__init__.py +5 -0
- sqlspec/adapters/bigquery/adk/store.py +576 -0
- sqlspec/adapters/bigquery/config.py +25 -11
- sqlspec/adapters/bigquery/data_dictionary.py +42 -2
- sqlspec/adapters/bigquery/driver.py +352 -144
- sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
- sqlspec/adapters/bigquery/litestar/store.py +327 -0
- sqlspec/adapters/bigquery/type_converter.py +55 -23
- sqlspec/adapters/duckdb/_types.py +2 -2
- sqlspec/adapters/duckdb/adk/__init__.py +14 -0
- sqlspec/adapters/duckdb/adk/store.py +553 -0
- sqlspec/adapters/duckdb/config.py +79 -21
- sqlspec/adapters/duckdb/data_dictionary.py +41 -2
- sqlspec/adapters/duckdb/driver.py +138 -43
- sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
- sqlspec/adapters/duckdb/litestar/store.py +332 -0
- sqlspec/adapters/duckdb/pool.py +5 -5
- sqlspec/adapters/duckdb/type_converter.py +51 -21
- sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
- sqlspec/adapters/oracledb/_types.py +20 -2
- sqlspec/adapters/oracledb/adk/__init__.py +5 -0
- sqlspec/adapters/oracledb/adk/store.py +1745 -0
- sqlspec/adapters/oracledb/config.py +120 -36
- sqlspec/adapters/oracledb/data_dictionary.py +87 -20
- sqlspec/adapters/oracledb/driver.py +292 -84
- sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
- sqlspec/adapters/oracledb/litestar/store.py +767 -0
- sqlspec/adapters/oracledb/migrations.py +316 -25
- sqlspec/adapters/oracledb/type_converter.py +91 -16
- sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
- sqlspec/adapters/psqlpy/_types.py +2 -1
- sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
- sqlspec/adapters/psqlpy/adk/store.py +482 -0
- sqlspec/adapters/psqlpy/config.py +45 -19
- sqlspec/adapters/psqlpy/data_dictionary.py +41 -2
- sqlspec/adapters/psqlpy/driver.py +101 -31
- sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
- sqlspec/adapters/psqlpy/litestar/store.py +272 -0
- sqlspec/adapters/psqlpy/type_converter.py +40 -11
- sqlspec/adapters/psycopg/_type_handlers.py +80 -0
- sqlspec/adapters/psycopg/_types.py +2 -1
- sqlspec/adapters/psycopg/adk/__init__.py +5 -0
- sqlspec/adapters/psycopg/adk/store.py +944 -0
- sqlspec/adapters/psycopg/config.py +65 -37
- sqlspec/adapters/psycopg/data_dictionary.py +77 -3
- sqlspec/adapters/psycopg/driver.py +200 -78
- sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
- sqlspec/adapters/psycopg/litestar/store.py +554 -0
- sqlspec/adapters/sqlite/__init__.py +2 -1
- sqlspec/adapters/sqlite/_type_handlers.py +86 -0
- sqlspec/adapters/sqlite/_types.py +1 -1
- sqlspec/adapters/sqlite/adk/__init__.py +5 -0
- sqlspec/adapters/sqlite/adk/store.py +572 -0
- sqlspec/adapters/sqlite/config.py +85 -16
- sqlspec/adapters/sqlite/data_dictionary.py +34 -2
- sqlspec/adapters/sqlite/driver.py +120 -52
- sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/sqlite/litestar/store.py +318 -0
- sqlspec/adapters/sqlite/pool.py +5 -5
- sqlspec/base.py +45 -26
- sqlspec/builder/__init__.py +73 -4
- sqlspec/builder/_base.py +91 -58
- sqlspec/builder/_column.py +5 -5
- sqlspec/builder/_ddl.py +98 -89
- sqlspec/builder/_delete.py +5 -4
- sqlspec/builder/_dml.py +388 -0
- sqlspec/{_sql.py → builder/_factory.py} +41 -44
- sqlspec/builder/_insert.py +5 -82
- sqlspec/builder/{mixins/_join_operations.py → _join.py} +145 -143
- sqlspec/builder/_merge.py +446 -11
- sqlspec/builder/_parsing_utils.py +9 -11
- sqlspec/builder/_select.py +1313 -25
- sqlspec/builder/_update.py +11 -42
- sqlspec/cli.py +76 -69
- sqlspec/config.py +231 -60
- sqlspec/core/__init__.py +5 -4
- sqlspec/core/cache.py +18 -18
- sqlspec/core/compiler.py +6 -8
- sqlspec/core/filters.py +37 -37
- sqlspec/core/hashing.py +9 -9
- sqlspec/core/parameters.py +76 -45
- sqlspec/core/result.py +102 -46
- sqlspec/core/splitter.py +16 -17
- sqlspec/core/statement.py +32 -31
- sqlspec/core/type_conversion.py +3 -2
- sqlspec/driver/__init__.py +1 -3
- sqlspec/driver/_async.py +95 -161
- sqlspec/driver/_common.py +133 -80
- sqlspec/driver/_sync.py +95 -162
- sqlspec/driver/mixins/_result_tools.py +20 -236
- sqlspec/driver/mixins/_sql_translator.py +4 -4
- sqlspec/exceptions.py +70 -7
- sqlspec/extensions/adk/__init__.py +53 -0
- sqlspec/extensions/adk/_types.py +51 -0
- sqlspec/extensions/adk/converters.py +172 -0
- sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
- sqlspec/extensions/adk/migrations/__init__.py +0 -0
- sqlspec/extensions/adk/service.py +181 -0
- sqlspec/extensions/adk/store.py +536 -0
- sqlspec/extensions/aiosql/adapter.py +73 -53
- sqlspec/extensions/litestar/__init__.py +21 -4
- sqlspec/extensions/litestar/cli.py +54 -10
- sqlspec/extensions/litestar/config.py +59 -266
- sqlspec/extensions/litestar/handlers.py +46 -17
- sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
- sqlspec/extensions/litestar/migrations/__init__.py +3 -0
- sqlspec/extensions/litestar/plugin.py +324 -223
- sqlspec/extensions/litestar/providers.py +25 -25
- sqlspec/extensions/litestar/store.py +265 -0
- sqlspec/loader.py +30 -49
- sqlspec/migrations/base.py +200 -76
- sqlspec/migrations/commands.py +591 -62
- sqlspec/migrations/context.py +6 -9
- sqlspec/migrations/fix.py +199 -0
- sqlspec/migrations/loaders.py +47 -19
- sqlspec/migrations/runner.py +241 -75
- sqlspec/migrations/tracker.py +237 -21
- sqlspec/migrations/utils.py +51 -3
- sqlspec/migrations/validation.py +177 -0
- sqlspec/protocols.py +66 -36
- sqlspec/storage/_utils.py +98 -0
- sqlspec/storage/backends/fsspec.py +134 -106
- sqlspec/storage/backends/local.py +78 -51
- sqlspec/storage/backends/obstore.py +278 -162
- sqlspec/storage/registry.py +75 -39
- sqlspec/typing.py +14 -84
- sqlspec/utils/config_resolver.py +6 -6
- sqlspec/utils/correlation.py +4 -5
- sqlspec/utils/data_transformation.py +3 -2
- sqlspec/utils/deprecation.py +9 -8
- sqlspec/utils/fixtures.py +4 -4
- sqlspec/utils/logging.py +46 -6
- sqlspec/utils/module_loader.py +2 -2
- sqlspec/utils/schema.py +288 -0
- sqlspec/utils/serializers.py +3 -3
- sqlspec/utils/sync_tools.py +21 -17
- sqlspec/utils/text.py +1 -2
- sqlspec/utils/type_guards.py +111 -20
- sqlspec/utils/version.py +433 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/METADATA +40 -21
- sqlspec-0.27.0.dist-info/RECORD +207 -0
- sqlspec/builder/mixins/__init__.py +0 -55
- sqlspec/builder/mixins/_cte_and_set_ops.py +0 -253
- sqlspec/builder/mixins/_delete_operations.py +0 -50
- sqlspec/builder/mixins/_insert_operations.py +0 -282
- sqlspec/builder/mixins/_merge_operations.py +0 -698
- sqlspec/builder/mixins/_order_limit_operations.py +0 -145
- sqlspec/builder/mixins/_pivot_operations.py +0 -157
- sqlspec/builder/mixins/_select_operations.py +0 -930
- sqlspec/builder/mixins/_update_operations.py +0 -199
- sqlspec/builder/mixins/_where_clause.py +0 -1298
- sqlspec-0.26.0.dist-info/RECORD +0 -157
- sqlspec-0.26.0.dist-info/licenses/NOTICE +0 -29
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,18 +5,22 @@ and local file storage.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import fnmatch
|
|
8
|
+
import io
|
|
8
9
|
import logging
|
|
9
10
|
from collections.abc import AsyncIterator, Iterator
|
|
10
|
-
from typing import TYPE_CHECKING, Any, Final,
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Final, cast
|
|
11
12
|
from urllib.parse import urlparse
|
|
12
13
|
|
|
14
|
+
from sqlspec.utils.sync_tools import async_
|
|
15
|
+
|
|
13
16
|
if TYPE_CHECKING:
|
|
14
17
|
from pathlib import Path
|
|
15
18
|
|
|
16
19
|
from mypy_extensions import mypyc_attr
|
|
17
20
|
|
|
18
21
|
from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
|
|
19
|
-
from sqlspec.
|
|
22
|
+
from sqlspec.storage._utils import ensure_pyarrow, resolve_storage_path
|
|
23
|
+
from sqlspec.typing import OBSTORE_INSTALLED, ArrowRecordBatch, ArrowTable
|
|
20
24
|
|
|
21
25
|
__all__ = ("ObStoreBackend",)
|
|
22
26
|
|
|
@@ -24,37 +28,64 @@ logger = logging.getLogger(__name__)
|
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
class _AsyncArrowIterator:
|
|
27
|
-
"""Helper class to work around mypyc's lack of async generator support.
|
|
31
|
+
"""Helper class to work around mypyc's lack of async generator support.
|
|
32
|
+
|
|
33
|
+
Uses hybrid async/sync pattern:
|
|
34
|
+
- Native async I/O for network operations (S3, GCS, Azure)
|
|
35
|
+
- Thread pool for CPU-bound PyArrow parsing
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
__slots__ = ("_current_file_iterator", "_files_iterator", "backend", "kwargs", "pattern")
|
|
28
39
|
|
|
29
40
|
def __init__(self, backend: "ObStoreBackend", pattern: str, **kwargs: Any) -> None:
|
|
30
41
|
self.backend = backend
|
|
31
42
|
self.pattern = pattern
|
|
32
43
|
self.kwargs = kwargs
|
|
33
|
-
self._files_iterator:
|
|
34
|
-
self._current_file_iterator:
|
|
44
|
+
self._files_iterator: Iterator[str] | None = None
|
|
45
|
+
self._current_file_iterator: Iterator[ArrowRecordBatch] | None = None
|
|
35
46
|
|
|
36
47
|
def __aiter__(self) -> "_AsyncArrowIterator":
|
|
37
48
|
return self
|
|
38
49
|
|
|
39
50
|
async def __anext__(self) -> ArrowRecordBatch:
|
|
51
|
+
import pyarrow.parquet as pq
|
|
52
|
+
|
|
40
53
|
if self._files_iterator is None:
|
|
41
54
|
files = self.backend.glob(self.pattern, **self.kwargs)
|
|
42
55
|
self._files_iterator = iter(files)
|
|
43
56
|
|
|
44
57
|
while True:
|
|
45
58
|
if self._current_file_iterator is not None:
|
|
59
|
+
|
|
60
|
+
def _safe_next_batch() -> ArrowRecordBatch:
|
|
61
|
+
try:
|
|
62
|
+
return next(self._current_file_iterator) # type: ignore[arg-type]
|
|
63
|
+
except StopIteration as e:
|
|
64
|
+
raise StopAsyncIteration from e
|
|
65
|
+
|
|
46
66
|
try:
|
|
47
|
-
return
|
|
48
|
-
except
|
|
67
|
+
return await async_(_safe_next_batch)()
|
|
68
|
+
except StopAsyncIteration:
|
|
49
69
|
self._current_file_iterator = None
|
|
70
|
+
continue
|
|
50
71
|
|
|
51
72
|
try:
|
|
52
73
|
next_file = next(self._files_iterator)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
74
|
+
except StopIteration as e:
|
|
75
|
+
raise StopAsyncIteration from e
|
|
76
|
+
|
|
77
|
+
data = await self.backend.read_bytes_async(next_file)
|
|
78
|
+
parquet_file = pq.ParquetFile(io.BytesIO(data))
|
|
79
|
+
self._current_file_iterator = parquet_file.iter_batches()
|
|
80
|
+
|
|
81
|
+
async def aclose(self) -> None:
|
|
82
|
+
"""Close underlying file iterator."""
|
|
83
|
+
if self._current_file_iterator is not None:
|
|
84
|
+
try:
|
|
85
|
+
close_method = self._current_file_iterator.close # type: ignore[attr-defined]
|
|
86
|
+
await async_(close_method)() # pyright: ignore
|
|
87
|
+
except AttributeError:
|
|
88
|
+
pass
|
|
58
89
|
|
|
59
90
|
|
|
60
91
|
DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
|
|
@@ -69,17 +100,17 @@ class ObStoreBackend:
|
|
|
69
100
|
local filesystem, and HTTP endpoints.
|
|
70
101
|
"""
|
|
71
102
|
|
|
72
|
-
__slots__ = (
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
""
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
""
|
|
81
|
-
|
|
82
|
-
|
|
103
|
+
__slots__ = (
|
|
104
|
+
"_is_local_store",
|
|
105
|
+
"_local_store_root",
|
|
106
|
+
"_path_cache",
|
|
107
|
+
"backend_type",
|
|
108
|
+
"base_path",
|
|
109
|
+
"protocol",
|
|
110
|
+
"store",
|
|
111
|
+
"store_options",
|
|
112
|
+
"store_uri",
|
|
113
|
+
)
|
|
83
114
|
|
|
84
115
|
def __init__(self, uri: str, **kwargs: Any) -> None:
|
|
85
116
|
"""Initialize obstore backend.
|
|
@@ -87,9 +118,12 @@ class ObStoreBackend:
|
|
|
87
118
|
Args:
|
|
88
119
|
uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
|
|
89
120
|
**kwargs: Additional options including base_path and obstore configuration
|
|
90
|
-
"""
|
|
91
121
|
|
|
92
|
-
|
|
122
|
+
Raises:
|
|
123
|
+
MissingDependencyError: If obstore is not installed.
|
|
124
|
+
"""
|
|
125
|
+
if not OBSTORE_INSTALLED:
|
|
126
|
+
raise MissingDependencyError(package="obstore", install_package="obstore")
|
|
93
127
|
|
|
94
128
|
try:
|
|
95
129
|
# Extract base_path from kwargs
|
|
@@ -100,6 +134,8 @@ class ObStoreBackend:
|
|
|
100
134
|
self.store_options = kwargs
|
|
101
135
|
self.store: Any
|
|
102
136
|
self._path_cache: dict[str, str] = {}
|
|
137
|
+
self._is_local_store = False
|
|
138
|
+
self._local_store_root = ""
|
|
103
139
|
self.protocol = uri.split("://", 1)[0] if "://" in uri else "file"
|
|
104
140
|
self.backend_type = "obstore"
|
|
105
141
|
|
|
@@ -112,11 +148,26 @@ class ObStoreBackend:
|
|
|
112
148
|
|
|
113
149
|
from obstore.store import LocalStore
|
|
114
150
|
|
|
151
|
+
# Parse URI to extract path
|
|
152
|
+
# Note: urlparse splits on '#', so we need to reconstruct the full path
|
|
115
153
|
parsed = urlparse(uri)
|
|
116
|
-
|
|
117
|
-
#
|
|
118
|
-
|
|
119
|
-
|
|
154
|
+
path_str = parsed.path or "/"
|
|
155
|
+
# Append fragment if present (handles paths with '#' character)
|
|
156
|
+
if parsed.fragment:
|
|
157
|
+
path_str = f"{path_str}#{parsed.fragment}"
|
|
158
|
+
path_obj = PathlibPath(path_str)
|
|
159
|
+
|
|
160
|
+
# If path points to a file, use its parent as the base directory
|
|
161
|
+
if path_obj.is_file():
|
|
162
|
+
path_str = str(path_obj.parent)
|
|
163
|
+
|
|
164
|
+
# If base_path provided via kwargs, use it as LocalStore root
|
|
165
|
+
# Otherwise use the URI path
|
|
166
|
+
local_store_root = self.base_path or path_str
|
|
167
|
+
|
|
168
|
+
self._is_local_store = True
|
|
169
|
+
self._local_store_root = local_store_root
|
|
170
|
+
self.store = LocalStore(local_store_root, mkdir=True)
|
|
120
171
|
else:
|
|
121
172
|
from obstore.store import from_url
|
|
122
173
|
|
|
@@ -141,64 +192,91 @@ class ObStoreBackend:
|
|
|
141
192
|
|
|
142
193
|
return cls(uri=store_uri, **kwargs)
|
|
143
194
|
|
|
144
|
-
def
|
|
145
|
-
"""Resolve path relative
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
if self.
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
195
|
+
def _resolve_path_for_local_store(self, path: "str | Path") -> str:
|
|
196
|
+
"""Resolve path for LocalStore which expects relative paths from its root."""
|
|
197
|
+
from pathlib import Path as PathlibPath
|
|
198
|
+
|
|
199
|
+
path_obj = PathlibPath(str(path))
|
|
200
|
+
|
|
201
|
+
# If absolute path, try to make it relative to LocalStore root
|
|
202
|
+
if path_obj.is_absolute() and self._local_store_root:
|
|
203
|
+
try:
|
|
204
|
+
return str(path_obj.relative_to(self._local_store_root))
|
|
205
|
+
except ValueError:
|
|
206
|
+
# Path is outside LocalStore root - strip leading / as fallback
|
|
207
|
+
return str(path).lstrip("/")
|
|
208
|
+
|
|
209
|
+
# Relative path - return as-is (already relative to LocalStore root)
|
|
210
|
+
return str(path)
|
|
211
|
+
|
|
212
|
+
def read_bytes(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
158
213
|
"""Read bytes using obstore."""
|
|
159
|
-
|
|
214
|
+
# For LocalStore, use special path resolution (relative to LocalStore root)
|
|
215
|
+
if self._is_local_store:
|
|
216
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
217
|
+
else:
|
|
218
|
+
# For cloud storage, use standard resolution
|
|
219
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
220
|
+
|
|
221
|
+
result = self.store.get(resolved_path)
|
|
160
222
|
return cast("bytes", result.bytes().to_bytes())
|
|
161
223
|
|
|
162
|
-
def write_bytes(self, path: "
|
|
224
|
+
def write_bytes(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
163
225
|
"""Write bytes using obstore."""
|
|
164
|
-
|
|
226
|
+
# For LocalStore, use special path resolution (relative to LocalStore root)
|
|
227
|
+
if self._is_local_store:
|
|
228
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
229
|
+
else:
|
|
230
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
165
231
|
|
|
166
|
-
|
|
232
|
+
self.store.put(resolved_path, data)
|
|
233
|
+
|
|
234
|
+
def read_text(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
167
235
|
"""Read text using obstore."""
|
|
168
236
|
return self.read_bytes(path, **kwargs).decode(encoding)
|
|
169
237
|
|
|
170
|
-
def write_text(self, path: "
|
|
238
|
+
def write_text(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
171
239
|
"""Write text using obstore."""
|
|
172
240
|
self.write_bytes(path, data.encode(encoding), **kwargs)
|
|
173
241
|
|
|
174
242
|
def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
|
|
175
243
|
"""List objects using obstore."""
|
|
176
|
-
resolved_prefix =
|
|
244
|
+
resolved_prefix = (
|
|
245
|
+
resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
|
|
246
|
+
if prefix
|
|
247
|
+
else self.base_path or ""
|
|
248
|
+
)
|
|
177
249
|
items = self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
|
|
178
250
|
paths: list[str] = []
|
|
179
251
|
for batch in items:
|
|
180
252
|
paths.extend(item["path"] for item in batch)
|
|
181
253
|
return sorted(paths)
|
|
182
254
|
|
|
183
|
-
def exists(self, path: "
|
|
255
|
+
def exists(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
184
256
|
"""Check if object exists using obstore."""
|
|
185
257
|
try:
|
|
186
|
-
self.
|
|
258
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
259
|
+
self.store.head(resolved_path)
|
|
187
260
|
except Exception:
|
|
188
261
|
return False
|
|
189
262
|
return True
|
|
190
263
|
|
|
191
|
-
def delete(self, path: "
|
|
264
|
+
def delete(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
192
265
|
"""Delete object using obstore."""
|
|
193
|
-
self.
|
|
266
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
267
|
+
self.store.delete(resolved_path)
|
|
194
268
|
|
|
195
|
-
def copy(self, source: "
|
|
269
|
+
def copy(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
196
270
|
"""Copy object using obstore."""
|
|
197
|
-
|
|
271
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
272
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
273
|
+
self.store.copy(source_path, dest_path)
|
|
198
274
|
|
|
199
|
-
def move(self, source: "
|
|
275
|
+
def move(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
200
276
|
"""Move object using obstore."""
|
|
201
|
-
|
|
277
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
278
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
279
|
+
self.store.rename(source_path, dest_path)
|
|
202
280
|
|
|
203
281
|
def glob(self, pattern: str, **kwargs: Any) -> list[str]:
|
|
204
282
|
"""Find objects matching pattern.
|
|
@@ -207,7 +285,7 @@ class ObStoreBackend:
|
|
|
207
285
|
"""
|
|
208
286
|
from pathlib import PurePosixPath
|
|
209
287
|
|
|
210
|
-
resolved_pattern = self.
|
|
288
|
+
resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
|
|
211
289
|
all_objects = self.list_objects(recursive=True, **kwargs)
|
|
212
290
|
|
|
213
291
|
if "**" in pattern:
|
|
@@ -229,38 +307,50 @@ class ObStoreBackend:
|
|
|
229
307
|
return matching_objects
|
|
230
308
|
return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
|
|
231
309
|
|
|
232
|
-
def get_metadata(self, path: "
|
|
310
|
+
def get_metadata(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
233
311
|
"""Get object metadata using obstore."""
|
|
234
|
-
resolved_path = self.
|
|
235
|
-
|
|
312
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
313
|
+
|
|
236
314
|
try:
|
|
237
315
|
metadata = self.store.head(resolved_path)
|
|
238
|
-
|
|
239
|
-
|
|
316
|
+
except Exception:
|
|
317
|
+
return {"path": resolved_path, "exists": False}
|
|
318
|
+
else:
|
|
319
|
+
if isinstance(metadata, dict):
|
|
320
|
+
result = {
|
|
240
321
|
"path": resolved_path,
|
|
241
322
|
"exists": True,
|
|
242
|
-
"size":
|
|
243
|
-
"last_modified":
|
|
244
|
-
"e_tag":
|
|
245
|
-
"version":
|
|
323
|
+
"size": metadata.get("size"),
|
|
324
|
+
"last_modified": metadata.get("last_modified"),
|
|
325
|
+
"e_tag": metadata.get("e_tag"),
|
|
326
|
+
"version": metadata.get("version"),
|
|
246
327
|
}
|
|
247
|
-
|
|
248
|
-
|
|
328
|
+
if metadata.get("metadata"):
|
|
329
|
+
result["custom_metadata"] = metadata["metadata"]
|
|
330
|
+
return result
|
|
331
|
+
|
|
332
|
+
result = {
|
|
333
|
+
"path": resolved_path,
|
|
334
|
+
"exists": True,
|
|
335
|
+
"size": metadata.size,
|
|
336
|
+
"last_modified": metadata.last_modified,
|
|
337
|
+
"e_tag": metadata.e_tag,
|
|
338
|
+
"version": metadata.version,
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
if metadata.metadata:
|
|
249
342
|
result["custom_metadata"] = metadata.metadata
|
|
250
343
|
|
|
251
|
-
except Exception:
|
|
252
|
-
return {"path": resolved_path, "exists": False}
|
|
253
|
-
else:
|
|
254
344
|
return result
|
|
255
345
|
|
|
256
|
-
def is_object(self, path: "
|
|
346
|
+
def is_object(self, path: "str | Path") -> bool:
|
|
257
347
|
"""Check if path is an object using obstore."""
|
|
258
|
-
resolved_path = self.
|
|
348
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
259
349
|
return self.exists(path) and not resolved_path.endswith("/")
|
|
260
350
|
|
|
261
|
-
def is_path(self, path: "
|
|
351
|
+
def is_path(self, path: "str | Path") -> bool:
|
|
262
352
|
"""Check if path is a prefix/directory using obstore."""
|
|
263
|
-
resolved_path = self.
|
|
353
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
264
354
|
|
|
265
355
|
if resolved_path.endswith("/"):
|
|
266
356
|
return True
|
|
@@ -271,53 +361,48 @@ class ObStoreBackend:
|
|
|
271
361
|
except Exception:
|
|
272
362
|
return False
|
|
273
363
|
|
|
274
|
-
def read_arrow(self, path: "
|
|
364
|
+
def read_arrow(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
|
|
275
365
|
"""Read Arrow table using obstore."""
|
|
276
|
-
|
|
277
|
-
if hasattr(self.store, "read_arrow"):
|
|
278
|
-
return self.store.read_arrow(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
279
|
-
|
|
280
|
-
self._ensure_pyarrow()
|
|
366
|
+
ensure_pyarrow()
|
|
281
367
|
import io
|
|
282
368
|
|
|
283
369
|
import pyarrow.parquet as pq
|
|
284
370
|
|
|
285
|
-
|
|
371
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
372
|
+
data = self.read_bytes(resolved_path)
|
|
373
|
+
return pq.read_table(io.BytesIO(data), **kwargs)
|
|
286
374
|
|
|
287
|
-
def write_arrow(self, path: "
|
|
375
|
+
def write_arrow(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
|
|
288
376
|
"""Write Arrow table using obstore."""
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
if
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
|
|
309
|
-
if match:
|
|
310
|
-
precision, scale = int(match.group(1)), int(match.group(2))
|
|
311
|
-
new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
|
|
312
|
-
else:
|
|
313
|
-
new_fields.append(field) # pragma: no cover
|
|
377
|
+
ensure_pyarrow()
|
|
378
|
+
import io
|
|
379
|
+
|
|
380
|
+
import pyarrow as pa
|
|
381
|
+
import pyarrow.parquet as pq
|
|
382
|
+
|
|
383
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
384
|
+
|
|
385
|
+
schema = table.schema
|
|
386
|
+
if any(str(f.type).startswith("decimal64") for f in schema):
|
|
387
|
+
new_fields = []
|
|
388
|
+
for field in schema:
|
|
389
|
+
if str(field.type).startswith("decimal64"):
|
|
390
|
+
import re
|
|
391
|
+
|
|
392
|
+
match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
|
|
393
|
+
if match:
|
|
394
|
+
precision, scale = int(match.group(1)), int(match.group(2))
|
|
395
|
+
new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
|
|
314
396
|
else:
|
|
315
397
|
new_fields.append(field)
|
|
316
|
-
|
|
398
|
+
else:
|
|
399
|
+
new_fields.append(field)
|
|
400
|
+
table = table.cast(pa.schema(new_fields))
|
|
317
401
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
402
|
+
buffer = io.BytesIO()
|
|
403
|
+
pq.write_table(table, buffer, **kwargs)
|
|
404
|
+
buffer.seek(0)
|
|
405
|
+
self.write_bytes(resolved_path, buffer.read())
|
|
321
406
|
|
|
322
407
|
def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
|
|
323
408
|
"""Stream Arrow record batches.
|
|
@@ -325,13 +410,14 @@ class ObStoreBackend:
|
|
|
325
410
|
Yields:
|
|
326
411
|
Iterator of Arrow record batches from matching objects.
|
|
327
412
|
"""
|
|
328
|
-
|
|
413
|
+
ensure_pyarrow()
|
|
329
414
|
from io import BytesIO
|
|
330
415
|
|
|
331
416
|
import pyarrow.parquet as pq
|
|
332
417
|
|
|
333
418
|
for obj_path in self.glob(pattern, **kwargs):
|
|
334
|
-
|
|
419
|
+
resolved_path = resolve_storage_path(obj_path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
420
|
+
result = self.store.get(resolved_path)
|
|
335
421
|
bytes_obj = result.bytes()
|
|
336
422
|
data = bytes_obj.to_bytes()
|
|
337
423
|
buffer = BytesIO(data)
|
|
@@ -340,26 +426,38 @@ class ObStoreBackend:
|
|
|
340
426
|
|
|
341
427
|
def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
|
|
342
428
|
"""Generate a signed URL for the object."""
|
|
343
|
-
resolved_path = self.
|
|
344
|
-
if hasattr(self.store, "sign_url") and callable(self.store.sign_url):
|
|
345
|
-
return self.store.sign_url(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
|
|
429
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
346
430
|
return f"{self.store_uri}/{resolved_path}"
|
|
347
431
|
|
|
348
|
-
async def read_bytes_async(self, path: "
|
|
432
|
+
async def read_bytes_async(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
|
|
349
433
|
"""Read bytes from storage asynchronously."""
|
|
350
|
-
|
|
434
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
435
|
+
if self._is_local_store:
|
|
436
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
437
|
+
else:
|
|
438
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
439
|
+
|
|
351
440
|
result = await self.store.get_async(resolved_path)
|
|
352
441
|
bytes_obj = await result.bytes_async()
|
|
353
442
|
return bytes_obj.to_bytes() # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
354
443
|
|
|
355
|
-
async def write_bytes_async(self, path: "
|
|
444
|
+
async def write_bytes_async(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
356
445
|
"""Write bytes to storage asynchronously."""
|
|
357
|
-
|
|
446
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
447
|
+
if self._is_local_store:
|
|
448
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
449
|
+
else:
|
|
450
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
451
|
+
|
|
358
452
|
await self.store.put_async(resolved_path, data)
|
|
359
453
|
|
|
360
454
|
async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
|
|
361
455
|
"""List objects in storage asynchronously."""
|
|
362
|
-
resolved_prefix =
|
|
456
|
+
resolved_prefix = (
|
|
457
|
+
resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
|
|
458
|
+
if prefix
|
|
459
|
+
else self.base_path or ""
|
|
460
|
+
)
|
|
363
461
|
|
|
364
462
|
objects: list[str] = []
|
|
365
463
|
async for batch in self.store.list_async(resolved_prefix): # pyright: ignore[reportAttributeAccessIssue]
|
|
@@ -371,47 +469,72 @@ class ObStoreBackend:
|
|
|
371
469
|
|
|
372
470
|
return sorted(objects)
|
|
373
471
|
|
|
374
|
-
async def read_text_async(self, path: "
|
|
472
|
+
async def read_text_async(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
375
473
|
"""Read text from storage asynchronously."""
|
|
376
474
|
data = await self.read_bytes_async(path, **kwargs)
|
|
377
475
|
return data.decode(encoding)
|
|
378
476
|
|
|
379
|
-
async def write_text_async(
|
|
380
|
-
self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
|
|
381
|
-
) -> None: # pyright: ignore[reportUnusedParameter]
|
|
477
|
+
async def write_text_async(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
382
478
|
"""Write text to storage asynchronously."""
|
|
383
479
|
encoded_data = data.encode(encoding)
|
|
384
480
|
await self.write_bytes_async(path, encoded_data, **kwargs)
|
|
385
481
|
|
|
386
|
-
async def exists_async(self, path: "
|
|
482
|
+
async def exists_async(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
|
|
387
483
|
"""Check if object exists in storage asynchronously."""
|
|
388
|
-
|
|
484
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
485
|
+
if self._is_local_store:
|
|
486
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
487
|
+
else:
|
|
488
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
489
|
+
|
|
389
490
|
try:
|
|
390
491
|
await self.store.head_async(resolved_path)
|
|
391
492
|
except Exception:
|
|
392
493
|
return False
|
|
393
494
|
return True
|
|
394
495
|
|
|
395
|
-
async def delete_async(self, path: "
|
|
496
|
+
async def delete_async(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
396
497
|
"""Delete object from storage asynchronously."""
|
|
397
|
-
|
|
498
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
499
|
+
if self._is_local_store:
|
|
500
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
501
|
+
else:
|
|
502
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
503
|
+
|
|
398
504
|
await self.store.delete_async(resolved_path)
|
|
399
505
|
|
|
400
|
-
async def copy_async(self, source: "
|
|
506
|
+
async def copy_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
401
507
|
"""Copy object in storage asynchronously."""
|
|
402
|
-
|
|
403
|
-
|
|
508
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
509
|
+
if self._is_local_store:
|
|
510
|
+
source_path = self._resolve_path_for_local_store(source)
|
|
511
|
+
dest_path = self._resolve_path_for_local_store(destination)
|
|
512
|
+
else:
|
|
513
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
514
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
515
|
+
|
|
404
516
|
await self.store.copy_async(source_path, dest_path)
|
|
405
517
|
|
|
406
|
-
async def move_async(self, source: "
|
|
518
|
+
async def move_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
|
|
407
519
|
"""Move object in storage asynchronously."""
|
|
408
|
-
|
|
409
|
-
|
|
520
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
521
|
+
if self._is_local_store:
|
|
522
|
+
source_path = self._resolve_path_for_local_store(source)
|
|
523
|
+
dest_path = self._resolve_path_for_local_store(destination)
|
|
524
|
+
else:
|
|
525
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
|
|
526
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
|
|
527
|
+
|
|
410
528
|
await self.store.rename_async(source_path, dest_path)
|
|
411
529
|
|
|
412
|
-
async def get_metadata_async(self, path: "
|
|
530
|
+
async def get_metadata_async(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
|
|
413
531
|
"""Get object metadata from storage asynchronously."""
|
|
414
|
-
|
|
532
|
+
# For LocalStore (file protocol with base_path), use special resolution
|
|
533
|
+
if self._is_local_store:
|
|
534
|
+
resolved_path = self._resolve_path_for_local_store(path)
|
|
535
|
+
else:
|
|
536
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
537
|
+
|
|
415
538
|
result: dict[str, Any] = {}
|
|
416
539
|
try:
|
|
417
540
|
metadata = await self.store.head_async(resolved_path)
|
|
@@ -433,42 +556,35 @@ class ObStoreBackend:
|
|
|
433
556
|
else:
|
|
434
557
|
return result
|
|
435
558
|
|
|
436
|
-
async def read_arrow_async(self, path: "
|
|
559
|
+
async def read_arrow_async(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
|
|
437
560
|
"""Read Arrow table from storage asynchronously."""
|
|
438
|
-
|
|
439
|
-
if hasattr(self.store, "read_arrow_async"):
|
|
440
|
-
return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
|
|
441
|
-
|
|
442
|
-
self._ensure_pyarrow()
|
|
561
|
+
ensure_pyarrow()
|
|
443
562
|
import io
|
|
444
563
|
|
|
445
564
|
import pyarrow.parquet as pq
|
|
446
565
|
|
|
447
|
-
|
|
566
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
567
|
+
data = await self.read_bytes_async(resolved_path)
|
|
568
|
+
return pq.read_table(io.BytesIO(data), **kwargs)
|
|
448
569
|
|
|
449
|
-
async def write_arrow_async(self, path: "
|
|
570
|
+
async def write_arrow_async(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
|
|
450
571
|
"""Write Arrow table to storage asynchronously."""
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
await self.store.write_arrow_async(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
|
|
454
|
-
else:
|
|
455
|
-
self._ensure_pyarrow()
|
|
456
|
-
import io
|
|
572
|
+
ensure_pyarrow()
|
|
573
|
+
import io
|
|
457
574
|
|
|
458
|
-
|
|
575
|
+
import pyarrow.parquet as pq
|
|
459
576
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
577
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
578
|
+
buffer = io.BytesIO()
|
|
579
|
+
pq.write_table(table, buffer, **kwargs)
|
|
580
|
+
buffer.seek(0)
|
|
581
|
+
await self.write_bytes_async(resolved_path, buffer.read())
|
|
464
582
|
|
|
465
583
|
def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
|
|
466
|
-
resolved_pattern = self.
|
|
584
|
+
resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
|
|
467
585
|
return _AsyncArrowIterator(self, resolved_pattern, **kwargs)
|
|
468
586
|
|
|
469
587
|
async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
|
|
470
588
|
"""Generate a signed URL asynchronously."""
|
|
471
|
-
resolved_path = self.
|
|
472
|
-
if hasattr(self.store, "sign_url_async") and callable(self.store.sign_url_async):
|
|
473
|
-
return await self.store.sign_url_async(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
|
|
589
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
|
|
474
590
|
return f"{self.store_uri}/{resolved_path}"
|