sqlspec 0.26.0-py3-none-any.whl → 0.27.0-py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of sqlspec might be problematic.
- sqlspec/__init__.py +7 -15
- sqlspec/_serialization.py +55 -25
- sqlspec/_typing.py +62 -52
- sqlspec/adapters/adbc/_types.py +1 -1
- sqlspec/adapters/adbc/adk/__init__.py +5 -0
- sqlspec/adapters/adbc/adk/store.py +870 -0
- sqlspec/adapters/adbc/config.py +62 -12
- sqlspec/adapters/adbc/data_dictionary.py +52 -2
- sqlspec/adapters/adbc/driver.py +144 -45
- sqlspec/adapters/adbc/litestar/__init__.py +5 -0
- sqlspec/adapters/adbc/litestar/store.py +504 -0
- sqlspec/adapters/adbc/type_converter.py +44 -50
- sqlspec/adapters/aiosqlite/_types.py +1 -1
- sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/adk/store.py +527 -0
- sqlspec/adapters/aiosqlite/config.py +86 -16
- sqlspec/adapters/aiosqlite/data_dictionary.py +34 -2
- sqlspec/adapters/aiosqlite/driver.py +127 -38
- sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
- sqlspec/adapters/aiosqlite/pool.py +7 -7
- sqlspec/adapters/asyncmy/__init__.py +7 -1
- sqlspec/adapters/asyncmy/_types.py +1 -1
- sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
- sqlspec/adapters/asyncmy/adk/store.py +493 -0
- sqlspec/adapters/asyncmy/config.py +59 -17
- sqlspec/adapters/asyncmy/data_dictionary.py +41 -2
- sqlspec/adapters/asyncmy/driver.py +293 -62
- sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncmy/litestar/store.py +296 -0
- sqlspec/adapters/asyncpg/__init__.py +2 -1
- sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
- sqlspec/adapters/asyncpg/_types.py +11 -7
- sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
- sqlspec/adapters/asyncpg/adk/store.py +450 -0
- sqlspec/adapters/asyncpg/config.py +57 -36
- sqlspec/adapters/asyncpg/data_dictionary.py +41 -2
- sqlspec/adapters/asyncpg/driver.py +153 -23
- sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncpg/litestar/store.py +253 -0
- sqlspec/adapters/bigquery/_types.py +1 -1
- sqlspec/adapters/bigquery/adk/__init__.py +5 -0
- sqlspec/adapters/bigquery/adk/store.py +576 -0
- sqlspec/adapters/bigquery/config.py +25 -11
- sqlspec/adapters/bigquery/data_dictionary.py +42 -2
- sqlspec/adapters/bigquery/driver.py +352 -144
- sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
- sqlspec/adapters/bigquery/litestar/store.py +327 -0
- sqlspec/adapters/bigquery/type_converter.py +55 -23
- sqlspec/adapters/duckdb/_types.py +2 -2
- sqlspec/adapters/duckdb/adk/__init__.py +14 -0
- sqlspec/adapters/duckdb/adk/store.py +553 -0
- sqlspec/adapters/duckdb/config.py +79 -21
- sqlspec/adapters/duckdb/data_dictionary.py +41 -2
- sqlspec/adapters/duckdb/driver.py +138 -43
- sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
- sqlspec/adapters/duckdb/litestar/store.py +332 -0
- sqlspec/adapters/duckdb/pool.py +5 -5
- sqlspec/adapters/duckdb/type_converter.py +51 -21
- sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
- sqlspec/adapters/oracledb/_types.py +20 -2
- sqlspec/adapters/oracledb/adk/__init__.py +5 -0
- sqlspec/adapters/oracledb/adk/store.py +1745 -0
- sqlspec/adapters/oracledb/config.py +120 -36
- sqlspec/adapters/oracledb/data_dictionary.py +87 -20
- sqlspec/adapters/oracledb/driver.py +292 -84
- sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
- sqlspec/adapters/oracledb/litestar/store.py +767 -0
- sqlspec/adapters/oracledb/migrations.py +316 -25
- sqlspec/adapters/oracledb/type_converter.py +91 -16
- sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
- sqlspec/adapters/psqlpy/_types.py +2 -1
- sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
- sqlspec/adapters/psqlpy/adk/store.py +482 -0
- sqlspec/adapters/psqlpy/config.py +45 -19
- sqlspec/adapters/psqlpy/data_dictionary.py +41 -2
- sqlspec/adapters/psqlpy/driver.py +101 -31
- sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
- sqlspec/adapters/psqlpy/litestar/store.py +272 -0
- sqlspec/adapters/psqlpy/type_converter.py +40 -11
- sqlspec/adapters/psycopg/_type_handlers.py +80 -0
- sqlspec/adapters/psycopg/_types.py +2 -1
- sqlspec/adapters/psycopg/adk/__init__.py +5 -0
- sqlspec/adapters/psycopg/adk/store.py +944 -0
- sqlspec/adapters/psycopg/config.py +65 -37
- sqlspec/adapters/psycopg/data_dictionary.py +77 -3
- sqlspec/adapters/psycopg/driver.py +200 -78
- sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
- sqlspec/adapters/psycopg/litestar/store.py +554 -0
- sqlspec/adapters/sqlite/__init__.py +2 -1
- sqlspec/adapters/sqlite/_type_handlers.py +86 -0
- sqlspec/adapters/sqlite/_types.py +1 -1
- sqlspec/adapters/sqlite/adk/__init__.py +5 -0
- sqlspec/adapters/sqlite/adk/store.py +572 -0
- sqlspec/adapters/sqlite/config.py +85 -16
- sqlspec/adapters/sqlite/data_dictionary.py +34 -2
- sqlspec/adapters/sqlite/driver.py +120 -52
- sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/sqlite/litestar/store.py +318 -0
- sqlspec/adapters/sqlite/pool.py +5 -5
- sqlspec/base.py +45 -26
- sqlspec/builder/__init__.py +73 -4
- sqlspec/builder/_base.py +91 -58
- sqlspec/builder/_column.py +5 -5
- sqlspec/builder/_ddl.py +98 -89
- sqlspec/builder/_delete.py +5 -4
- sqlspec/builder/_dml.py +388 -0
- sqlspec/{_sql.py → builder/_factory.py} +41 -44
- sqlspec/builder/_insert.py +5 -82
- sqlspec/builder/{mixins/_join_operations.py → _join.py} +145 -143
- sqlspec/builder/_merge.py +446 -11
- sqlspec/builder/_parsing_utils.py +9 -11
- sqlspec/builder/_select.py +1313 -25
- sqlspec/builder/_update.py +11 -42
- sqlspec/cli.py +76 -69
- sqlspec/config.py +231 -60
- sqlspec/core/__init__.py +5 -4
- sqlspec/core/cache.py +18 -18
- sqlspec/core/compiler.py +6 -8
- sqlspec/core/filters.py +37 -37
- sqlspec/core/hashing.py +9 -9
- sqlspec/core/parameters.py +76 -45
- sqlspec/core/result.py +102 -46
- sqlspec/core/splitter.py +16 -17
- sqlspec/core/statement.py +32 -31
- sqlspec/core/type_conversion.py +3 -2
- sqlspec/driver/__init__.py +1 -3
- sqlspec/driver/_async.py +95 -161
- sqlspec/driver/_common.py +133 -80
- sqlspec/driver/_sync.py +95 -162
- sqlspec/driver/mixins/_result_tools.py +20 -236
- sqlspec/driver/mixins/_sql_translator.py +4 -4
- sqlspec/exceptions.py +70 -7
- sqlspec/extensions/adk/__init__.py +53 -0
- sqlspec/extensions/adk/_types.py +51 -0
- sqlspec/extensions/adk/converters.py +172 -0
- sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
- sqlspec/extensions/adk/migrations/__init__.py +0 -0
- sqlspec/extensions/adk/service.py +181 -0
- sqlspec/extensions/adk/store.py +536 -0
- sqlspec/extensions/aiosql/adapter.py +73 -53
- sqlspec/extensions/litestar/__init__.py +21 -4
- sqlspec/extensions/litestar/cli.py +54 -10
- sqlspec/extensions/litestar/config.py +59 -266
- sqlspec/extensions/litestar/handlers.py +46 -17
- sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
- sqlspec/extensions/litestar/migrations/__init__.py +3 -0
- sqlspec/extensions/litestar/plugin.py +324 -223
- sqlspec/extensions/litestar/providers.py +25 -25
- sqlspec/extensions/litestar/store.py +265 -0
- sqlspec/loader.py +30 -49
- sqlspec/migrations/base.py +200 -76
- sqlspec/migrations/commands.py +591 -62
- sqlspec/migrations/context.py +6 -9
- sqlspec/migrations/fix.py +199 -0
- sqlspec/migrations/loaders.py +47 -19
- sqlspec/migrations/runner.py +241 -75
- sqlspec/migrations/tracker.py +237 -21
- sqlspec/migrations/utils.py +51 -3
- sqlspec/migrations/validation.py +177 -0
- sqlspec/protocols.py +66 -36
- sqlspec/storage/_utils.py +98 -0
- sqlspec/storage/backends/fsspec.py +134 -106
- sqlspec/storage/backends/local.py +78 -51
- sqlspec/storage/backends/obstore.py +278 -162
- sqlspec/storage/registry.py +75 -39
- sqlspec/typing.py +14 -84
- sqlspec/utils/config_resolver.py +6 -6
- sqlspec/utils/correlation.py +4 -5
- sqlspec/utils/data_transformation.py +3 -2
- sqlspec/utils/deprecation.py +9 -8
- sqlspec/utils/fixtures.py +4 -4
- sqlspec/utils/logging.py +46 -6
- sqlspec/utils/module_loader.py +2 -2
- sqlspec/utils/schema.py +288 -0
- sqlspec/utils/serializers.py +3 -3
- sqlspec/utils/sync_tools.py +21 -17
- sqlspec/utils/text.py +1 -2
- sqlspec/utils/type_guards.py +111 -20
- sqlspec/utils/version.py +433 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/METADATA +40 -21
- sqlspec-0.27.0.dist-info/RECORD +207 -0
- sqlspec/builder/mixins/__init__.py +0 -55
- sqlspec/builder/mixins/_cte_and_set_ops.py +0 -253
- sqlspec/builder/mixins/_delete_operations.py +0 -50
- sqlspec/builder/mixins/_insert_operations.py +0 -282
- sqlspec/builder/mixins/_merge_operations.py +0 -698
- sqlspec/builder/mixins/_order_limit_operations.py +0 -145
- sqlspec/builder/mixins/_pivot_operations.py +0 -157
- sqlspec/builder/mixins/_select_operations.py +0 -930
- sqlspec/builder/mixins/_update_operations.py +0 -199
- sqlspec/builder/mixins/_where_clause.py +0 -1298
- sqlspec-0.26.0.dist-info/RECORD +0 -157
- sqlspec-0.26.0.dist-info/licenses/NOTICE +0 -29
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.27.0.dist-info}/licenses/LICENSE +0 -0
```diff
--- a/sqlspec/storage/backends/fsspec.py
+++ b/sqlspec/storage/backends/fsspec.py
@@ -1,10 +1,13 @@
 # pyright: reportPrivateUsage=false
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any
+
+from mypy_extensions import mypyc_attr
 
 from sqlspec.exceptions import MissingDependencyError
-from sqlspec.
+from sqlspec.storage._utils import ensure_pyarrow, resolve_storage_path
+from sqlspec.typing import FSSPEC_INSTALLED
 from sqlspec.utils.sync_tools import async_
 
 if TYPE_CHECKING:
```
```diff
@@ -18,41 +21,80 @@ logger = logging.getLogger(__name__)
 
 
 class _ArrowStreamer:
+    """Async iterator for streaming Arrow batches from FSSpec backend.
+
+    Uses async_() to offload blocking operations to thread pool,
+    preventing event loop blocking during file I/O and iteration.
+
+    CRITICAL: Creates generators on main thread, offloads only next() calls.
+    """
+
+    __slots__ = ("_initialized", "backend", "batch_iterator", "kwargs", "paths_iterator", "pattern")
+
     def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
         self.backend = backend
         self.pattern = pattern
         self.kwargs = kwargs
-        self.paths_iterator:
-        self.batch_iterator:
+        self.paths_iterator: Iterator[str] | None = None
+        self.batch_iterator: Iterator[ArrowRecordBatch] | None = None
+        self._initialized = False
 
     def __aiter__(self) -> "_ArrowStreamer":
         return self
 
     async def _initialize(self) -> None:
-        """Initialize paths iterator."""
-        if self.
+        """Initialize paths iterator asynchronously."""
+        if not self._initialized:
             paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
             self.paths_iterator = iter(paths)
+            self._initialized = True
 
     async def __anext__(self) -> "ArrowRecordBatch":
+        """Get next Arrow batch asynchronously.
+
+        Iterative state machine that avoids recursion and blocking calls.
+
+        Returns:
+            Arrow record batches from matching files.
+
+        Raises:
+            StopAsyncIteration: When no more batches available.
+        """
         await self._initialize()
 
-
+        while True:
+            if self.batch_iterator is not None:
+
+                def _safe_next_batch() -> "ArrowRecordBatch":
+                    try:
+                        return next(self.batch_iterator)  # type: ignore[arg-type]
+                    except StopIteration as e:
+                        raise StopAsyncIteration from e
+
+                try:
+                    return await async_(_safe_next_batch)()
+                except StopAsyncIteration:
+                    self.batch_iterator = None
+                    continue
+
             try:
-
-            except StopIteration:
-
+                path = next(self.paths_iterator)  # type: ignore[arg-type]
+            except StopIteration as e:
+                raise StopAsyncIteration from e
+
+            self.batch_iterator = self.backend._stream_file_batches(path)
 
-
+    async def aclose(self) -> None:
+        """Close underlying batch iterator."""
+        if self.batch_iterator is not None:
             try:
-
-
-
-
-                raise StopAsyncIteration
-            raise StopAsyncIteration
+                close_method = self.batch_iterator.close  # type: ignore[attr-defined]
+                await async_(close_method)()
+            except AttributeError:
+                pass
 
 
+@mypyc_attr(allow_interpreted_subclasses=True)
 class FSSpecBackend:
     """Storage backend using fsspec.
 
```
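The new `_ArrowStreamer` drives globbing and Parquet batch iteration through `async_` so the event loop never blocks on file I/O. A minimal consumption sketch, assuming the public entry point is `FSSpecBackend.stream_arrow_async` (shown further down returning `_ArrowStreamer` directly) and using a hypothetical local setup:

```python
# Sketch: consuming the async Arrow stream. The URI, base_path, and pattern are
# examples; stream_arrow_async is assumed to return the _ArrowStreamer itself
# rather than a coroutine.
import asyncio

from sqlspec.storage.backends.fsspec import FSSpecBackend


async def count_rows() -> int:
    backend = FSSpecBackend("file", base_path="/tmp/sqlspec-demo")  # hypothetical local setup
    streamer = backend.stream_arrow_async("*.parquet")
    total = 0
    async for batch in streamer:  # each item is a pyarrow.RecordBatch
        total += batch.num_rows
    await streamer.aclose()  # new in 0.27.0: releases the underlying file iterator
    return total


print(asyncio.run(count_rows()))
```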
```diff
@@ -60,19 +102,37 @@ class FSSpecBackend:
     including HTTP, HTTPS, FTP, and cloud storage services.
     """
 
+    __slots__ = ("_fs_uri", "backend_type", "base_path", "fs", "protocol")
+
     def __init__(self, uri: str, **kwargs: Any) -> None:
-
+        if not FSSPEC_INSTALLED:
+            raise MissingDependencyError(package="fsspec", install_package="fsspec")
 
         base_path = kwargs.pop("base_path", "")
-        self.base_path = base_path.rstrip("/") if base_path else ""
 
         if "://" in uri:
             self.protocol = uri.split("://", maxsplit=1)[0]
             self._fs_uri = uri
+
+            # For S3/cloud URIs, extract bucket/path from URI as base_path
+            if self.protocol in {"s3", "gs", "az", "gcs"}:
+                from urllib.parse import urlparse
+
+                parsed = urlparse(uri)
+                # Combine netloc (bucket) and path for base_path
+                if parsed.netloc:
+                    uri_base_path = parsed.netloc
+                    if parsed.path and parsed.path != "/":
+                        uri_base_path = f"{uri_base_path}{parsed.path}"
+                    # Only use URI base_path if no explicit base_path provided
+                    if not base_path:
+                        base_path = uri_base_path
         else:
             self.protocol = uri
             self._fs_uri = f"{uri}://"
 
+        self.base_path = base_path.rstrip("/") if base_path else ""
+
         import fsspec
 
         self.fs = fsspec.filesystem(self.protocol, **kwargs)
```
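The rewritten `__init__` now derives `base_path` from cloud URIs up front instead of deferring bucket handling to a later path helper. A small standalone sketch of just that derivation, mirroring the logic above (the function name and example URIs are illustrative, not part of sqlspec):

```python
# Sketch of the new base_path derivation for cloud URIs, mirroring the
# __init__ logic in the hunk above.
from urllib.parse import urlparse


def derive_base_path(uri: str, explicit_base_path: str = "") -> str:
    base_path = explicit_base_path
    if "://" in uri:
        protocol = uri.split("://", maxsplit=1)[0]
        if protocol in {"s3", "gs", "az", "gcs"}:
            parsed = urlparse(uri)
            if parsed.netloc:
                uri_base_path = parsed.netloc
                if parsed.path and parsed.path != "/":
                    uri_base_path = f"{uri_base_path}{parsed.path}"
                # Explicit base_path still wins over the URI-derived one.
                if not base_path:
                    base_path = uri_base_path
    return base_path.rstrip("/") if base_path else ""


print(derive_base_path("s3://my-bucket/raw/events"))    # my-bucket/raw/events
print(derive_base_path("s3://my-bucket/", "override"))  # override
```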
```diff
@@ -93,48 +153,19 @@ class FSSpecBackend:
 
         return cls(uri=uri, **kwargs)
 
-    def _ensure_fsspec(self) -> None:
-        """Ensure fsspec is available for operations."""
-        if not FSSPEC_INSTALLED:
-            raise MissingDependencyError(package="fsspec", install_package="fsspec")
-
-    def _ensure_pyarrow(self) -> None:
-        """Ensure PyArrow is available for Arrow operations."""
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
-
-    def _resolve_path(self, path: Union[str, Path]) -> str:
-        """Resolve path relative to base_path."""
-        path_str = str(path)
-        if self.base_path:
-            clean_base = self.base_path.rstrip("/")
-            clean_path = path_str.lstrip("/")
-            return f"{clean_base}/{clean_path}"
-        if self.protocol == "s3" and "://" in self._fs_uri:
-            # For S3, we need to include the bucket from the URI
-            # Extract bucket and path from URI like s3://bucket/path
-            uri_parts = self._fs_uri.split("://", 1)[1]  # Remove s3://
-            if "/" in uri_parts:
-                # URI has bucket and base path
-                return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
-            # URI has only bucket
-            return f"{uri_parts}/{path_str.lstrip('/')}"
-        return path_str
-
     @property
     def base_uri(self) -> str:
         return self._fs_uri
 
-    def read_bytes(self, path:
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:
         """Read bytes from an object."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore
 
-    def write_bytes(self, path:
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
         """Write bytes to an object."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
 
-        # Only create directories for local file systems, not for cloud storage
         if self.protocol == "file":
             parent_dir = str(Path(resolved_path).parent)
             if parent_dir and not self.fs.exists(parent_dir):
```
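This hunk drops the per-backend `_ensure_fsspec`, `_ensure_pyarrow`, and `_resolve_path` helpers in favor of `ensure_pyarrow` and `resolve_storage_path` from the new `sqlspec/storage/_utils.py` (+98 lines in the listing above). That module's contents are not part of this diff; only the call sites are visible. The sketch below is therefore an assumption about what such helpers might look like, not the actual sqlspec implementation:

```python
# Hypothetical sketch of the shared helpers imported from sqlspec.storage._utils.
# Only the signatures can be inferred from the call sites in this diff
# (ensure_pyarrow() and resolve_storage_path(path, base_path, protocol, strip_file_scheme=...));
# the bodies below are assumptions.
from importlib.util import find_spec
from pathlib import Path
from typing import Union

from sqlspec.exceptions import MissingDependencyError


def ensure_pyarrow() -> None:
    """Raise when pyarrow is not importable (assumed behavior)."""
    if find_spec("pyarrow") is None:
        raise MissingDependencyError(package="pyarrow", install_package="pyarrow")


def resolve_storage_path(
    path: Union[str, Path], base_path: str, protocol: str, *, strip_file_scheme: bool = True
) -> str:
    """Join a caller-supplied path onto the backend base_path (assumed behavior)."""
    path_str = str(path)
    if strip_file_scheme and protocol == "file" and path_str.startswith("file://"):
        path_str = path_str[len("file://"):]
    if base_path:
        return f"{base_path.rstrip('/')}/{path_str.lstrip('/')}"
    return path_str
```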
```diff
@@ -143,84 +174,85 @@ class FSSpecBackend:
         with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
             f.write(data)  # pyright: ignore
 
-    def read_text(self, path:
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from an object."""
         data = self.read_bytes(path, **kwargs)
         return data.decode(encoding)
 
-    def write_text(self, path:
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text to an object."""
         self.write_bytes(path, data.encode(encoding), **kwargs)
 
-    def exists(self, path:
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:
         """Check if an object exists."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]
 
-    def delete(self, path:
+    def delete(self, path: str | Path, **kwargs: Any) -> None:
         """Delete an object."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.rm(resolved_path, **kwargs)
 
-    def copy(self, source:
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Copy an object."""
-        source_path = self.
-        dest_path = self.
+        source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
+        dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.copy(source_path, dest_path, **kwargs)
 
-    def move(self, source:
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Move an object."""
-        source_path = self.
-        dest_path = self.
+        source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
+        dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.mv(source_path, dest_path, **kwargs)
 
-    def read_arrow(self, path:
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
         """Read an Arrow table from storage."""
-
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        ensure_pyarrow()
         import pyarrow.parquet as pq
 
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
             return pq.read_table(f)
 
-    def write_arrow(self, path:
+    def write_arrow(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
         """Write an Arrow table to storage."""
-
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        ensure_pyarrow()
         import pyarrow.parquet as pq
 
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         with self.fs.open(resolved_path, mode="wb") as f:
             pq.write_table(table, f, **kwargs)  # pyright: ignore
 
     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
         """List objects with optional prefix."""
-        resolved_prefix = self.
+        resolved_prefix = resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=False)
         if recursive:
             return sorted(self.fs.find(resolved_prefix, **kwargs))
         return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
 
     def glob(self, pattern: str, **kwargs: Any) -> list[str]:
         """Find objects matching a glob pattern."""
-        resolved_pattern = self.
+        resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=False)
         return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore
 
-    def is_object(self, path:
+    def is_object(self, path: str | Path) -> bool:
         """Check if path points to an object."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
 
-    def is_path(self, path:
+    def is_path(self, path: str | Path) -> bool:
         """Check if path points to a prefix (directory-like)."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]
 
-    def get_metadata(self, path:
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
         """Get object metadata."""
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         try:
-            resolved_path = self._resolve_path(path)
             info = self.fs.info(resolved_path, **kwargs)
+        except FileNotFoundError:
+            return {"path": resolved_path, "exists": False}
+        else:
             if isinstance(info, dict):
                 return {
                     "path": resolved_path,
```
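With `ensure_pyarrow()` guarding the Arrow paths and every signature widened to `str | Path`, a synchronous Parquet round-trip through the backend looks roughly like this (the protocol, base_path, and file names are examples; the target directory is assumed to already exist):

```python
# Sketch: Parquet round-trip via the modernized sync API. Paths are examples,
# and /tmp/sqlspec-demo is assumed to exist before writing.
import pyarrow as pa

from sqlspec.storage.backends.fsspec import FSSpecBackend

backend = FSSpecBackend("file", base_path="/tmp/sqlspec-demo")

table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
backend.write_arrow("users.parquet", table)           # resolves to /tmp/sqlspec-demo/users.parquet
print(backend.read_arrow("users.parquet").num_rows)   # 3
```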
```diff
@@ -229,23 +261,20 @@ class FSSpecBackend:
                     "last_modified": info.get("mtime"),
                     "type": info.get("type", "file"),
                 }
-
-
-
-
-
-
-
-                "last_modified": info.mtime,
-                "type": info.type,
-            }
+            return {
+                "path": resolved_path,
+                "exists": True,
+                "size": info.size,
+                "last_modified": info.mtime,
+                "type": info.type,
+            }
 
     def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
         """Generate a signed URL for the file."""
-        resolved_path = self.
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return f"{self._fs_uri}{resolved_path}"
 
-    def _stream_file_batches(self, obj_path:
+    def _stream_file_batches(self, obj_path: str | Path) -> "Iterator[ArrowRecordBatch]":
         import pyarrow.parquet as pq
 
         with self.fs.open(obj_path, mode="rb") as f:
```
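`get_metadata` now resolves the path once, treats a missing object as data rather than an exception, and returns a flat dict in both branches. Roughly (backend, paths, and values are illustrative):

```python
# Sketch: the two shapes returned by the reworked get_metadata.
from sqlspec.storage.backends.fsspec import FSSpecBackend

backend = FSSpecBackend("file", base_path="/tmp/sqlspec-demo")

meta = backend.get_metadata("users.parquet")
# e.g. {"path": "/tmp/sqlspec-demo/users.parquet", "exists": True,
#       "size": 1234, "last_modified": ..., "type": "file"}

missing = backend.get_metadata("no-such-object.parquet")
# {"path": "/tmp/sqlspec-demo/no-such-object.parquet", "exists": False}
```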
```diff
@@ -253,17 +282,16 @@ class FSSpecBackend:
             yield from parquet_file.iter_batches()
 
     def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
-
-        self._ensure_pyarrow()
+        ensure_pyarrow()
 
         for obj_path in self.glob(pattern, **kwargs):
             yield from self._stream_file_batches(obj_path)
 
-    async def read_bytes_async(self, path:
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:
         """Read bytes from storage asynchronously."""
         return await async_(self.read_bytes)(path, **kwargs)
 
-    async def write_bytes_async(self, path:
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
         """Write bytes to storage asynchronously."""
         return await async_(self.write_bytes)(path, data, **kwargs)
 
```
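Every `*_async` method keeps delegating to its sync counterpart through `async_` from `sqlspec.utils.sync_tools`, whose internals are not part of this diff. A minimal stand-in with the behavior the docstrings describe (thread offloading) might look like the following; it is an assumption, not the sqlspec implementation:

```python
# Hypothetical stand-in for async_: wrap a blocking callable so it runs in a
# worker thread and can be awaited. The real helper lives in
# sqlspec/utils/sync_tools.py and may differ.
import asyncio
from collections.abc import Callable, Coroutine
from typing import Any, TypeVar

T = TypeVar("T")


def async_(fn: Callable[..., T]) -> Callable[..., Coroutine[Any, Any, T]]:
    async def wrapper(*args: Any, **kwargs: Any) -> T:
        # Offload the blocking call so the event loop stays responsive.
        return await asyncio.to_thread(fn, *args, **kwargs)

    return wrapper


# Usage mirroring the methods above:
# return await async_(self.read_bytes)(path, **kwargs)
```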
```diff
@@ -277,15 +305,15 @@ class FSSpecBackend:
         Returns:
             AsyncIterator of Arrow record batches
         """
-
+        ensure_pyarrow()
 
         return _ArrowStreamer(self, pattern, **kwargs)
 
-    async def read_text_async(self, path:
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from storage asynchronously."""
         return await async_(self.read_text)(path, encoding, **kwargs)
 
-    async def write_text_async(self, path:
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text to storage asynchronously."""
         await async_(self.write_text)(path, data, encoding, **kwargs)
 
```
```diff
@@ -293,23 +321,23 @@ class FSSpecBackend:
         """List objects in storage asynchronously."""
         return await async_(self.list_objects)(prefix, recursive, **kwargs)
 
-    async def exists_async(self, path:
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:
         """Check if object exists in storage asynchronously."""
         return await async_(self.exists)(path, **kwargs)
 
-    async def delete_async(self, path:
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:
         """Delete object from storage asynchronously."""
         await async_(self.delete)(path, **kwargs)
 
-    async def copy_async(self, source:
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Copy object in storage asynchronously."""
         await async_(self.copy)(source, destination, **kwargs)
 
-    async def move_async(self, source:
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Move object in storage asynchronously."""
         await async_(self.move)(source, destination, **kwargs)
 
-    async def get_metadata_async(self, path:
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
         """Get object metadata from storage asynchronously."""
         return await async_(self.get_metadata)(path, **kwargs)
 
```
```diff
@@ -317,10 +345,10 @@ class FSSpecBackend:
         """Generate a signed URL asynchronously."""
         return await async_(self.sign)(path, expires_in, for_upload)
 
-    async def read_arrow_async(self, path:
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
         """Read Arrow table from storage asynchronously."""
         return await async_(self.read_arrow)(path, **kwargs)
 
-    async def write_arrow_async(self, path:
+    async def write_arrow_async(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
         """Write Arrow table to storage asynchronously."""
         await async_(self.write_arrow)(path, table, **kwargs)
```