sqlspec 0.26.0__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sqlspec might be problematic. Click here for more details.
- sqlspec/__init__.py +7 -15
- sqlspec/_serialization.py +55 -25
- sqlspec/_typing.py +155 -52
- sqlspec/adapters/adbc/_types.py +1 -1
- sqlspec/adapters/adbc/adk/__init__.py +5 -0
- sqlspec/adapters/adbc/adk/store.py +880 -0
- sqlspec/adapters/adbc/config.py +62 -12
- sqlspec/adapters/adbc/data_dictionary.py +74 -2
- sqlspec/adapters/adbc/driver.py +226 -58
- sqlspec/adapters/adbc/litestar/__init__.py +5 -0
- sqlspec/adapters/adbc/litestar/store.py +504 -0
- sqlspec/adapters/adbc/type_converter.py +44 -50
- sqlspec/adapters/aiosqlite/_types.py +1 -1
- sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/adk/store.py +536 -0
- sqlspec/adapters/aiosqlite/config.py +86 -16
- sqlspec/adapters/aiosqlite/data_dictionary.py +34 -2
- sqlspec/adapters/aiosqlite/driver.py +127 -38
- sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
- sqlspec/adapters/aiosqlite/pool.py +7 -7
- sqlspec/adapters/asyncmy/__init__.py +7 -1
- sqlspec/adapters/asyncmy/_types.py +1 -1
- sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
- sqlspec/adapters/asyncmy/adk/store.py +503 -0
- sqlspec/adapters/asyncmy/config.py +59 -17
- sqlspec/adapters/asyncmy/data_dictionary.py +41 -2
- sqlspec/adapters/asyncmy/driver.py +293 -62
- sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncmy/litestar/store.py +296 -0
- sqlspec/adapters/asyncpg/__init__.py +2 -1
- sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
- sqlspec/adapters/asyncpg/_types.py +11 -7
- sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
- sqlspec/adapters/asyncpg/adk/store.py +460 -0
- sqlspec/adapters/asyncpg/config.py +57 -36
- sqlspec/adapters/asyncpg/data_dictionary.py +48 -2
- sqlspec/adapters/asyncpg/driver.py +153 -23
- sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
- sqlspec/adapters/asyncpg/litestar/store.py +253 -0
- sqlspec/adapters/bigquery/_types.py +1 -1
- sqlspec/adapters/bigquery/adk/__init__.py +5 -0
- sqlspec/adapters/bigquery/adk/store.py +585 -0
- sqlspec/adapters/bigquery/config.py +36 -11
- sqlspec/adapters/bigquery/data_dictionary.py +42 -2
- sqlspec/adapters/bigquery/driver.py +489 -144
- sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
- sqlspec/adapters/bigquery/litestar/store.py +327 -0
- sqlspec/adapters/bigquery/type_converter.py +55 -23
- sqlspec/adapters/duckdb/_types.py +2 -2
- sqlspec/adapters/duckdb/adk/__init__.py +14 -0
- sqlspec/adapters/duckdb/adk/store.py +563 -0
- sqlspec/adapters/duckdb/config.py +79 -21
- sqlspec/adapters/duckdb/data_dictionary.py +41 -2
- sqlspec/adapters/duckdb/driver.py +225 -44
- sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
- sqlspec/adapters/duckdb/litestar/store.py +332 -0
- sqlspec/adapters/duckdb/pool.py +5 -5
- sqlspec/adapters/duckdb/type_converter.py +51 -21
- sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
- sqlspec/adapters/oracledb/_types.py +20 -2
- sqlspec/adapters/oracledb/adk/__init__.py +5 -0
- sqlspec/adapters/oracledb/adk/store.py +1628 -0
- sqlspec/adapters/oracledb/config.py +120 -36
- sqlspec/adapters/oracledb/data_dictionary.py +87 -20
- sqlspec/adapters/oracledb/driver.py +475 -86
- sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
- sqlspec/adapters/oracledb/litestar/store.py +765 -0
- sqlspec/adapters/oracledb/migrations.py +316 -25
- sqlspec/adapters/oracledb/type_converter.py +91 -16
- sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
- sqlspec/adapters/psqlpy/_types.py +2 -1
- sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
- sqlspec/adapters/psqlpy/adk/store.py +483 -0
- sqlspec/adapters/psqlpy/config.py +45 -19
- sqlspec/adapters/psqlpy/data_dictionary.py +48 -2
- sqlspec/adapters/psqlpy/driver.py +108 -41
- sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
- sqlspec/adapters/psqlpy/litestar/store.py +272 -0
- sqlspec/adapters/psqlpy/type_converter.py +40 -11
- sqlspec/adapters/psycopg/_type_handlers.py +80 -0
- sqlspec/adapters/psycopg/_types.py +2 -1
- sqlspec/adapters/psycopg/adk/__init__.py +5 -0
- sqlspec/adapters/psycopg/adk/store.py +962 -0
- sqlspec/adapters/psycopg/config.py +65 -37
- sqlspec/adapters/psycopg/data_dictionary.py +91 -3
- sqlspec/adapters/psycopg/driver.py +200 -78
- sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
- sqlspec/adapters/psycopg/litestar/store.py +554 -0
- sqlspec/adapters/sqlite/__init__.py +2 -1
- sqlspec/adapters/sqlite/_type_handlers.py +86 -0
- sqlspec/adapters/sqlite/_types.py +1 -1
- sqlspec/adapters/sqlite/adk/__init__.py +5 -0
- sqlspec/adapters/sqlite/adk/store.py +582 -0
- sqlspec/adapters/sqlite/config.py +85 -16
- sqlspec/adapters/sqlite/data_dictionary.py +34 -2
- sqlspec/adapters/sqlite/driver.py +120 -52
- sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
- sqlspec/adapters/sqlite/litestar/store.py +318 -0
- sqlspec/adapters/sqlite/pool.py +5 -5
- sqlspec/base.py +45 -26
- sqlspec/builder/__init__.py +73 -4
- sqlspec/builder/_base.py +91 -58
- sqlspec/builder/_column.py +5 -5
- sqlspec/builder/_ddl.py +98 -89
- sqlspec/builder/_delete.py +5 -4
- sqlspec/builder/_dml.py +388 -0
- sqlspec/{_sql.py → builder/_factory.py} +41 -44
- sqlspec/builder/_insert.py +5 -82
- sqlspec/builder/{mixins/_join_operations.py → _join.py} +145 -143
- sqlspec/builder/_merge.py +446 -11
- sqlspec/builder/_parsing_utils.py +9 -11
- sqlspec/builder/_select.py +1313 -25
- sqlspec/builder/_update.py +11 -42
- sqlspec/cli.py +76 -69
- sqlspec/config.py +331 -62
- sqlspec/core/__init__.py +5 -4
- sqlspec/core/cache.py +18 -18
- sqlspec/core/compiler.py +6 -8
- sqlspec/core/filters.py +55 -47
- sqlspec/core/hashing.py +9 -9
- sqlspec/core/parameters.py +76 -45
- sqlspec/core/result.py +234 -47
- sqlspec/core/splitter.py +16 -17
- sqlspec/core/statement.py +32 -31
- sqlspec/core/type_conversion.py +3 -2
- sqlspec/driver/__init__.py +1 -3
- sqlspec/driver/_async.py +183 -160
- sqlspec/driver/_common.py +197 -109
- sqlspec/driver/_sync.py +189 -161
- sqlspec/driver/mixins/_result_tools.py +20 -236
- sqlspec/driver/mixins/_sql_translator.py +4 -4
- sqlspec/exceptions.py +70 -7
- sqlspec/extensions/adk/__init__.py +53 -0
- sqlspec/extensions/adk/_types.py +51 -0
- sqlspec/extensions/adk/converters.py +172 -0
- sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
- sqlspec/extensions/adk/migrations/__init__.py +0 -0
- sqlspec/extensions/adk/service.py +181 -0
- sqlspec/extensions/adk/store.py +536 -0
- sqlspec/extensions/aiosql/adapter.py +69 -61
- sqlspec/extensions/fastapi/__init__.py +21 -0
- sqlspec/extensions/fastapi/extension.py +331 -0
- sqlspec/extensions/fastapi/providers.py +543 -0
- sqlspec/extensions/flask/__init__.py +36 -0
- sqlspec/extensions/flask/_state.py +71 -0
- sqlspec/extensions/flask/_utils.py +40 -0
- sqlspec/extensions/flask/extension.py +389 -0
- sqlspec/extensions/litestar/__init__.py +21 -4
- sqlspec/extensions/litestar/cli.py +54 -10
- sqlspec/extensions/litestar/config.py +56 -266
- sqlspec/extensions/litestar/handlers.py +46 -17
- sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
- sqlspec/extensions/litestar/migrations/__init__.py +3 -0
- sqlspec/extensions/litestar/plugin.py +349 -224
- sqlspec/extensions/litestar/providers.py +25 -25
- sqlspec/extensions/litestar/store.py +265 -0
- sqlspec/extensions/starlette/__init__.py +10 -0
- sqlspec/extensions/starlette/_state.py +25 -0
- sqlspec/extensions/starlette/_utils.py +52 -0
- sqlspec/extensions/starlette/extension.py +254 -0
- sqlspec/extensions/starlette/middleware.py +154 -0
- sqlspec/loader.py +30 -49
- sqlspec/migrations/base.py +200 -76
- sqlspec/migrations/commands.py +591 -62
- sqlspec/migrations/context.py +6 -9
- sqlspec/migrations/fix.py +199 -0
- sqlspec/migrations/loaders.py +47 -19
- sqlspec/migrations/runner.py +241 -75
- sqlspec/migrations/tracker.py +237 -21
- sqlspec/migrations/utils.py +51 -3
- sqlspec/migrations/validation.py +177 -0
- sqlspec/protocols.py +106 -36
- sqlspec/storage/_utils.py +85 -0
- sqlspec/storage/backends/fsspec.py +133 -107
- sqlspec/storage/backends/local.py +78 -51
- sqlspec/storage/backends/obstore.py +276 -168
- sqlspec/storage/registry.py +75 -39
- sqlspec/typing.py +30 -84
- sqlspec/utils/__init__.py +25 -4
- sqlspec/utils/arrow_helpers.py +81 -0
- sqlspec/utils/config_resolver.py +6 -6
- sqlspec/utils/correlation.py +4 -5
- sqlspec/utils/data_transformation.py +3 -2
- sqlspec/utils/deprecation.py +9 -8
- sqlspec/utils/fixtures.py +4 -4
- sqlspec/utils/logging.py +46 -6
- sqlspec/utils/module_loader.py +205 -5
- sqlspec/utils/portal.py +311 -0
- sqlspec/utils/schema.py +288 -0
- sqlspec/utils/serializers.py +113 -4
- sqlspec/utils/sync_tools.py +36 -22
- sqlspec/utils/text.py +1 -2
- sqlspec/utils/type_guards.py +136 -20
- sqlspec/utils/version.py +433 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/METADATA +41 -22
- sqlspec-0.28.0.dist-info/RECORD +221 -0
- sqlspec/builder/mixins/__init__.py +0 -55
- sqlspec/builder/mixins/_cte_and_set_ops.py +0 -253
- sqlspec/builder/mixins/_delete_operations.py +0 -50
- sqlspec/builder/mixins/_insert_operations.py +0 -282
- sqlspec/builder/mixins/_merge_operations.py +0 -698
- sqlspec/builder/mixins/_order_limit_operations.py +0 -145
- sqlspec/builder/mixins/_pivot_operations.py +0 -157
- sqlspec/builder/mixins/_select_operations.py +0 -930
- sqlspec/builder/mixins/_update_operations.py +0 -199
- sqlspec/builder/mixins/_where_clause.py +0 -1298
- sqlspec-0.26.0.dist-info/RECORD +0 -157
- sqlspec-0.26.0.dist-info/licenses/NOTICE +0 -29
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.26.0.dist-info → sqlspec-0.28.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
# pyright: reportPrivateUsage=false
|
|
2
2
|
import logging
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import TYPE_CHECKING, Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
|
|
6
|
+
from mypy_extensions import mypyc_attr
|
|
7
|
+
|
|
8
|
+
from sqlspec.storage._utils import resolve_storage_path
|
|
9
|
+
from sqlspec.utils.module_loader import ensure_fsspec, ensure_pyarrow
|
|
8
10
|
from sqlspec.utils.sync_tools import async_
|
|
9
11
|
|
|
10
12
|
if TYPE_CHECKING:
|
|
@@ -18,41 +20,80 @@ logger = logging.getLogger(__name__)
|
|
|
18
20
|
|
|
19
21
|
|
|
20
22
|
class _ArrowStreamer:
|
|
23
|
+
"""Async iterator for streaming Arrow batches from FSSpec backend.
|
|
24
|
+
|
|
25
|
+
Uses async_() to offload blocking operations to thread pool,
|
|
26
|
+
preventing event loop blocking during file I/O and iteration.
|
|
27
|
+
|
|
28
|
+
CRITICAL: Creates generators on main thread, offloads only next() calls.
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
__slots__ = ("_initialized", "backend", "batch_iterator", "kwargs", "paths_iterator", "pattern")
|
|
32
|
+
|
|
21
33
|
def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
|
|
22
34
|
self.backend = backend
|
|
23
35
|
self.pattern = pattern
|
|
24
36
|
self.kwargs = kwargs
|
|
25
|
-
self.paths_iterator:
|
|
26
|
-
self.batch_iterator:
|
|
37
|
+
self.paths_iterator: Iterator[str] | None = None
|
|
38
|
+
self.batch_iterator: Iterator[ArrowRecordBatch] | None = None
|
|
39
|
+
self._initialized = False
|
|
27
40
|
|
|
28
41
|
def __aiter__(self) -> "_ArrowStreamer":
|
|
29
42
|
return self
|
|
30
43
|
|
|
31
44
|
async def _initialize(self) -> None:
|
|
32
|
-
"""Initialize paths iterator."""
|
|
33
|
-
if self.
|
|
45
|
+
"""Initialize paths iterator asynchronously."""
|
|
46
|
+
if not self._initialized:
|
|
34
47
|
paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
|
|
35
48
|
self.paths_iterator = iter(paths)
|
|
49
|
+
self._initialized = True
|
|
36
50
|
|
|
37
51
|
async def __anext__(self) -> "ArrowRecordBatch":
|
|
52
|
+
"""Get next Arrow batch asynchronously.
|
|
53
|
+
|
|
54
|
+
Iterative state machine that avoids recursion and blocking calls.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
Arrow record batches from matching files.
|
|
58
|
+
|
|
59
|
+
Raises:
|
|
60
|
+
StopAsyncIteration: When no more batches available.
|
|
61
|
+
"""
|
|
38
62
|
await self._initialize()
|
|
39
63
|
|
|
40
|
-
|
|
64
|
+
while True:
|
|
65
|
+
if self.batch_iterator is not None:
|
|
66
|
+
|
|
67
|
+
def _safe_next_batch() -> "ArrowRecordBatch":
|
|
68
|
+
try:
|
|
69
|
+
return next(self.batch_iterator) # type: ignore[arg-type]
|
|
70
|
+
except StopIteration as e:
|
|
71
|
+
raise StopAsyncIteration from e
|
|
72
|
+
|
|
73
|
+
try:
|
|
74
|
+
return await async_(_safe_next_batch)()
|
|
75
|
+
except StopAsyncIteration:
|
|
76
|
+
self.batch_iterator = None
|
|
77
|
+
continue
|
|
78
|
+
|
|
41
79
|
try:
|
|
42
|
-
|
|
43
|
-
except StopIteration:
|
|
44
|
-
|
|
80
|
+
path = next(self.paths_iterator) # type: ignore[arg-type]
|
|
81
|
+
except StopIteration as e:
|
|
82
|
+
raise StopAsyncIteration from e
|
|
83
|
+
|
|
84
|
+
self.batch_iterator = self.backend._stream_file_batches(path)
|
|
45
85
|
|
|
46
|
-
|
|
86
|
+
async def aclose(self) -> None:
|
|
87
|
+
"""Close underlying batch iterator."""
|
|
88
|
+
if self.batch_iterator is not None:
|
|
47
89
|
try:
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
raise StopAsyncIteration
|
|
53
|
-
raise StopAsyncIteration
|
|
90
|
+
close_method = self.batch_iterator.close # type: ignore[attr-defined]
|
|
91
|
+
await async_(close_method)()
|
|
92
|
+
except AttributeError:
|
|
93
|
+
pass
|
|
54
94
|
|
|
55
95
|
|
|
96
|
+
@mypyc_attr(allow_interpreted_subclasses=True)
|
|
56
97
|
class FSSpecBackend:
|
|
57
98
|
"""Storage backend using fsspec.
|
|
58
99
|
|
|
@@ -60,19 +101,36 @@ class FSSpecBackend:
|
|
|
60
101
|
including HTTP, HTTPS, FTP, and cloud storage services.
|
|
61
102
|
"""
|
|
62
103
|
|
|
104
|
+
__slots__ = ("_fs_uri", "backend_type", "base_path", "fs", "protocol")
|
|
105
|
+
|
|
63
106
|
def __init__(self, uri: str, **kwargs: Any) -> None:
|
|
64
|
-
|
|
107
|
+
ensure_fsspec()
|
|
65
108
|
|
|
66
109
|
base_path = kwargs.pop("base_path", "")
|
|
67
|
-
self.base_path = base_path.rstrip("/") if base_path else ""
|
|
68
110
|
|
|
69
111
|
if "://" in uri:
|
|
70
112
|
self.protocol = uri.split("://", maxsplit=1)[0]
|
|
71
113
|
self._fs_uri = uri
|
|
114
|
+
|
|
115
|
+
# For S3/cloud URIs, extract bucket/path from URI as base_path
|
|
116
|
+
if self.protocol in {"s3", "gs", "az", "gcs"}:
|
|
117
|
+
from urllib.parse import urlparse
|
|
118
|
+
|
|
119
|
+
parsed = urlparse(uri)
|
|
120
|
+
# Combine netloc (bucket) and path for base_path
|
|
121
|
+
if parsed.netloc:
|
|
122
|
+
uri_base_path = parsed.netloc
|
|
123
|
+
if parsed.path and parsed.path != "/":
|
|
124
|
+
uri_base_path = f"{uri_base_path}{parsed.path}"
|
|
125
|
+
# Only use URI base_path if no explicit base_path provided
|
|
126
|
+
if not base_path:
|
|
127
|
+
base_path = uri_base_path
|
|
72
128
|
else:
|
|
73
129
|
self.protocol = uri
|
|
74
130
|
self._fs_uri = f"{uri}://"
|
|
75
131
|
|
|
132
|
+
self.base_path = base_path.rstrip("/") if base_path else ""
|
|
133
|
+
|
|
76
134
|
import fsspec
|
|
77
135
|
|
|
78
136
|
self.fs = fsspec.filesystem(self.protocol, **kwargs)
|
|
@@ -93,48 +151,19 @@ class FSSpecBackend:
|
|
|
93
151
|
|
|
94
152
|
return cls(uri=uri, **kwargs)
|
|
95
153
|
|
|
96
|
-
def _ensure_fsspec(self) -> None:
|
|
97
|
-
"""Ensure fsspec is available for operations."""
|
|
98
|
-
if not FSSPEC_INSTALLED:
|
|
99
|
-
raise MissingDependencyError(package="fsspec", install_package="fsspec")
|
|
100
|
-
|
|
101
|
-
def _ensure_pyarrow(self) -> None:
|
|
102
|
-
"""Ensure PyArrow is available for Arrow operations."""
|
|
103
|
-
if not PYARROW_INSTALLED:
|
|
104
|
-
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
|
|
105
|
-
|
|
106
|
-
def _resolve_path(self, path: Union[str, Path]) -> str:
|
|
107
|
-
"""Resolve path relative to base_path."""
|
|
108
|
-
path_str = str(path)
|
|
109
|
-
if self.base_path:
|
|
110
|
-
clean_base = self.base_path.rstrip("/")
|
|
111
|
-
clean_path = path_str.lstrip("/")
|
|
112
|
-
return f"{clean_base}/{clean_path}"
|
|
113
|
-
if self.protocol == "s3" and "://" in self._fs_uri:
|
|
114
|
-
# For S3, we need to include the bucket from the URI
|
|
115
|
-
# Extract bucket and path from URI like s3://bucket/path
|
|
116
|
-
uri_parts = self._fs_uri.split("://", 1)[1] # Remove s3://
|
|
117
|
-
if "/" in uri_parts:
|
|
118
|
-
# URI has bucket and base path
|
|
119
|
-
return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
|
|
120
|
-
# URI has only bucket
|
|
121
|
-
return f"{uri_parts}/{path_str.lstrip('/')}"
|
|
122
|
-
return path_str
|
|
123
|
-
|
|
124
154
|
@property
|
|
125
155
|
def base_uri(self) -> str:
|
|
126
156
|
return self._fs_uri
|
|
127
157
|
|
|
128
|
-
def read_bytes(self, path:
|
|
158
|
+
def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:
|
|
129
159
|
"""Read bytes from an object."""
|
|
130
|
-
resolved_path = self.
|
|
160
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
131
161
|
return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
|
|
132
162
|
|
|
133
|
-
def write_bytes(self, path:
|
|
163
|
+
def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
|
|
134
164
|
"""Write bytes to an object."""
|
|
135
|
-
resolved_path = self.
|
|
165
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
136
166
|
|
|
137
|
-
# Only create directories for local file systems, not for cloud storage
|
|
138
167
|
if self.protocol == "file":
|
|
139
168
|
parent_dir = str(Path(resolved_path).parent)
|
|
140
169
|
if parent_dir and not self.fs.exists(parent_dir):
|
|
@@ -143,84 +172,85 @@ class FSSpecBackend:
|
|
|
143
172
|
with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
|
|
144
173
|
f.write(data) # pyright: ignore
|
|
145
174
|
|
|
146
|
-
def read_text(self, path:
|
|
175
|
+
def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
147
176
|
"""Read text from an object."""
|
|
148
177
|
data = self.read_bytes(path, **kwargs)
|
|
149
178
|
return data.decode(encoding)
|
|
150
179
|
|
|
151
|
-
def write_text(self, path:
|
|
180
|
+
def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
152
181
|
"""Write text to an object."""
|
|
153
182
|
self.write_bytes(path, data.encode(encoding), **kwargs)
|
|
154
183
|
|
|
155
|
-
def exists(self, path:
|
|
184
|
+
def exists(self, path: str | Path, **kwargs: Any) -> bool:
|
|
156
185
|
"""Check if an object exists."""
|
|
157
|
-
resolved_path = self.
|
|
186
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
158
187
|
return self.fs.exists(resolved_path, **kwargs) # type: ignore[no-any-return]
|
|
159
188
|
|
|
160
|
-
def delete(self, path:
|
|
189
|
+
def delete(self, path: str | Path, **kwargs: Any) -> None:
|
|
161
190
|
"""Delete an object."""
|
|
162
|
-
resolved_path = self.
|
|
191
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
163
192
|
self.fs.rm(resolved_path, **kwargs)
|
|
164
193
|
|
|
165
|
-
def copy(self, source:
|
|
194
|
+
def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
|
|
166
195
|
"""Copy an object."""
|
|
167
|
-
source_path = self.
|
|
168
|
-
dest_path = self.
|
|
196
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
|
|
197
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
|
|
169
198
|
self.fs.copy(source_path, dest_path, **kwargs)
|
|
170
199
|
|
|
171
|
-
def move(self, source:
|
|
200
|
+
def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
|
|
172
201
|
"""Move an object."""
|
|
173
|
-
source_path = self.
|
|
174
|
-
dest_path = self.
|
|
202
|
+
source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
|
|
203
|
+
dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
|
|
175
204
|
self.fs.mv(source_path, dest_path, **kwargs)
|
|
176
205
|
|
|
177
|
-
def read_arrow(self, path:
|
|
206
|
+
def read_arrow(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
|
|
178
207
|
"""Read an Arrow table from storage."""
|
|
179
|
-
|
|
180
|
-
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
|
|
208
|
+
ensure_pyarrow()
|
|
181
209
|
import pyarrow.parquet as pq
|
|
182
210
|
|
|
183
|
-
resolved_path = self.
|
|
211
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
184
212
|
with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
|
|
185
213
|
return pq.read_table(f)
|
|
186
214
|
|
|
187
|
-
def write_arrow(self, path:
|
|
215
|
+
def write_arrow(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
|
|
188
216
|
"""Write an Arrow table to storage."""
|
|
189
|
-
|
|
190
|
-
raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
|
|
217
|
+
ensure_pyarrow()
|
|
191
218
|
import pyarrow.parquet as pq
|
|
192
219
|
|
|
193
|
-
resolved_path = self.
|
|
220
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
194
221
|
with self.fs.open(resolved_path, mode="wb") as f:
|
|
195
222
|
pq.write_table(table, f, **kwargs) # pyright: ignore
|
|
196
223
|
|
|
197
224
|
def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
|
|
198
225
|
"""List objects with optional prefix."""
|
|
199
|
-
resolved_prefix = self.
|
|
226
|
+
resolved_prefix = resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=False)
|
|
200
227
|
if recursive:
|
|
201
228
|
return sorted(self.fs.find(resolved_prefix, **kwargs))
|
|
202
229
|
return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
|
|
203
230
|
|
|
204
231
|
def glob(self, pattern: str, **kwargs: Any) -> list[str]:
|
|
205
232
|
"""Find objects matching a glob pattern."""
|
|
206
|
-
resolved_pattern = self.
|
|
233
|
+
resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=False)
|
|
207
234
|
return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
|
|
208
235
|
|
|
209
|
-
def is_object(self, path:
|
|
236
|
+
def is_object(self, path: str | Path) -> bool:
|
|
210
237
|
"""Check if path points to an object."""
|
|
211
|
-
resolved_path = self.
|
|
238
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
212
239
|
return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
|
|
213
240
|
|
|
214
|
-
def is_path(self, path:
|
|
241
|
+
def is_path(self, path: str | Path) -> bool:
|
|
215
242
|
"""Check if path points to a prefix (directory-like)."""
|
|
216
|
-
resolved_path = self.
|
|
243
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
217
244
|
return self.fs.isdir(resolved_path) # type: ignore[no-any-return]
|
|
218
245
|
|
|
219
|
-
def get_metadata(self, path:
|
|
246
|
+
def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
|
|
220
247
|
"""Get object metadata."""
|
|
248
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
221
249
|
try:
|
|
222
|
-
resolved_path = self._resolve_path(path)
|
|
223
250
|
info = self.fs.info(resolved_path, **kwargs)
|
|
251
|
+
except FileNotFoundError:
|
|
252
|
+
return {"path": resolved_path, "exists": False}
|
|
253
|
+
else:
|
|
224
254
|
if isinstance(info, dict):
|
|
225
255
|
return {
|
|
226
256
|
"path": resolved_path,
|
|
@@ -229,23 +259,20 @@ class FSSpecBackend:
|
|
|
229
259
|
"last_modified": info.get("mtime"),
|
|
230
260
|
"type": info.get("type", "file"),
|
|
231
261
|
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
"last_modified": info.mtime,
|
|
240
|
-
"type": info.type,
|
|
241
|
-
}
|
|
262
|
+
return {
|
|
263
|
+
"path": resolved_path,
|
|
264
|
+
"exists": True,
|
|
265
|
+
"size": info.size,
|
|
266
|
+
"last_modified": info.mtime,
|
|
267
|
+
"type": info.type,
|
|
268
|
+
}
|
|
242
269
|
|
|
243
270
|
def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
|
|
244
271
|
"""Generate a signed URL for the file."""
|
|
245
|
-
resolved_path = self.
|
|
272
|
+
resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
|
|
246
273
|
return f"{self._fs_uri}{resolved_path}"
|
|
247
274
|
|
|
248
|
-
def _stream_file_batches(self, obj_path:
|
|
275
|
+
def _stream_file_batches(self, obj_path: str | Path) -> "Iterator[ArrowRecordBatch]":
|
|
249
276
|
import pyarrow.parquet as pq
|
|
250
277
|
|
|
251
278
|
with self.fs.open(obj_path, mode="rb") as f:
|
|
@@ -253,17 +280,16 @@ class FSSpecBackend:
|
|
|
253
280
|
yield from parquet_file.iter_batches()
|
|
254
281
|
|
|
255
282
|
def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
|
|
256
|
-
|
|
257
|
-
self._ensure_pyarrow()
|
|
283
|
+
ensure_pyarrow()
|
|
258
284
|
|
|
259
285
|
for obj_path in self.glob(pattern, **kwargs):
|
|
260
286
|
yield from self._stream_file_batches(obj_path)
|
|
261
287
|
|
|
262
|
-
async def read_bytes_async(self, path:
|
|
288
|
+
async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:
|
|
263
289
|
"""Read bytes from storage asynchronously."""
|
|
264
290
|
return await async_(self.read_bytes)(path, **kwargs)
|
|
265
291
|
|
|
266
|
-
async def write_bytes_async(self, path:
|
|
292
|
+
async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
|
|
267
293
|
"""Write bytes to storage asynchronously."""
|
|
268
294
|
return await async_(self.write_bytes)(path, data, **kwargs)
|
|
269
295
|
|
|
@@ -277,15 +303,15 @@ class FSSpecBackend:
|
|
|
277
303
|
Returns:
|
|
278
304
|
AsyncIterator of Arrow record batches
|
|
279
305
|
"""
|
|
280
|
-
|
|
306
|
+
ensure_pyarrow()
|
|
281
307
|
|
|
282
308
|
return _ArrowStreamer(self, pattern, **kwargs)
|
|
283
309
|
|
|
284
|
-
async def read_text_async(self, path:
|
|
310
|
+
async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
|
|
285
311
|
"""Read text from storage asynchronously."""
|
|
286
312
|
return await async_(self.read_text)(path, encoding, **kwargs)
|
|
287
313
|
|
|
288
|
-
async def write_text_async(self, path:
|
|
314
|
+
async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
|
|
289
315
|
"""Write text to storage asynchronously."""
|
|
290
316
|
await async_(self.write_text)(path, data, encoding, **kwargs)
|
|
291
317
|
|
|
@@ -293,23 +319,23 @@ class FSSpecBackend:
|
|
|
293
319
|
"""List objects in storage asynchronously."""
|
|
294
320
|
return await async_(self.list_objects)(prefix, recursive, **kwargs)
|
|
295
321
|
|
|
296
|
-
async def exists_async(self, path:
|
|
322
|
+
async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:
|
|
297
323
|
"""Check if object exists in storage asynchronously."""
|
|
298
324
|
return await async_(self.exists)(path, **kwargs)
|
|
299
325
|
|
|
300
|
-
async def delete_async(self, path:
|
|
326
|
+
async def delete_async(self, path: str | Path, **kwargs: Any) -> None:
|
|
301
327
|
"""Delete object from storage asynchronously."""
|
|
302
328
|
await async_(self.delete)(path, **kwargs)
|
|
303
329
|
|
|
304
|
-
async def copy_async(self, source:
|
|
330
|
+
async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
|
|
305
331
|
"""Copy object in storage asynchronously."""
|
|
306
332
|
await async_(self.copy)(source, destination, **kwargs)
|
|
307
333
|
|
|
308
|
-
async def move_async(self, source:
|
|
334
|
+
async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
|
|
309
335
|
"""Move object in storage asynchronously."""
|
|
310
336
|
await async_(self.move)(source, destination, **kwargs)
|
|
311
337
|
|
|
312
|
-
async def get_metadata_async(self, path:
|
|
338
|
+
async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
|
|
313
339
|
"""Get object metadata from storage asynchronously."""
|
|
314
340
|
return await async_(self.get_metadata)(path, **kwargs)
|
|
315
341
|
|
|
@@ -317,10 +343,10 @@ class FSSpecBackend:
|
|
|
317
343
|
"""Generate a signed URL asynchronously."""
|
|
318
344
|
return await async_(self.sign)(path, expires_in, for_upload)
|
|
319
345
|
|
|
320
|
-
async def read_arrow_async(self, path:
|
|
346
|
+
async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
|
|
321
347
|
"""Read Arrow table from storage asynchronously."""
|
|
322
348
|
return await async_(self.read_arrow)(path, **kwargs)
|
|
323
349
|
|
|
324
|
-
async def write_arrow_async(self, path:
|
|
350
|
+
async def write_arrow_async(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
|
|
325
351
|
"""Write Arrow table to storage asynchronously."""
|
|
326
352
|
await async_(self.write_arrow)(path, table, **kwargs)
|