sqlspec 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- sqlspec/_sql.py +21 -180
- sqlspec/adapters/adbc/config.py +10 -12
- sqlspec/adapters/adbc/driver.py +120 -118
- sqlspec/adapters/aiosqlite/config.py +3 -3
- sqlspec/adapters/aiosqlite/driver.py +116 -141
- sqlspec/adapters/asyncmy/config.py +3 -4
- sqlspec/adapters/asyncmy/driver.py +123 -135
- sqlspec/adapters/asyncpg/config.py +3 -7
- sqlspec/adapters/asyncpg/driver.py +98 -140
- sqlspec/adapters/bigquery/config.py +4 -5
- sqlspec/adapters/bigquery/driver.py +231 -181
- sqlspec/adapters/duckdb/config.py +3 -6
- sqlspec/adapters/duckdb/driver.py +132 -124
- sqlspec/adapters/oracledb/config.py +6 -5
- sqlspec/adapters/oracledb/driver.py +242 -259
- sqlspec/adapters/psqlpy/config.py +3 -7
- sqlspec/adapters/psqlpy/driver.py +118 -93
- sqlspec/adapters/psycopg/config.py +34 -30
- sqlspec/adapters/psycopg/driver.py +342 -214
- sqlspec/adapters/sqlite/config.py +3 -3
- sqlspec/adapters/sqlite/driver.py +150 -104
- sqlspec/config.py +0 -4
- sqlspec/driver/_async.py +89 -98
- sqlspec/driver/_common.py +52 -17
- sqlspec/driver/_sync.py +81 -105
- sqlspec/driver/connection.py +207 -0
- sqlspec/driver/mixins/_csv_writer.py +91 -0
- sqlspec/driver/mixins/_pipeline.py +38 -49
- sqlspec/driver/mixins/_result_utils.py +27 -9
- sqlspec/driver/mixins/_storage.py +149 -216
- sqlspec/driver/mixins/_type_coercion.py +3 -4
- sqlspec/driver/parameters.py +138 -0
- sqlspec/exceptions.py +10 -2
- sqlspec/extensions/aiosql/adapter.py +0 -10
- sqlspec/extensions/litestar/handlers.py +0 -1
- sqlspec/extensions/litestar/plugin.py +0 -3
- sqlspec/extensions/litestar/providers.py +0 -14
- sqlspec/loader.py +31 -118
- sqlspec/protocols.py +542 -0
- sqlspec/service/__init__.py +3 -2
- sqlspec/service/_util.py +147 -0
- sqlspec/service/base.py +1116 -9
- sqlspec/statement/builder/__init__.py +42 -32
- sqlspec/statement/builder/_ddl_utils.py +0 -10
- sqlspec/statement/builder/_parsing_utils.py +10 -4
- sqlspec/statement/builder/base.py +70 -23
- sqlspec/statement/builder/column.py +283 -0
- sqlspec/statement/builder/ddl.py +102 -65
- sqlspec/statement/builder/delete.py +23 -7
- sqlspec/statement/builder/insert.py +29 -15
- sqlspec/statement/builder/merge.py +4 -4
- sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
- sqlspec/statement/builder/mixins/_delete_from.py +1 -1
- sqlspec/statement/builder/mixins/_from.py +10 -8
- sqlspec/statement/builder/mixins/_group_by.py +0 -1
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
- sqlspec/statement/builder/mixins/_insert_values.py +0 -2
- sqlspec/statement/builder/mixins/_join.py +20 -13
- sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
- sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
- sqlspec/statement/builder/mixins/_order_by.py +2 -2
- sqlspec/statement/builder/mixins/_pivot.py +4 -7
- sqlspec/statement/builder/mixins/_select_columns.py +6 -5
- sqlspec/statement/builder/mixins/_unpivot.py +6 -9
- sqlspec/statement/builder/mixins/_update_from.py +2 -1
- sqlspec/statement/builder/mixins/_update_set.py +11 -8
- sqlspec/statement/builder/mixins/_where.py +61 -34
- sqlspec/statement/builder/select.py +32 -17
- sqlspec/statement/builder/update.py +25 -11
- sqlspec/statement/filters.py +39 -14
- sqlspec/statement/parameter_manager.py +220 -0
- sqlspec/statement/parameters.py +210 -79
- sqlspec/statement/pipelines/__init__.py +166 -23
- sqlspec/statement/pipelines/analyzers/_analyzer.py +22 -25
- sqlspec/statement/pipelines/context.py +35 -39
- sqlspec/statement/pipelines/transformers/__init__.py +2 -3
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +667 -43
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
- sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
- sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
- sqlspec/statement/pipelines/validators/_performance.py +38 -23
- sqlspec/statement/pipelines/validators/_security.py +39 -62
- sqlspec/statement/result.py +37 -129
- sqlspec/statement/splitter.py +0 -12
- sqlspec/statement/sql.py +885 -379
- sqlspec/statement/sql_compiler.py +140 -0
- sqlspec/storage/__init__.py +10 -2
- sqlspec/storage/backends/fsspec.py +82 -35
- sqlspec/storage/backends/obstore.py +66 -49
- sqlspec/storage/capabilities.py +101 -0
- sqlspec/storage/registry.py +56 -83
- sqlspec/typing.py +6 -434
- sqlspec/utils/cached_property.py +25 -0
- sqlspec/utils/correlation.py +0 -2
- sqlspec/utils/logging.py +0 -6
- sqlspec/utils/sync_tools.py +0 -4
- sqlspec/utils/text.py +0 -5
- sqlspec/utils/type_guards.py +892 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/METADATA +1 -1
- sqlspec-0.13.0.dist-info/RECORD +150 -0
- sqlspec/statement/builder/protocols.py +0 -20
- sqlspec/statement/pipelines/base.py +0 -315
- sqlspec/statement/pipelines/result_types.py +0 -41
- sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
- sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
- sqlspec/statement/pipelines/validators/base.py +0 -67
- sqlspec/storage/protocol.py +0 -170
- sqlspec-0.12.1.dist-info/RECORD +0 -145
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/statement/sql_compiler.py
ADDED
@@ -0,0 +1,140 @@
+"""SQL compilation logic separated from the main SQL class."""
+
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
+
+import sqlglot.expressions as exp
+
+from sqlspec.exceptions import SQLCompilationError
+from sqlspec.statement.parameters import ParameterConverter, ParameterStyle
+from sqlspec.statement.pipelines import SQLProcessingContext, StatementPipeline
+from sqlspec.statement.sql import SQLConfig
+from sqlspec.utils.cached_property import CachedProperty
+
+if TYPE_CHECKING:
+    from sqlglot.dialects.dialect import DialectType
+
+    from sqlspec.protocols import ProcessorProtocol
+    from sqlspec.statement.parameter_manager import ParameterManager
+
+
+__all__ = ("SQLCompiler",)
+
+
+class SQLCompiler:
+    """Handles SQL compilation and pipeline processing."""
+
+    def __init__(
+        self,
+        expression: exp.Expression,
+        dialect: "Optional[DialectType]" = None,
+        parameter_manager: "Optional[ParameterManager]" = None,
+        is_script: bool = False,
+        original_sql: Optional[str] = None,
+        config: Optional[SQLConfig] = None,
+    ) -> None:
+        self.expression = expression
+        self.dialect = dialect
+        self.parameter_manager = parameter_manager
+        self.is_script = is_script
+        self._original_sql = original_sql
+        self.config = config or SQLConfig(dialect=dialect)
+
+    @CachedProperty
+    def _pipeline(self) -> StatementPipeline:
+        """Get the statement pipeline."""
+        validators: list[ProcessorProtocol] = []
+
+        if self.config.enable_validation and self.config.allowed_parameter_styles is not None:
+            from sqlspec.statement.pipelines.validators._parameter_style import ParameterStyleValidator
+
+            # In strict mode, fail on violations
+            validators.append(ParameterStyleValidator(fail_on_violation=self.config.strict_mode))
+
+        return StatementPipeline(validators=validators)
+
+    @CachedProperty
+    def _context(self) -> SQLProcessingContext:
+        """Get the processing context."""
+        if isinstance(self.expression, exp.Anonymous) and self.expression.this:
+            sql_string = str(self.expression.this)
+        else:
+            sql_string = self.expression.sql(dialect=self.dialect)
+
+        context = SQLProcessingContext(initial_sql_string=sql_string, dialect=self.dialect, config=self.config)
+        context.initial_expression = self.expression
+        context.current_expression = self.expression
+
+        from sqlspec.statement.parameters import ParameterValidator
+
+        validator = ParameterValidator()
+        context.parameter_info = validator.extract_parameters(sql_string)
+
+        if self.parameter_manager:
+            if self.parameter_manager.positional_parameters:
+                context.merged_parameters = self.parameter_manager.positional_parameters
+                context.initial_parameters = self.parameter_manager.positional_parameters
+            elif self.parameter_manager.named_parameters:
+                context.merged_parameters = self.parameter_manager.named_parameters
+                context.initial_kwargs = self.parameter_manager.named_parameters
+            context.initial_parameters = self.parameter_manager.positional_parameters
+            context.initial_kwargs = self.parameter_manager.named_parameters
+        return context
+
+    @CachedProperty
+    def _processed_expr(self) -> exp.Expression:
+        """Execute the processing pipeline and cache the result."""
+        try:
+            result = self._pipeline.execute_pipeline(self._context)
+        except Exception as e:
+            msg = f"Failed to compile SQL: {self._context.initial_sql_string}"
+            raise SQLCompilationError(msg) from e
+        else:
+            return cast("exp.Expression", result.expression)
+
+    @CachedProperty
+    def _compiled_sql(self) -> str:
+        """Get the compiled SQL string."""
+        if self.is_script:
+            return str(self._original_sql or self.expression.sql(dialect=self.dialect))
+        # Always go through the pipeline to ensure validation runs
+        processed = self._processed_expr
+        if isinstance(processed, exp.Anonymous) and processed.this:
+            return str(processed.this)
+        return str(processed.sql(dialect=self.dialect, comments=False))
+
+    def compile(self, placeholder_style: Optional[str] = None) -> tuple[str, Any]:
+        """Compile SQL and parameters."""
+        if self.is_script:
+            return self._compiled_sql, None
+
+        sql = self.to_sql(placeholder_style)
+        params = self._get_compiled_parameters(placeholder_style)
+        return sql, params
+
+    def to_sql(self, placeholder_style: Optional[str] = None) -> str:
+        """Get the SQL string with a specific placeholder style."""
+        if placeholder_style is None or self.is_script:
+            return cast("str", self._compiled_sql)
+
+        converter = ParameterConverter()
+        sql = self._compiled_sql
+
+        target_style = ParameterStyle(placeholder_style)
+        return converter.convert_placeholders(sql, target_style, self._context.parameter_info)
+
+    def get_parameters(self, style: Union[ParameterStyle, str, None] = None) -> Any:
+        """Get the parameters in a specific style."""
+        if self.is_script:
+            return None
+        return cast("Any", self._get_compiled_parameters(str(style) if style else None))
+
+    def _get_compiled_parameters(self, placeholder_style: Optional[str]) -> Any:
+        """Get compiled parameters in target style."""
+        if not self.parameter_manager:
+            return None
+
+        # This ensures the pipeline has run and context is populated
+        _ = self._processed_expr
+
+        style_enum = ParameterStyle(placeholder_style) if placeholder_style else ParameterStyle.NAMED_COLON
+        return self.parameter_manager.get_compiled_parameters(self._context.parameter_info, style_enum)
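
The new sql_compiler module pulls compilation out of the SQL class: the pipeline, processing context, processed expression, and rendered SQL are each cached via CachedProperty, so validation runs at most once per statement. A minimal sketch of the flow, assuming a sqlglot expression is already parsed (inside sqlspec the SQL class constructs the compiler and wires up a ParameterManager itself; both are defaulted here):

    import sqlglot

    from sqlspec.statement.sql_compiler import SQLCompiler

    # Parse a statement the way the SQL class would before handing it off.
    expression = sqlglot.parse_one("SELECT id, name FROM users WHERE active = TRUE", read="postgres")

    compiler = SQLCompiler(expression, dialect="postgres")

    # First access runs the cached validation pipeline, then renders the SQL.
    sql, params = compiler.compile()
    # With no ParameterManager attached, params is None.

Note that scripts (is_script=True) bypass the pipeline entirely: compile() returns the original SQL string with None parameters.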
sqlspec/storage/__init__.py
CHANGED
@@ -5,11 +5,19 @@ This module provides a flexible storage system with:
 - Lazy loading and configuration-based registration
 - URI scheme-based automatic backend resolution
 - Key-based named storage configurations
+- Capability-based backend selection
 """
 
-from sqlspec.
+from sqlspec.protocols import ObjectStoreProtocol
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
 from sqlspec.storage.registry import StorageRegistry
 
 storage_registry = StorageRegistry()
 
-__all__ = (
+__all__ = (
+    "HasStorageCapabilities",
+    "ObjectStoreProtocol",
+    "StorageCapabilities",
+    "StorageRegistry",
+    "storage_registry",
+)
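
sqlspec/storage/protocol.py is deleted in this release (see the file list above); ObjectStoreProtocol now lives in the new consolidated sqlspec/protocols.py and is re-exported here alongside the new capability types. A hedged sketch of what the re-export enables, assuming write_bytes is part of the protocol since every backend implements it:

    from sqlspec.storage import ObjectStoreProtocol

    def persist(store: ObjectStoreProtocol, key: str, payload: bytes) -> None:
        # Any backend satisfying the protocol works here,
        # whether it is fsspec- or obstore-based.
        store.write_bytes(key, payload)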
sqlspec/storage/backends/fsspec.py
CHANGED
@@ -1,10 +1,12 @@
 # pyright: ignore=reportUnknownVariableType
 import logging
 from io import BytesIO
-from
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, ClassVar, Union
 
 from sqlspec.exceptions import MissingDependencyError
 from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import StorageCapabilities
 from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
 from sqlspec.utils.sync_tools import async_
 
@@ -46,6 +48,16 @@ class FSSpecBackend(ObjectStoreBase):
     and offering fallback capabilities.
     """
 
+    # FSSpec supports most operations but varies by underlying filesystem
+    _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=PYARROW_INSTALLED,
+        supports_streaming=PYARROW_INSTALLED,
+        supports_async=True,
+        supports_compression=True,
+        is_remote=True,
+        is_cloud_native=False,
+    )
+
     def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
         if not FSSPEC_INSTALLED:
             raise MissingDependencyError(package="fsspec", install_package="fsspec")
@@ -62,6 +74,10 @@ class FSSpecBackend(ObjectStoreBase):
         self.fs = fs
         self.protocol = getattr(fs, "protocol", "unknown")
         self._fs_uri = f"{self.protocol}://"
+
+        # Set instance-level capabilities based on detected protocol
+        self._instance_capabilities = self._detect_capabilities()
+
         super().__init__()
 
     @classmethod
@@ -70,21 +86,56 @@ class FSSpecBackend(ObjectStoreBase):
         fs_config = config.get("fs_config", {})
         base_path = config.get("base_path", "")
 
-        # Create filesystem instance from protocol
         import fsspec
 
         fs_instance = fsspec.filesystem(protocol, **fs_config)
 
         return cls(fs=fs_instance, base_path=base_path)
 
-    def _resolve_path(self, path: str) -> str:
+    def _resolve_path(self, path: Union[str, Path]) -> str:
         """Resolve path relative to base_path."""
+        path_str = str(path)
         if self.base_path:
-            # Ensure no double slashes
             clean_base = self.base_path.rstrip("/")
-            clean_path = path.lstrip("/")
+            clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
-        return path
+        return path_str
+
+    def _detect_capabilities(self) -> StorageCapabilities:
+        """Detect capabilities based on underlying filesystem protocol."""
+        protocol = self.protocol.lower()
+
+        if protocol in {"s3", "s3a", "s3n"}:
+            return StorageCapabilities.s3_compatible()
+        if protocol in {"gcs", "gs"}:
+            return StorageCapabilities.gcs()
+        if protocol in {"abfs", "az", "azure"}:
+            return StorageCapabilities.azure_blob()
+        if protocol in {"file", "local"}:
+            return StorageCapabilities.local_filesystem()
+        return StorageCapabilities(
+            supports_arrow=PYARROW_INSTALLED,
+            supports_streaming=PYARROW_INSTALLED,
+            supports_async=True,
+            supports_compression=True,
+            is_remote=True,
+            is_cloud_native=False,
+        )
+
+    @property
+    def capabilities(self) -> StorageCapabilities:
+        """Return instance-specific capabilities based on detected protocol."""
+        return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls._default_capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls._default_capabilities
 
     @property
     def backend_type(self) -> str:
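
The class now carries static defaults in _default_capabilities while __init__ stores protocol-specific capabilities on each instance. A short sketch of querying both levels (passing a ready-made fsspec filesystem follows the Union[str, AbstractFileSystem] signature above; the in-memory filesystem is illustrative):

    import fsspec

    from sqlspec.storage.backends.fsspec import FSSpecBackend

    # Class level: the declared defaults for this backend type.
    assert FSSpecBackend.has_capability("supports_async")

    # Instance level: refined by the protocol detected from the filesystem.
    backend = FSSpecBackend(fs=fsspec.filesystem("memory"), base_path="tmp")
    caps = backend.capabilities
    if caps.supports_arrow and caps.supports_streaming:
        ...  # safe to use the Arrow read/write/streaming paths on this instance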
@@ -95,51 +146,51 @@ class FSSpecBackend(ObjectStoreBase):
         return self._fs_uri
 
     # Core Operations (sync)
-    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
+    def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
         """Read bytes from an object."""
         resolved_path = self._resolve_path(path)
         return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore
 
-    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
+    def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
         """Write bytes to an object."""
         resolved_path = self._resolve_path(path)
         with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
             f.write(data)  # pyright: ignore
 
-    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from an object."""
         data = self.read_bytes(path, **kwargs)
         return data.decode(encoding)
 
-    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    def write_text(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text to an object."""
         self.write_bytes(path, data.encode(encoding), **kwargs)
 
     # Object Operations
-    def exists(self, path: str, **kwargs: Any) -> bool:
+    def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
         """Check if an object exists."""
         resolved_path = self._resolve_path(path)
         return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]
 
-    def delete(self, path: str, **kwargs: Any) -> None:
+    def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
         """Delete an object."""
         resolved_path = self._resolve_path(path)
         self.fs.rm(resolved_path, **kwargs)
 
-    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
+    def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Copy an object."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         self.fs.copy(source_path, dest_path, **kwargs)
 
-    def move(self, source: str, destination: str, **kwargs: Any) -> None:
+    def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Move an object."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         self.fs.mv(source_path, dest_path, **kwargs)
 
     # Arrow Operations
-    def read_arrow(self, path: str, **kwargs: Any) -> "ArrowTable":
+    def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
         """Read an Arrow table from storage."""
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
@@ -150,7 +201,7 @@ class FSSpecBackend(ObjectStoreBase):
         with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
             return pq.read_table(f)
 
-    def write_arrow(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
+    def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
         """Write an Arrow table to storage."""
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
@@ -172,7 +223,6 @@ class FSSpecBackend(ObjectStoreBase):
         else:
             pattern = f"{resolved_prefix}/*" if resolved_prefix else "*"
 
-        # Get all files (not directories)
         paths = [str(path) for path in self.fs.glob(pattern, **kwargs) if not self.fs.isdir(path)]
         return sorted(paths)
@@ -194,11 +244,10 @@ class FSSpecBackend(ObjectStoreBase):
         resolved_path = self._resolve_path(path)
         return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]
 
-    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
+    def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
         """Get object metadata."""
         info = self.fs.info(self._resolve_path(path), **kwargs)
 
-        # Convert fsspec info to dict
         if isinstance(info, dict):
             return info
 
@@ -208,7 +257,6 @@ class FSSpecBackend(ObjectStoreBase):
         except AttributeError:
             pass
 
-        # Fallback to basic metadata with safe attribute access
         resolved_path = self._resolve_path(path)
         return {
             "path": resolved_path,
@@ -217,7 +265,7 @@ class FSSpecBackend(ObjectStoreBase):
             "type": getattr(info, "type", "file"),
         }
 
-    def _stream_file_batches(self, obj_path: str) -> "Iterator[ArrowRecordBatch]":
+    def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
         import pyarrow.parquet as pq
 
         with self.fs.open(obj_path, mode="rb") as f:
@@ -234,15 +282,15 @@ class FSSpecBackend(ObjectStoreBase):
         for obj_path in self.glob(pattern, **kwargs):
             yield from self._stream_file_batches(obj_path)
 
-    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
+    async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
         """Async read bytes. Wraps the sync implementation."""
         return await async_(self.read_bytes)(path, **kwargs)
 
-    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
-        """Async write bytes.
+    async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
+        """Async write bytes. Wraps the sync implementation."""
         return await async_(self.write_bytes)(path, data, **kwargs)
 
-    async def _stream_file_batches_async(self, obj_path: str) -> "AsyncIterator[ArrowRecordBatch]":
+    async def _stream_file_batches_async(self, obj_path: Union[str, Path]) -> "AsyncIterator[ArrowRecordBatch]":
         import pyarrow.parquet as pq
 
         data = await self.read_bytes_async(obj_path)
@@ -266,7 +314,6 @@ class FSSpecBackend(ObjectStoreBase):
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
 
-        # Get paths asynchronously
         paths = await async_(self.glob)(pattern, **kwargs)
 
         # Stream batches from each path
@@ -274,11 +321,11 @@ class FSSpecBackend(ObjectStoreBase):
             async for batch in self._stream_file_batches_async(path):
                 yield batch
 
-    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
         """Async read text. Wraps the sync implementation."""
        return await async_(self.read_text)(path, encoding, **kwargs)
 
-    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Async write text. Wraps the sync implementation."""
         await async_(self.write_text)(path, data, encoding, **kwargs)
 
@@ -286,30 +333,30 @@ class FSSpecBackend(ObjectStoreBase):
         """Async list objects. Wraps the sync implementation."""
         return await async_(self.list_objects)(prefix, recursive, **kwargs)
 
-    async def exists_async(self, path: str, **kwargs: Any) -> bool:
+    async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
         """Async exists check. Wraps the sync implementation."""
         return await async_(self.exists)(path, **kwargs)
 
-    async def delete_async(self, path: str, **kwargs: Any) -> None:
+    async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
         """Async delete. Wraps the sync implementation."""
         await async_(self.delete)(path, **kwargs)
 
-    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
+    async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Async copy. Wraps the sync implementation."""
         await async_(self.copy)(source, destination, **kwargs)
 
-    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
+    async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Async move. Wraps the sync implementation."""
         await async_(self.move)(source, destination, **kwargs)
 
-    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
+    async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
         """Async get metadata. Wraps the sync implementation."""
         return await async_(self.get_metadata)(path, **kwargs)
 
-    async def read_arrow_async(self, path: str, **kwargs: Any) -> "ArrowTable":
+    async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
         """Async read Arrow. Wraps the sync implementation."""
         return await async_(self.read_arrow)(path, **kwargs)
 
-    async def write_arrow_async(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
+    async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
         """Async write Arrow. Wraps the sync implementation."""
         await async_(self.write_arrow)(path, table, **kwargs)
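
Every *_async method is now a thin wrapper that pushes the corresponding sync call through sqlspec.utils.sync_tools.async_ (presumably onto a worker thread), and every path parameter accepts str or pathlib.Path. A minimal usage sketch with fsspec's in-memory filesystem; the paths and payload are illustrative:

    import asyncio
    from pathlib import Path

    import fsspec

    from sqlspec.storage.backends.fsspec import FSSpecBackend

    async def main() -> None:
        backend = FSSpecBackend(fs=fsspec.filesystem("memory"), base_path="cache")

        # str and Path are interchangeable; both resolve under base_path.
        await backend.write_bytes_async(Path("objects/a.bin"), b"payload")
        assert await backend.exists_async("objects/a.bin")
        print(await backend.read_bytes_async("objects/a.bin"))

    asyncio.run(main())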