sqlspec 0.12.2__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
- sqlspec/_sql.py +21 -180
- sqlspec/adapters/adbc/config.py +10 -12
- sqlspec/adapters/adbc/driver.py +120 -118
- sqlspec/adapters/aiosqlite/config.py +3 -3
- sqlspec/adapters/aiosqlite/driver.py +100 -130
- sqlspec/adapters/asyncmy/config.py +3 -4
- sqlspec/adapters/asyncmy/driver.py +123 -135
- sqlspec/adapters/asyncpg/config.py +3 -7
- sqlspec/adapters/asyncpg/driver.py +98 -140
- sqlspec/adapters/bigquery/config.py +4 -5
- sqlspec/adapters/bigquery/driver.py +125 -167
- sqlspec/adapters/duckdb/config.py +3 -6
- sqlspec/adapters/duckdb/driver.py +114 -111
- sqlspec/adapters/oracledb/config.py +6 -5
- sqlspec/adapters/oracledb/driver.py +242 -259
- sqlspec/adapters/psqlpy/config.py +3 -7
- sqlspec/adapters/psqlpy/driver.py +118 -93
- sqlspec/adapters/psycopg/config.py +18 -31
- sqlspec/adapters/psycopg/driver.py +283 -236
- sqlspec/adapters/sqlite/config.py +3 -3
- sqlspec/adapters/sqlite/driver.py +103 -97
- sqlspec/config.py +0 -4
- sqlspec/driver/_async.py +89 -98
- sqlspec/driver/_common.py +52 -17
- sqlspec/driver/_sync.py +81 -105
- sqlspec/driver/connection.py +207 -0
- sqlspec/driver/mixins/_csv_writer.py +91 -0
- sqlspec/driver/mixins/_pipeline.py +38 -49
- sqlspec/driver/mixins/_result_utils.py +27 -9
- sqlspec/driver/mixins/_storage.py +67 -181
- sqlspec/driver/mixins/_type_coercion.py +3 -4
- sqlspec/driver/parameters.py +138 -0
- sqlspec/exceptions.py +10 -2
- sqlspec/extensions/aiosql/adapter.py +0 -10
- sqlspec/extensions/litestar/handlers.py +0 -1
- sqlspec/extensions/litestar/plugin.py +0 -3
- sqlspec/extensions/litestar/providers.py +0 -14
- sqlspec/loader.py +25 -90
- sqlspec/protocols.py +542 -0
- sqlspec/service/__init__.py +3 -2
- sqlspec/service/_util.py +147 -0
- sqlspec/service/base.py +1116 -9
- sqlspec/statement/builder/__init__.py +42 -32
- sqlspec/statement/builder/_ddl_utils.py +0 -10
- sqlspec/statement/builder/_parsing_utils.py +10 -4
- sqlspec/statement/builder/base.py +67 -22
- sqlspec/statement/builder/column.py +283 -0
- sqlspec/statement/builder/ddl.py +91 -67
- sqlspec/statement/builder/delete.py +23 -7
- sqlspec/statement/builder/insert.py +29 -15
- sqlspec/statement/builder/merge.py +4 -4
- sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
- sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
- sqlspec/statement/builder/mixins/_delete_from.py +1 -1
- sqlspec/statement/builder/mixins/_from.py +10 -8
- sqlspec/statement/builder/mixins/_group_by.py +0 -1
- sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
- sqlspec/statement/builder/mixins/_insert_values.py +0 -2
- sqlspec/statement/builder/mixins/_join.py +20 -13
- sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
- sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
- sqlspec/statement/builder/mixins/_order_by.py +2 -2
- sqlspec/statement/builder/mixins/_pivot.py +4 -7
- sqlspec/statement/builder/mixins/_select_columns.py +6 -5
- sqlspec/statement/builder/mixins/_unpivot.py +6 -9
- sqlspec/statement/builder/mixins/_update_from.py +2 -1
- sqlspec/statement/builder/mixins/_update_set.py +11 -8
- sqlspec/statement/builder/mixins/_where.py +61 -34
- sqlspec/statement/builder/select.py +32 -17
- sqlspec/statement/builder/update.py +25 -11
- sqlspec/statement/filters.py +39 -14
- sqlspec/statement/parameter_manager.py +220 -0
- sqlspec/statement/parameters.py +210 -79
- sqlspec/statement/pipelines/__init__.py +166 -23
- sqlspec/statement/pipelines/analyzers/_analyzer.py +21 -20
- sqlspec/statement/pipelines/context.py +35 -39
- sqlspec/statement/pipelines/transformers/__init__.py +2 -3
- sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
- sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +628 -58
- sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
- sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
- sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
- sqlspec/statement/pipelines/validators/_performance.py +38 -23
- sqlspec/statement/pipelines/validators/_security.py +39 -62
- sqlspec/statement/result.py +37 -129
- sqlspec/statement/splitter.py +0 -12
- sqlspec/statement/sql.py +863 -391
- sqlspec/statement/sql_compiler.py +140 -0
- sqlspec/storage/__init__.py +10 -2
- sqlspec/storage/backends/fsspec.py +53 -8
- sqlspec/storage/backends/obstore.py +15 -19
- sqlspec/storage/capabilities.py +101 -0
- sqlspec/storage/registry.py +56 -83
- sqlspec/typing.py +6 -434
- sqlspec/utils/cached_property.py +25 -0
- sqlspec/utils/correlation.py +0 -2
- sqlspec/utils/logging.py +0 -6
- sqlspec/utils/sync_tools.py +0 -4
- sqlspec/utils/text.py +0 -5
- sqlspec/utils/type_guards.py +892 -0
- {sqlspec-0.12.2.dist-info → sqlspec-0.13.0.dist-info}/METADATA +1 -1
- sqlspec-0.13.0.dist-info/RECORD +150 -0
- sqlspec/statement/builder/protocols.py +0 -20
- sqlspec/statement/pipelines/base.py +0 -315
- sqlspec/statement/pipelines/result_types.py +0 -41
- sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
- sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
- sqlspec/statement/pipelines/validators/base.py +0 -67
- sqlspec/storage/protocol.py +0 -173
- sqlspec-0.12.2.dist-info/RECORD +0 -145
- {sqlspec-0.12.2.dist-info → sqlspec-0.13.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.12.2.dist-info → sqlspec-0.13.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.12.2.dist-info → sqlspec-0.13.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/statement/sql_compiler.py
ADDED
@@ -0,0 +1,140 @@
+"""SQL compilation logic separated from the main SQL class."""
+
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
+
+import sqlglot.expressions as exp
+
+from sqlspec.exceptions import SQLCompilationError
+from sqlspec.statement.parameters import ParameterConverter, ParameterStyle
+from sqlspec.statement.pipelines import SQLProcessingContext, StatementPipeline
+from sqlspec.statement.sql import SQLConfig
+from sqlspec.utils.cached_property import CachedProperty
+
+if TYPE_CHECKING:
+    from sqlglot.dialects.dialect import DialectType
+
+    from sqlspec.protocols import ProcessorProtocol
+    from sqlspec.statement.parameter_manager import ParameterManager
+
+
+__all__ = ("SQLCompiler",)
+
+
+class SQLCompiler:
+    """Handles SQL compilation and pipeline processing."""
+
+    def __init__(
+        self,
+        expression: exp.Expression,
+        dialect: "Optional[DialectType]" = None,
+        parameter_manager: "Optional[ParameterManager]" = None,
+        is_script: bool = False,
+        original_sql: Optional[str] = None,
+        config: Optional[SQLConfig] = None,
+    ) -> None:
+        self.expression = expression
+        self.dialect = dialect
+        self.parameter_manager = parameter_manager
+        self.is_script = is_script
+        self._original_sql = original_sql
+        self.config = config or SQLConfig(dialect=dialect)
+
+    @CachedProperty
+    def _pipeline(self) -> StatementPipeline:
+        """Get the statement pipeline."""
+        validators: list[ProcessorProtocol] = []
+
+        if self.config.enable_validation and self.config.allowed_parameter_styles is not None:
+            from sqlspec.statement.pipelines.validators._parameter_style import ParameterStyleValidator
+
+            # In strict mode, fail on violations
+            validators.append(ParameterStyleValidator(fail_on_violation=self.config.strict_mode))
+
+        return StatementPipeline(validators=validators)
+
+    @CachedProperty
+    def _context(self) -> SQLProcessingContext:
+        """Get the processing context."""
+        if isinstance(self.expression, exp.Anonymous) and self.expression.this:
+            sql_string = str(self.expression.this)
+        else:
+            sql_string = self.expression.sql(dialect=self.dialect)
+
+        context = SQLProcessingContext(initial_sql_string=sql_string, dialect=self.dialect, config=self.config)
+        context.initial_expression = self.expression
+        context.current_expression = self.expression
+
+        from sqlspec.statement.parameters import ParameterValidator
+
+        validator = ParameterValidator()
+        context.parameter_info = validator.extract_parameters(sql_string)
+
+        if self.parameter_manager:
+            if self.parameter_manager.positional_parameters:
+                context.merged_parameters = self.parameter_manager.positional_parameters
+                context.initial_parameters = self.parameter_manager.positional_parameters
+            elif self.parameter_manager.named_parameters:
+                context.merged_parameters = self.parameter_manager.named_parameters
+                context.initial_kwargs = self.parameter_manager.named_parameters
+            context.initial_parameters = self.parameter_manager.positional_parameters
+            context.initial_kwargs = self.parameter_manager.named_parameters
+        return context
+
+    @CachedProperty
+    def _processed_expr(self) -> exp.Expression:
+        """Execute the processing pipeline and cache the result."""
+        try:
+            result = self._pipeline.execute_pipeline(self._context)
+        except Exception as e:
+            msg = f"Failed to compile SQL: {self._context.initial_sql_string}"
+            raise SQLCompilationError(msg) from e
+        else:
+            return cast("exp.Expression", result.expression)
+
+    @CachedProperty
+    def _compiled_sql(self) -> str:
+        """Get the compiled SQL string."""
+        if self.is_script:
+            return str(self._original_sql or self.expression.sql(dialect=self.dialect))
+        # Always go through the pipeline to ensure validation runs
+        processed = self._processed_expr
+        if isinstance(processed, exp.Anonymous) and processed.this:
+            return str(processed.this)
+        return str(processed.sql(dialect=self.dialect, comments=False))
+
+    def compile(self, placeholder_style: Optional[str] = None) -> tuple[str, Any]:
+        """Compile SQL and parameters."""
+        if self.is_script:
+            return self._compiled_sql, None
+
+        sql = self.to_sql(placeholder_style)
+        params = self._get_compiled_parameters(placeholder_style)
+        return sql, params
+
+    def to_sql(self, placeholder_style: Optional[str] = None) -> str:
+        """Get the SQL string with a specific placeholder style."""
+        if placeholder_style is None or self.is_script:
+            return cast("str", self._compiled_sql)
+
+        converter = ParameterConverter()
+        sql = self._compiled_sql
+
+        target_style = ParameterStyle(placeholder_style)
+        return converter.convert_placeholders(sql, target_style, self._context.parameter_info)
+
+    def get_parameters(self, style: Union[ParameterStyle, str, None] = None) -> Any:
+        """Get the parameters in a specific style."""
+        if self.is_script:
+            return None
+        return cast("Any", self._get_compiled_parameters(str(style) if style else None))
+
+    def _get_compiled_parameters(self, placeholder_style: Optional[str]) -> Any:
+        """Get compiled parameters in target style."""
+        if not self.parameter_manager:
+            return None
+
+        # This ensures the pipeline has run and context is populated
+        _ = self._processed_expr
+
+        style_enum = ParameterStyle(placeholder_style) if placeholder_style else ParameterStyle.NAMED_COLON
+        return self.parameter_manager.get_compiled_parameters(self._context.parameter_info, style_enum)
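For orientation, a minimal sketch of how this new compiler could be driven. The constructor and compile() signatures come straight from the diff above, but SQLCompiler is an internal class, and the example statement and dialect are hypothetical:

    import sqlglot

    from sqlspec.statement.sql import SQLConfig
    from sqlspec.statement.sql_compiler import SQLCompiler

    # Parse a statement into the sqlglot expression the compiler expects.
    expression = sqlglot.parse_one("SELECT * FROM users WHERE id = :id", read="postgres")
    compiler = SQLCompiler(expression, dialect="postgres", config=SQLConfig(dialect="postgres"))

    # With no ParameterManager attached, compile() runs the validation pipeline
    # and returns the rendered SQL alongside None for the parameters.
    sql, params = compiler.compile()
    print(sql, params)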
sqlspec/storage/__init__.py
CHANGED
@@ -5,11 +5,19 @@ This module provides a flexible storage system with:
 - Lazy loading and configuration-based registration
 - URI scheme-based automatic backend resolution
 - Key-based named storage configurations
+- Capability-based backend selection
 """
 
-from sqlspec.
+from sqlspec.protocols import ObjectStoreProtocol
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
 from sqlspec.storage.registry import StorageRegistry
 
 storage_registry = StorageRegistry()
 
-__all__ = (
+__all__ = (
+    "HasStorageCapabilities",
+    "ObjectStoreProtocol",
+    "StorageCapabilities",
+    "StorageRegistry",
+    "storage_registry",
+)
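For reference, the module's new public surface, taken directly from the __all__ tuple above:

    from sqlspec.storage import (
        HasStorageCapabilities,
        ObjectStoreProtocol,
        StorageCapabilities,
        StorageRegistry,
        storage_registry,
    )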
sqlspec/storage/backends/fsspec.py
CHANGED
@@ -2,10 +2,11 @@
 import logging
 from io import BytesIO
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Union
+from typing import TYPE_CHECKING, Any, ClassVar, Union
 
 from sqlspec.exceptions import MissingDependencyError
 from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import StorageCapabilities
 from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
 from sqlspec.utils.sync_tools import async_
 
@@ -47,6 +48,16 @@ class FSSpecBackend(ObjectStoreBase):
     and offering fallback capabilities.
     """
 
+    # FSSpec supports most operations but varies by underlying filesystem
+    _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=PYARROW_INSTALLED,
+        supports_streaming=PYARROW_INSTALLED,
+        supports_async=True,
+        supports_compression=True,
+        is_remote=True,
+        is_cloud_native=False,
+    )
+
     def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
         if not FSSPEC_INSTALLED:
             raise MissingDependencyError(package="fsspec", install_package="fsspec")
@@ -63,6 +74,10 @@ class FSSpecBackend(ObjectStoreBase):
         self.fs = fs
         self.protocol = getattr(fs, "protocol", "unknown")
         self._fs_uri = f"{self.protocol}://"
+
+        # Set instance-level capabilities based on detected protocol
+        self._instance_capabilities = self._detect_capabilities()
+
         super().__init__()
 
     @classmethod
@@ -71,7 +86,6 @@ class FSSpecBackend(ObjectStoreBase):
         fs_config = config.get("fs_config", {})
         base_path = config.get("base_path", "")
 
-        # Create filesystem instance from protocol
         import fsspec
 
         fs_instance = fsspec.filesystem(protocol, **fs_config)
@@ -82,12 +96,47 @@ class FSSpecBackend(ObjectStoreBase):
         """Resolve path relative to base_path."""
         path_str = str(path)
         if self.base_path:
-            # Ensure no double slashes
             clean_base = self.base_path.rstrip("/")
             clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
         return path_str
 
+    def _detect_capabilities(self) -> StorageCapabilities:
+        """Detect capabilities based on underlying filesystem protocol."""
+        protocol = self.protocol.lower()
+
+        if protocol in {"s3", "s3a", "s3n"}:
+            return StorageCapabilities.s3_compatible()
+        if protocol in {"gcs", "gs"}:
+            return StorageCapabilities.gcs()
+        if protocol in {"abfs", "az", "azure"}:
+            return StorageCapabilities.azure_blob()
+        if protocol in {"file", "local"}:
+            return StorageCapabilities.local_filesystem()
+        return StorageCapabilities(
+            supports_arrow=PYARROW_INSTALLED,
+            supports_streaming=PYARROW_INSTALLED,
+            supports_async=True,
+            supports_compression=True,
+            is_remote=True,
+            is_cloud_native=False,
+        )
+
+    @property
+    def capabilities(self) -> StorageCapabilities:
+        """Return instance-specific capabilities based on detected protocol."""
+        return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls._default_capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls._default_capabilities
+
     @property
     def backend_type(self) -> str:
         return "fsspec"
@@ -174,7 +223,6 @@ class FSSpecBackend(ObjectStoreBase):
         else:
             pattern = f"{resolved_prefix}/*" if resolved_prefix else "*"
 
-        # Get all files (not directories)
         paths = [str(path) for path in self.fs.glob(pattern, **kwargs) if not self.fs.isdir(path)]
         return sorted(paths)
 
@@ -200,7 +248,6 @@ class FSSpecBackend(ObjectStoreBase):
         """Get object metadata."""
         info = self.fs.info(self._resolve_path(path), **kwargs)
 
-        # Convert fsspec info to dict
         if isinstance(info, dict):
             return info
 
@@ -210,7 +257,6 @@ class FSSpecBackend(ObjectStoreBase):
         except AttributeError:
             pass
 
-        # Fallback to basic metadata with safe attribute access
         resolved_path = self._resolve_path(path)
         return {
             "path": resolved_path,
@@ -241,7 +287,7 @@ class FSSpecBackend(ObjectStoreBase):
         return await async_(self.read_bytes)(path, **kwargs)
 
     async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
-        """Async write bytes.
+        """Async write bytes. Wraps the sync implementation."""
         return await async_(self.write_bytes)(path, data, **kwargs)
 
     async def _stream_file_batches_async(self, obj_path: Union[str, Path]) -> "AsyncIterator[ArrowRecordBatch]":
@@ -268,7 +314,6 @@ class FSSpecBackend(ObjectStoreBase):
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
 
-        # Get paths asynchronously
         paths = await async_(self.glob)(pattern, **kwargs)
 
         # Stream batches from each path
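A short sketch of how the protocol-based detection above behaves at runtime. It assumes fsspec is installed; the in-memory filesystem is only a convenient stand-in, and since "memory" is not one of the recognized cloud protocols it falls into the generic capability branch:

    import fsspec

    from sqlspec.storage.backends.fsspec import FSSpecBackend

    backend = FSSpecBackend(fsspec.filesystem("memory"))

    # Instance-level capabilities come from _detect_capabilities().
    print(backend.capabilities.is_remote)       # True (generic remote profile)
    print(backend.capabilities.supports_async)  # True

    # Class-level checks consult _default_capabilities instead.
    print(FSSpecBackend.has_capability("supports_compression"))  # True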
sqlspec/storage/backends/obstore.py
CHANGED
@@ -9,10 +9,11 @@ from __future__ import annotations
 
 import fnmatch
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
 from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
 from sqlspec.typing import OBSTORE_INSTALLED
 
 if TYPE_CHECKING:
@@ -26,7 +27,7 @@ __all__ = ("ObStoreBackend",)
 logger = logging.getLogger(__name__)
 
 
-class ObStoreBackend(ObjectStoreBase):
+class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     """High-performance object storage backend using obstore.
 
     This backend leverages obstore's Rust-based implementation for maximum
@@ -40,6 +41,18 @@ class ObStoreBackend(ObjectStoreBase):
     Features native Arrow support and ~9x better performance than fsspec.
     """
 
+    # ObStore has excellent native capabilities
+    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=True,
+        supports_streaming=True,
+        supports_async=True,
+        supports_batch_operations=True,
+        supports_multipart_upload=True,
+        supports_compression=True,
+        is_cloud_native=True,
+        has_low_latency=True,
+    )
+
     def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
         """Initialize obstore backend.
 
@@ -58,14 +71,12 @@ class ObStoreBackend(ObjectStoreBase):
         self.store_options = store_options
         self.store: Any  # Will be set based on store_uri
 
-        # Initialize obstore instance
         if store_uri.startswith("memory://"):
             # MemoryStore doesn't use from_url - create directly
             from obstore.store import MemoryStore
 
             self.store = MemoryStore()
         elif store_uri.startswith("file://"):
-            # For file:// URIs, use LocalStore with root directory
             from obstore.store import LocalStore
 
             # LocalStore works with directory paths, so we use root
@@ -86,15 +97,12 @@ class ObStoreBackend(ObjectStoreBase):
 
     def _resolve_path(self, path: str | Path) -> str:
         """Resolve path relative to base_path."""
-        # Convert Path to string
         path_str = str(path)
         # For file:// URIs, the path passed in is already absolute
         if self.store_uri.startswith("file://") and path_str.startswith("/"):
-            # Remove leading slash for LocalStore (it's relative to its root)
             return path_str.lstrip("/")
 
         if self.base_path:
-            # Ensure no double slashes by stripping trailing slash from base_path
             clean_base = self.base_path.rstrip("/")
             clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
@@ -113,7 +121,6 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_path = self._resolve_path(path)
         result = self.store.get(resolved_path)
         bytes_data = result.bytes()
-        # Handle obstore's Bytes type - it might have a method to get raw bytes
         if hasattr(bytes_data, "__bytes__"):
             return bytes(bytes_data)
         if hasattr(bytes_data, "tobytes"):
@@ -210,17 +217,14 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_pattern = self._resolve_path(pattern)
         all_objects = self.list_objects(recursive=True, **kwargs)
 
-        # For complex patterns with **, use PurePosixPath
         if "**" in pattern:
             matching_objects = []
 
             # Special case: **/*.ext should also match *.ext in root
             if pattern.startswith("**/"):
-                # Get the suffix pattern
                 suffix_pattern = pattern[3:]  # Remove **/
 
                 for obj in all_objects:
-                    # Check if object ends with the suffix pattern
                     obj_path = PurePosixPath(obj)
                     # Try both the full pattern and just the suffix
                     if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
@@ -271,7 +275,6 @@ class ObStoreBackend(ObjectStoreBase):
         if resolved_path.endswith("/"):
             return True
 
-        # Check if there are any objects with this prefix
         try:
             objects = self.list_objects(prefix=str(path), recursive=False)
             return len(objects) > 0
@@ -282,7 +285,6 @@ class ObStoreBackend(ObjectStoreBase):
         """Read Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "read_arrow"):
                 return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
             # Fall back to reading as Parquet via bytes
@@ -301,7 +303,6 @@ class ObStoreBackend(ObjectStoreBase):
         """Write Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "write_arrow"):
                 self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
             else:
@@ -321,7 +322,6 @@ class ObStoreBackend(ObjectStoreBase):
 
         for field in schema:
             if str(field.type).startswith("decimal64"):
-                # Convert decimal64 to decimal128
                 import re
 
                 match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
@@ -367,7 +367,6 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_path = self._resolve_path(path)
         result = await self.store.get_async(resolved_path)
         bytes_data = result.bytes()
-        # Handle obstore's Bytes type - it might have a method to get raw bytes
         if hasattr(bytes_data, "__bytes__"):
             return bytes(bytes_data)
         if hasattr(bytes_data, "tobytes"):
@@ -441,10 +440,8 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_path = self._resolve_path(path)
         metadata = await self.store.head_async(resolved_path)
 
-        # Convert obstore ObjectMeta to dict
         result = {"path": resolved_path, "exists": True}
 
-        # Extract metadata attributes if available
         for attr in ["size", "last_modified", "e_tag", "version"]:
             if hasattr(metadata, attr):
                 result[attr] = getattr(metadata, attr)
@@ -465,7 +462,6 @@ class ObStoreBackend(ObjectStoreBase):
     async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
         """Async write Arrow table using native obstore async."""
         resolved_path = self._resolve_path(path)
-        # Check if the store has native async Arrow support
         if hasattr(self.store, "write_arrow_async"):
             await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
         else:
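Because ObStoreBackend now declares its profile as a ClassVar through the HasStorageCapabilities mixin, capabilities can be queried without instantiating a store. A sketch (per the diff, obstore itself is only imported inside __init__, so class-level queries should work without it):

    from sqlspec.storage.backends.obstore import ObStoreBackend

    # Class-level queries read the ClassVar declared in the diff above.
    if ObStoreBackend.has_capability("supports_multipart_upload"):
        print("multipart uploads available")

    print(ObStoreBackend.get_capabilities().is_cloud_native)  # True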
sqlspec/storage/capabilities.py
ADDED
@@ -0,0 +1,101 @@
+"""Storage backend capability system.
+
+This module provides a centralized way to track and query storage backend capabilities.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar
+
+__all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+@dataclass
+class StorageCapabilities:
+    """Tracks capabilities of a storage backend."""
+
+    # Basic operations
+    supports_read: bool = True
+    supports_write: bool = True
+    supports_delete: bool = True
+    supports_list: bool = True
+    supports_exists: bool = True
+    supports_copy: bool = True
+    supports_move: bool = True
+    supports_metadata: bool = True
+
+    # Advanced operations
+    supports_arrow: bool = False
+    supports_streaming: bool = False
+    supports_async: bool = False
+    supports_batch_operations: bool = False
+    supports_multipart_upload: bool = False
+    supports_compression: bool = False
+
+    # Protocol-specific features
+    supports_s3_select: bool = False
+    supports_gcs_compose: bool = False
+    supports_azure_snapshots: bool = False
+
+    # Performance characteristics
+    is_remote: bool = True
+    is_cloud_native: bool = False
+    has_low_latency: bool = False
+
+    @classmethod
+    def local_filesystem(cls) -> "StorageCapabilities":
+        """Capabilities for local filesystem backend."""
+        return cls(
+            is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+        )
+
+    @classmethod
+    def s3_compatible(cls) -> "StorageCapabilities":
+        """Capabilities for S3-compatible backends."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_s3_select=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def gcs(cls) -> "StorageCapabilities":
+        """Capabilities for Google Cloud Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_gcs_compose=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def azure_blob(cls) -> "StorageCapabilities":
+        """Capabilities for Azure Blob Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_azure_snapshots=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+
+class HasStorageCapabilities:
+    """Mixin for storage backends that expose their capabilities."""
+
+    capabilities: ClassVar[StorageCapabilities]
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls.capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls.capabilities
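These presets make the capability-based backend selection advertised in sqlspec/storage/__init__.py straightforward. A minimal sketch, using only the dataclass defined above, of filtering capability profiles against a set of required flags:

    from sqlspec.storage.capabilities import StorageCapabilities

    required = ("supports_arrow", "supports_multipart_upload")
    profiles = {
        "local": StorageCapabilities.local_filesystem(),
        "s3": StorageCapabilities.s3_compatible(),
        "gcs": StorageCapabilities.gcs(),
    }

    # Keep only profiles that satisfy every required capability.
    eligible = [name for name, caps in profiles.items() if all(getattr(caps, flag) for flag in required)]
    print(eligible)  # ['s3', 'gcs'] - local filesystems lack multipart upload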