sqlspec 0.12.2__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113)
  1. sqlspec/_sql.py +21 -180
  2. sqlspec/adapters/adbc/config.py +10 -12
  3. sqlspec/adapters/adbc/driver.py +120 -118
  4. sqlspec/adapters/aiosqlite/config.py +16 -3
  5. sqlspec/adapters/aiosqlite/driver.py +100 -130
  6. sqlspec/adapters/asyncmy/config.py +17 -4
  7. sqlspec/adapters/asyncmy/driver.py +123 -135
  8. sqlspec/adapters/asyncpg/config.py +17 -29
  9. sqlspec/adapters/asyncpg/driver.py +98 -140
  10. sqlspec/adapters/bigquery/config.py +4 -5
  11. sqlspec/adapters/bigquery/driver.py +125 -167
  12. sqlspec/adapters/duckdb/config.py +3 -6
  13. sqlspec/adapters/duckdb/driver.py +114 -111
  14. sqlspec/adapters/oracledb/config.py +32 -5
  15. sqlspec/adapters/oracledb/driver.py +242 -259
  16. sqlspec/adapters/psqlpy/config.py +18 -9
  17. sqlspec/adapters/psqlpy/driver.py +118 -93
  18. sqlspec/adapters/psycopg/config.py +44 -31
  19. sqlspec/adapters/psycopg/driver.py +283 -236
  20. sqlspec/adapters/sqlite/config.py +3 -3
  21. sqlspec/adapters/sqlite/driver.py +103 -97
  22. sqlspec/config.py +0 -4
  23. sqlspec/driver/_async.py +89 -98
  24. sqlspec/driver/_common.py +52 -17
  25. sqlspec/driver/_sync.py +81 -105
  26. sqlspec/driver/connection.py +207 -0
  27. sqlspec/driver/mixins/_csv_writer.py +91 -0
  28. sqlspec/driver/mixins/_pipeline.py +38 -49
  29. sqlspec/driver/mixins/_result_utils.py +27 -9
  30. sqlspec/driver/mixins/_storage.py +67 -181
  31. sqlspec/driver/mixins/_type_coercion.py +3 -4
  32. sqlspec/driver/parameters.py +138 -0
  33. sqlspec/exceptions.py +10 -2
  34. sqlspec/extensions/aiosql/adapter.py +0 -10
  35. sqlspec/extensions/litestar/handlers.py +0 -1
  36. sqlspec/extensions/litestar/plugin.py +0 -3
  37. sqlspec/extensions/litestar/providers.py +0 -14
  38. sqlspec/loader.py +25 -90
  39. sqlspec/protocols.py +542 -0
  40. sqlspec/service/__init__.py +3 -2
  41. sqlspec/service/_util.py +147 -0
  42. sqlspec/service/base.py +1116 -9
  43. sqlspec/statement/builder/__init__.py +42 -32
  44. sqlspec/statement/builder/_ddl_utils.py +0 -10
  45. sqlspec/statement/builder/_parsing_utils.py +10 -4
  46. sqlspec/statement/builder/base.py +67 -22
  47. sqlspec/statement/builder/column.py +283 -0
  48. sqlspec/statement/builder/ddl.py +91 -67
  49. sqlspec/statement/builder/delete.py +23 -7
  50. sqlspec/statement/builder/insert.py +29 -15
  51. sqlspec/statement/builder/merge.py +4 -4
  52. sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
  53. sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
  54. sqlspec/statement/builder/mixins/_delete_from.py +1 -1
  55. sqlspec/statement/builder/mixins/_from.py +10 -8
  56. sqlspec/statement/builder/mixins/_group_by.py +0 -1
  57. sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
  58. sqlspec/statement/builder/mixins/_insert_values.py +0 -2
  59. sqlspec/statement/builder/mixins/_join.py +20 -13
  60. sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
  61. sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
  62. sqlspec/statement/builder/mixins/_order_by.py +2 -2
  63. sqlspec/statement/builder/mixins/_pivot.py +4 -7
  64. sqlspec/statement/builder/mixins/_select_columns.py +6 -5
  65. sqlspec/statement/builder/mixins/_unpivot.py +6 -9
  66. sqlspec/statement/builder/mixins/_update_from.py +2 -1
  67. sqlspec/statement/builder/mixins/_update_set.py +11 -8
  68. sqlspec/statement/builder/mixins/_where.py +61 -34
  69. sqlspec/statement/builder/select.py +32 -17
  70. sqlspec/statement/builder/update.py +25 -11
  71. sqlspec/statement/filters.py +39 -14
  72. sqlspec/statement/parameter_manager.py +220 -0
  73. sqlspec/statement/parameters.py +210 -79
  74. sqlspec/statement/pipelines/__init__.py +166 -23
  75. sqlspec/statement/pipelines/analyzers/_analyzer.py +21 -20
  76. sqlspec/statement/pipelines/context.py +35 -39
  77. sqlspec/statement/pipelines/transformers/__init__.py +2 -3
  78. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
  79. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +628 -58
  80. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
  81. sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
  82. sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
  83. sqlspec/statement/pipelines/validators/_performance.py +38 -23
  84. sqlspec/statement/pipelines/validators/_security.py +39 -62
  85. sqlspec/statement/result.py +37 -129
  86. sqlspec/statement/splitter.py +0 -12
  87. sqlspec/statement/sql.py +863 -391
  88. sqlspec/statement/sql_compiler.py +140 -0
  89. sqlspec/storage/__init__.py +10 -2
  90. sqlspec/storage/backends/fsspec.py +53 -8
  91. sqlspec/storage/backends/obstore.py +15 -19
  92. sqlspec/storage/capabilities.py +101 -0
  93. sqlspec/storage/registry.py +56 -83
  94. sqlspec/typing.py +6 -434
  95. sqlspec/utils/cached_property.py +25 -0
  96. sqlspec/utils/correlation.py +0 -2
  97. sqlspec/utils/logging.py +0 -6
  98. sqlspec/utils/sync_tools.py +0 -4
  99. sqlspec/utils/text.py +0 -5
  100. sqlspec/utils/type_guards.py +892 -0
  101. {sqlspec-0.12.2.dist-info → sqlspec-0.13.1.dist-info}/METADATA +1 -1
  102. sqlspec-0.13.1.dist-info/RECORD +150 -0
  103. sqlspec/statement/builder/protocols.py +0 -20
  104. sqlspec/statement/pipelines/base.py +0 -315
  105. sqlspec/statement/pipelines/result_types.py +0 -41
  106. sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
  107. sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
  108. sqlspec/statement/pipelines/validators/base.py +0 -67
  109. sqlspec/storage/protocol.py +0 -173
  110. sqlspec-0.12.2.dist-info/RECORD +0 -145
  111. {sqlspec-0.12.2.dist-info → sqlspec-0.13.1.dist-info}/WHEEL +0 -0
  112. {sqlspec-0.12.2.dist-info → sqlspec-0.13.1.dist-info}/licenses/LICENSE +0 -0
  113. {sqlspec-0.12.2.dist-info → sqlspec-0.13.1.dist-info}/licenses/NOTICE +0 -0
sqlspec/statement/sql_compiler.py (new file)
@@ -0,0 +1,140 @@
+ """SQL compilation logic separated from the main SQL class."""
+
+ from typing import TYPE_CHECKING, Any, Optional, Union, cast
+
+ import sqlglot.expressions as exp
+
+ from sqlspec.exceptions import SQLCompilationError
+ from sqlspec.statement.parameters import ParameterConverter, ParameterStyle
+ from sqlspec.statement.pipelines import SQLProcessingContext, StatementPipeline
+ from sqlspec.statement.sql import SQLConfig
+ from sqlspec.utils.cached_property import CachedProperty
+
+ if TYPE_CHECKING:
+     from sqlglot.dialects.dialect import DialectType
+
+     from sqlspec.protocols import ProcessorProtocol
+     from sqlspec.statement.parameter_manager import ParameterManager
+
+
+ __all__ = ("SQLCompiler",)
+
+
+ class SQLCompiler:
+     """Handles SQL compilation and pipeline processing."""
+
+     def __init__(
+         self,
+         expression: exp.Expression,
+         dialect: "Optional[DialectType]" = None,
+         parameter_manager: "Optional[ParameterManager]" = None,
+         is_script: bool = False,
+         original_sql: Optional[str] = None,
+         config: Optional[SQLConfig] = None,
+     ) -> None:
+         self.expression = expression
+         self.dialect = dialect
+         self.parameter_manager = parameter_manager
+         self.is_script = is_script
+         self._original_sql = original_sql
+         self.config = config or SQLConfig(dialect=dialect)
+
+     @CachedProperty
+     def _pipeline(self) -> StatementPipeline:
+         """Get the statement pipeline."""
+         validators: list[ProcessorProtocol] = []
+
+         if self.config.enable_validation and self.config.allowed_parameter_styles is not None:
+             from sqlspec.statement.pipelines.validators._parameter_style import ParameterStyleValidator
+
+             # In strict mode, fail on violations
+             validators.append(ParameterStyleValidator(fail_on_violation=self.config.strict_mode))
+
+         return StatementPipeline(validators=validators)
+
+     @CachedProperty
+     def _context(self) -> SQLProcessingContext:
+         """Get the processing context."""
+         if isinstance(self.expression, exp.Anonymous) and self.expression.this:
+             sql_string = str(self.expression.this)
+         else:
+             sql_string = self.expression.sql(dialect=self.dialect)
+
+         context = SQLProcessingContext(initial_sql_string=sql_string, dialect=self.dialect, config=self.config)
+         context.initial_expression = self.expression
+         context.current_expression = self.expression
+
+         from sqlspec.statement.parameters import ParameterValidator
+
+         validator = ParameterValidator()
+         context.parameter_info = validator.extract_parameters(sql_string)
+
+         if self.parameter_manager:
+             if self.parameter_manager.positional_parameters:
+                 context.merged_parameters = self.parameter_manager.positional_parameters
+                 context.initial_parameters = self.parameter_manager.positional_parameters
+             elif self.parameter_manager.named_parameters:
+                 context.merged_parameters = self.parameter_manager.named_parameters
+                 context.initial_kwargs = self.parameter_manager.named_parameters
+             context.initial_parameters = self.parameter_manager.positional_parameters
+             context.initial_kwargs = self.parameter_manager.named_parameters
+         return context
+
+     @CachedProperty
+     def _processed_expr(self) -> exp.Expression:
+         """Execute the processing pipeline and cache the result."""
+         try:
+             result = self._pipeline.execute_pipeline(self._context)
+         except Exception as e:
+             msg = f"Failed to compile SQL: {self._context.initial_sql_string}"
+             raise SQLCompilationError(msg) from e
+         else:
+             return cast("exp.Expression", result.expression)
+
+     @CachedProperty
+     def _compiled_sql(self) -> str:
+         """Get the compiled SQL string."""
+         if self.is_script:
+             return str(self._original_sql or self.expression.sql(dialect=self.dialect))
+         # Always go through the pipeline to ensure validation runs
+         processed = self._processed_expr
+         if isinstance(processed, exp.Anonymous) and processed.this:
+             return str(processed.this)
+         return str(processed.sql(dialect=self.dialect, comments=False))
+
+     def compile(self, placeholder_style: Optional[str] = None) -> tuple[str, Any]:
+         """Compile SQL and parameters."""
+         if self.is_script:
+             return self._compiled_sql, None
+
+         sql = self.to_sql(placeholder_style)
+         params = self._get_compiled_parameters(placeholder_style)
+         return sql, params
+
+     def to_sql(self, placeholder_style: Optional[str] = None) -> str:
+         """Get the SQL string with a specific placeholder style."""
+         if placeholder_style is None or self.is_script:
+             return cast("str", self._compiled_sql)
+
+         converter = ParameterConverter()
+         sql = self._compiled_sql
+
+         target_style = ParameterStyle(placeholder_style)
+         return converter.convert_placeholders(sql, target_style, self._context.parameter_info)
+
+     def get_parameters(self, style: Union[ParameterStyle, str, None] = None) -> Any:
+         """Get the parameters in a specific style."""
+         if self.is_script:
+             return None
+         return cast("Any", self._get_compiled_parameters(str(style) if style else None))
+
+     def _get_compiled_parameters(self, placeholder_style: Optional[str]) -> Any:
+         """Get compiled parameters in target style."""
+         if not self.parameter_manager:
+             return None
+
+         # This ensures the pipeline has run and context is populated
+         _ = self._processed_expr
+
+         style_enum = ParameterStyle(placeholder_style) if placeholder_style else ParameterStyle.NAMED_COLON
+         return self.parameter_manager.get_compiled_parameters(self._context.parameter_info, style_enum)
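
The new `SQLCompiler` caches the pipeline, context, and compiled SQL behind `CachedProperty`, so repeated `compile()`/`to_sql()` calls pay the pipeline cost only once. A minimal sketch of driving it directly, assuming a sqlglot-parsed expression and the default `SQLConfig`; the placeholder-style strings `compile()` accepts are whatever `ParameterStyle` defines:

```python
# Hedged sketch: drives SQLCompiler as introduced in this diff. Assumes the
# default SQLConfig validates a plain SELECT; no ParameterManager is supplied,
# so the compiled parameters come back as None.
import sqlglot

from sqlspec.statement.sql_compiler import SQLCompiler

expression = sqlglot.parse_one("SELECT id, name FROM users WHERE active = TRUE", dialect="postgres")
compiler = SQLCompiler(expression, dialect="postgres")

sql, params = compiler.compile()  # default placeholder style
print(sql)     # pipeline-processed, validated SQL
print(params)  # None: no ParameterManager was supplied
```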
sqlspec/storage/__init__.py
@@ -5,11 +5,19 @@ This module provides a flexible storage system with:
  - Lazy loading and configuration-based registration
  - URI scheme-based automatic backend resolution
  - Key-based named storage configurations
+ - Capability-based backend selection
  """

- from sqlspec.storage.protocol import ObjectStoreProtocol
+ from sqlspec.protocols import ObjectStoreProtocol
+ from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
  from sqlspec.storage.registry import StorageRegistry

  storage_registry = StorageRegistry()

- __all__ = ("ObjectStoreProtocol", "StorageRegistry", "storage_registry")
+ __all__ = (
+     "HasStorageCapabilities",
+     "ObjectStoreProtocol",
+     "StorageCapabilities",
+     "StorageRegistry",
+     "storage_registry",
+ )
sqlspec/storage/backends/fsspec.py
@@ -2,10 +2,11 @@
  import logging
  from io import BytesIO
  from pathlib import Path
- from typing import TYPE_CHECKING, Any, Union
+ from typing import TYPE_CHECKING, Any, ClassVar, Union

  from sqlspec.exceptions import MissingDependencyError
  from sqlspec.storage.backends.base import ObjectStoreBase
+ from sqlspec.storage.capabilities import StorageCapabilities
  from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
  from sqlspec.utils.sync_tools import async_

@@ -47,6 +48,16 @@ class FSSpecBackend(ObjectStoreBase):
      and offering fallback capabilities.
      """

+     # FSSpec supports most operations but varies by underlying filesystem
+     _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+         supports_arrow=PYARROW_INSTALLED,
+         supports_streaming=PYARROW_INSTALLED,
+         supports_async=True,
+         supports_compression=True,
+         is_remote=True,
+         is_cloud_native=False,
+     )
+
      def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
          if not FSSPEC_INSTALLED:
              raise MissingDependencyError(package="fsspec", install_package="fsspec")
@@ -63,6 +74,10 @@ class FSSpecBackend(ObjectStoreBase):
          self.fs = fs
          self.protocol = getattr(fs, "protocol", "unknown")
          self._fs_uri = f"{self.protocol}://"
+
+         # Set instance-level capabilities based on detected protocol
+         self._instance_capabilities = self._detect_capabilities()
+
          super().__init__()

      @classmethod
@@ -71,7 +86,6 @@ class FSSpecBackend(ObjectStoreBase):
          fs_config = config.get("fs_config", {})
          base_path = config.get("base_path", "")

-         # Create filesystem instance from protocol
          import fsspec

          fs_instance = fsspec.filesystem(protocol, **fs_config)
@@ -82,12 +96,47 @@
          """Resolve path relative to base_path."""
          path_str = str(path)
          if self.base_path:
-             # Ensure no double slashes
              clean_base = self.base_path.rstrip("/")
              clean_path = path_str.lstrip("/")
              return f"{clean_base}/{clean_path}"
          return path_str

+     def _detect_capabilities(self) -> StorageCapabilities:
+         """Detect capabilities based on underlying filesystem protocol."""
+         protocol = self.protocol.lower()
+
+         if protocol in {"s3", "s3a", "s3n"}:
+             return StorageCapabilities.s3_compatible()
+         if protocol in {"gcs", "gs"}:
+             return StorageCapabilities.gcs()
+         if protocol in {"abfs", "az", "azure"}:
+             return StorageCapabilities.azure_blob()
+         if protocol in {"file", "local"}:
+             return StorageCapabilities.local_filesystem()
+         return StorageCapabilities(
+             supports_arrow=PYARROW_INSTALLED,
+             supports_streaming=PYARROW_INSTALLED,
+             supports_async=True,
+             supports_compression=True,
+             is_remote=True,
+             is_cloud_native=False,
+         )
+
+     @property
+     def capabilities(self) -> StorageCapabilities:
+         """Return instance-specific capabilities based on detected protocol."""
+         return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
+
+     @classmethod
+     def has_capability(cls, capability: str) -> bool:
+         """Check if backend has a specific capability."""
+         return getattr(cls._default_capabilities, capability, False)
+
+     @classmethod
+     def get_capabilities(cls) -> StorageCapabilities:
+         """Get all capabilities for this backend."""
+         return cls._default_capabilities
+
      @property
      def backend_type(self) -> str:
          return "fsspec"
@@ -174,7 +223,6 @@
          else:
              pattern = f"{resolved_prefix}/*" if resolved_prefix else "*"

-         # Get all files (not directories)
          paths = [str(path) for path in self.fs.glob(pattern, **kwargs) if not self.fs.isdir(path)]
          return sorted(paths)

@@ -200,7 +248,6 @@
          """Get object metadata."""
          info = self.fs.info(self._resolve_path(path), **kwargs)

-         # Convert fsspec info to dict
          if isinstance(info, dict):
              return info

@@ -210,7 +257,6 @@
          except AttributeError:
              pass

-         # Fallback to basic metadata with safe attribute access
          resolved_path = self._resolve_path(path)
          return {
              "path": resolved_path,
@@ -241,7 +287,7 @@
          return await async_(self.read_bytes)(path, **kwargs)

      async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
-         """Async write bytes. Wras the sync implementation."""
+         """Async write bytes. Wraps the sync implementation."""
          return await async_(self.write_bytes)(path, data, **kwargs)

      async def _stream_file_batches_async(self, obj_path: Union[str, Path]) -> "AsyncIterator[ArrowRecordBatch]":
@@ -268,7 +314,6 @@
          if not PYARROW_INSTALLED:
              raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

-         # Get paths asynchronously
          paths = await async_(self.glob)(pattern, **kwargs)

          # Stream batches from each path
sqlspec/storage/backends/obstore.py
@@ -9,10 +9,11 @@ from __future__ import annotations
  import fnmatch
  import logging
- from typing import TYPE_CHECKING, Any
+ from typing import TYPE_CHECKING, Any, ClassVar

  from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
  from sqlspec.storage.backends.base import ObjectStoreBase
+ from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
  from sqlspec.typing import OBSTORE_INSTALLED

  if TYPE_CHECKING:
@@ -26,7 +27,7 @@ __all__ = ("ObStoreBackend",)
  logger = logging.getLogger(__name__)


- class ObStoreBackend(ObjectStoreBase):
+ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
      """High-performance object storage backend using obstore.

      This backend leverages obstore's Rust-based implementation for maximum
@@ -40,6 +41,18 @@ class ObStoreBackend(ObjectStoreBase):
      Features native Arrow support and ~9x better performance than fsspec.
      """

+     # ObStore has excellent native capabilities
+     capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+         supports_arrow=True,
+         supports_streaming=True,
+         supports_async=True,
+         supports_batch_operations=True,
+         supports_multipart_upload=True,
+         supports_compression=True,
+         is_cloud_native=True,
+         has_low_latency=True,
+     )
+
      def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
          """Initialize obstore backend.

@@ -58,14 +71,12 @@
          self.store_options = store_options
          self.store: Any  # Will be set based on store_uri

-         # Initialize obstore instance
          if store_uri.startswith("memory://"):
              # MemoryStore doesn't use from_url - create directly
              from obstore.store import MemoryStore

              self.store = MemoryStore()
          elif store_uri.startswith("file://"):
-             # For file:// URIs, use LocalStore with root directory
              from obstore.store import LocalStore

              # LocalStore works with directory paths, so we use root
@@ -86,15 +97,12 @@

      def _resolve_path(self, path: str | Path) -> str:
          """Resolve path relative to base_path."""
-         # Convert Path to string
          path_str = str(path)
          # For file:// URIs, the path passed in is already absolute
          if self.store_uri.startswith("file://") and path_str.startswith("/"):
-             # Remove leading slash for LocalStore (it's relative to its root)
              return path_str.lstrip("/")

          if self.base_path:
-             # Ensure no double slashes by stripping trailing slash from base_path
              clean_base = self.base_path.rstrip("/")
              clean_path = path_str.lstrip("/")
              return f"{clean_base}/{clean_path}"
@@ -113,7 +121,6 @@
          resolved_path = self._resolve_path(path)
          result = self.store.get(resolved_path)
          bytes_data = result.bytes()
-         # Handle obstore's Bytes type - it might have a method to get raw bytes
          if hasattr(bytes_data, "__bytes__"):
              return bytes(bytes_data)
          if hasattr(bytes_data, "tobytes"):
@@ -210,17 +217,14 @@
          resolved_pattern = self._resolve_path(pattern)
          all_objects = self.list_objects(recursive=True, **kwargs)

-         # For complex patterns with **, use PurePosixPath
          if "**" in pattern:
              matching_objects = []

              # Special case: **/*.ext should also match *.ext in root
              if pattern.startswith("**/"):
-                 # Get the suffix pattern
                  suffix_pattern = pattern[3:]  # Remove **/

                  for obj in all_objects:
-                     # Check if object ends with the suffix pattern
                      obj_path = PurePosixPath(obj)
                      # Try both the full pattern and just the suffix
                      if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
@@ -271,7 +275,6 @@
          if resolved_path.endswith("/"):
              return True

-         # Check if there are any objects with this prefix
          try:
              objects = self.list_objects(prefix=str(path), recursive=False)
              return len(objects) > 0
@@ -282,7 +285,6 @@
          """Read Arrow table using obstore."""
          try:
              resolved_path = self._resolve_path(path)
-             # Check if the store has native Arrow support
              if hasattr(self.store, "read_arrow"):
                  return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
              # Fall back to reading as Parquet via bytes
@@ -301,7 +303,6 @@
          """Write Arrow table using obstore."""
          try:
              resolved_path = self._resolve_path(path)
-             # Check if the store has native Arrow support
              if hasattr(self.store, "write_arrow"):
                  self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
              else:
@@ -321,7 +322,6 @@

          for field in schema:
              if str(field.type).startswith("decimal64"):
-                 # Convert decimal64 to decimal128
                  import re

                  match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
@@ -367,7 +367,6 @@
          resolved_path = self._resolve_path(path)
          result = await self.store.get_async(resolved_path)
          bytes_data = result.bytes()
-         # Handle obstore's Bytes type - it might have a method to get raw bytes
          if hasattr(bytes_data, "__bytes__"):
              return bytes(bytes_data)
          if hasattr(bytes_data, "tobytes"):
@@ -441,10 +440,8 @@
          resolved_path = self._resolve_path(path)
          metadata = await self.store.head_async(resolved_path)

-         # Convert obstore ObjectMeta to dict
          result = {"path": resolved_path, "exists": True}

-         # Extract metadata attributes if available
          for attr in ["size", "last_modified", "e_tag", "version"]:
              if hasattr(metadata, attr):
                  result[attr] = getattr(metadata, attr)
@@ -465,7 +462,6 @@
      async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
          """Async write Arrow table using native obstore async."""
          resolved_path = self._resolve_path(path)
-         # Check if the store has native async Arrow support
          if hasattr(self.store, "write_arrow_async"):
              await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
          else:
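
Unlike the fsspec backend, `ObStoreBackend` declares its capabilities statically through the `HasStorageCapabilities` mixin, so they can be queried without instantiating a store. A hedged sketch (assumes the class-level queries work without obstore I/O, since the obstore imports in this diff are deferred into `__init__`):

```python
# Hedged sketch: class-level capability queries on ObStoreBackend via the
# HasStorageCapabilities mixin; no store instance is created.
from sqlspec.storage.backends.obstore import ObStoreBackend

print(ObStoreBackend.has_capability("supports_arrow"))      # True
print(ObStoreBackend.has_capability("supports_s3_select"))  # False
print(ObStoreBackend.get_capabilities().has_low_latency)    # True
```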
sqlspec/storage/capabilities.py (new file)
@@ -0,0 +1,101 @@
+ """Storage backend capability system.
+
+ This module provides a centralized way to track and query storage backend capabilities.
+ """
+
+ from dataclasses import dataclass
+ from typing import ClassVar
+
+ __all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+ @dataclass
+ class StorageCapabilities:
+     """Tracks capabilities of a storage backend."""
+
+     # Basic operations
+     supports_read: bool = True
+     supports_write: bool = True
+     supports_delete: bool = True
+     supports_list: bool = True
+     supports_exists: bool = True
+     supports_copy: bool = True
+     supports_move: bool = True
+     supports_metadata: bool = True
+
+     # Advanced operations
+     supports_arrow: bool = False
+     supports_streaming: bool = False
+     supports_async: bool = False
+     supports_batch_operations: bool = False
+     supports_multipart_upload: bool = False
+     supports_compression: bool = False
+
+     # Protocol-specific features
+     supports_s3_select: bool = False
+     supports_gcs_compose: bool = False
+     supports_azure_snapshots: bool = False
+
+     # Performance characteristics
+     is_remote: bool = True
+     is_cloud_native: bool = False
+     has_low_latency: bool = False
+
+     @classmethod
+     def local_filesystem(cls) -> "StorageCapabilities":
+         """Capabilities for local filesystem backend."""
+         return cls(
+             is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+         )
+
+     @classmethod
+     def s3_compatible(cls) -> "StorageCapabilities":
+         """Capabilities for S3-compatible backends."""
+         return cls(
+             is_cloud_native=True,
+             supports_multipart_upload=True,
+             supports_s3_select=True,
+             supports_arrow=True,
+             supports_streaming=True,
+             supports_async=True,
+         )
+
+     @classmethod
+     def gcs(cls) -> "StorageCapabilities":
+         """Capabilities for Google Cloud Storage."""
+         return cls(
+             is_cloud_native=True,
+             supports_multipart_upload=True,
+             supports_gcs_compose=True,
+             supports_arrow=True,
+             supports_streaming=True,
+             supports_async=True,
+         )
+
+     @classmethod
+     def azure_blob(cls) -> "StorageCapabilities":
+         """Capabilities for Azure Blob Storage."""
+         return cls(
+             is_cloud_native=True,
+             supports_multipart_upload=True,
+             supports_azure_snapshots=True,
+             supports_arrow=True,
+             supports_streaming=True,
+             supports_async=True,
+         )
+
+
+ class HasStorageCapabilities:
+     """Mixin for storage backends that expose their capabilities."""
+
+     capabilities: ClassVar[StorageCapabilities]
+
+     @classmethod
+     def has_capability(cls, capability: str) -> bool:
+         """Check if backend has a specific capability."""
+         return getattr(cls.capabilities, capability, False)
+
+     @classmethod
+     def get_capabilities(cls) -> StorageCapabilities:
+         """Get all capabilities for this backend."""
+         return cls.capabilities
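
This dataclass is the machinery behind the "capability-based backend selection" advertised in `sqlspec/storage/__init__.py`: callers can filter backend classes by the flags they need. A minimal sketch; `pick_backend` is a hypothetical helper for illustration, not sqlspec API:

```python
# Hedged sketch of capability-based selection on top of StorageCapabilities.
# pick_backend is hypothetical; only the capability flags come from this diff.
from sqlspec.storage.capabilities import StorageCapabilities

def pick_backend(candidates: "list[type]", *required: str) -> type:
    """Return the first backend class advertising every required capability."""
    for backend_cls in candidates:
        caps = backend_cls.get_capabilities()
        if all(getattr(caps, name, False) for name in required):
            return backend_cls
    msg = f"no backend supports: {', '.join(required)}"
    raise LookupError(msg)

# The factory presets encode per-provider defaults:
s3 = StorageCapabilities.s3_compatible()
assert s3.supports_multipart_upload and s3.is_cloud_native and not s3.has_low_latency
```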