sqlspec 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (155)
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -621
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -431
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +218 -436
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +417 -487
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +600 -553
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +392 -406
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +548 -921
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -533
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +741 -0
  31. sqlspec/adapters/psycopg/driver.py +734 -694
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +242 -405
  35. sqlspec/base.py +220 -784
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/METADATA +97 -26
  137. sqlspec-0.12.0.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -331
  150. sqlspec/mixins.py +0 -305
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.1.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,464 @@
1
+ """High-performance object storage using obstore.
2
+
3
+ This backend implements the ObjectStoreProtocol using obstore,
4
+ providing native support for S3, GCS, Azure, and local file storage
5
+ with excellent performance characteristics and native Arrow support.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import fnmatch
11
+ import logging
12
+ from typing import TYPE_CHECKING, Any, cast
13
+
14
+ from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
15
+ from sqlspec.storage.backends.base import ObjectStoreBase
16
+ from sqlspec.typing import OBSTORE_INSTALLED
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import AsyncIterator, Iterator
20
+
21
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
22
+
23
+ __all__ = ("ObStoreBackend",)
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class ObStoreBackend(ObjectStoreBase):
    """High-performance object storage backend using obstore.

    Wraps obstore's Rust-based implementation and exposes the sync/async
    ``ObjectStoreBase`` interface for:

    - AWS S3 and S3-compatible stores
    - Google Cloud Storage
    - Azure Blob Storage
    - Local filesystem (``file://``)
    - In-memory stores (``memory://``)
    - HTTP endpoints

    Arrow reads/writes use obstore's native support when available and fall
    back to Parquet serialization over the byte APIs otherwise.
    """

    def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
        """Initialize the obstore backend.

        Args:
            store_uri: Storage URI (e.g. ``s3://bucket``, ``file:///path``, ``gs://bucket``).
            base_path: Base path prefix applied to all operations.
            **store_options: Extra options forwarded to obstore configuration.

        Raises:
            MissingDependencyError: If obstore is not installed.
            StorageOperationFailedError: If the underlying store cannot be created.
        """
        if not OBSTORE_INSTALLED:
            raise MissingDependencyError(package="obstore", install_package="obstore")

        try:
            self.store_uri = store_uri
            self.base_path = base_path.rstrip("/") if base_path else ""
            self.store_options = store_options
            self.store: Any  # set below based on the URI scheme

            if store_uri.startswith("memory://"):
                # MemoryStore is not constructible via from_url - create directly.
                from obstore.store import MemoryStore

                self.store = MemoryStore()
            elif store_uri.startswith("file://"):
                # LocalStore is rooted at "/"; absolute paths are made
                # root-relative in _resolve_path.
                from obstore.store import LocalStore

                self.store = LocalStore("/")
            else:
                # Use obstore's from_url for automatic URI parsing.
                from obstore.store import from_url

                self.store = from_url(store_uri, **store_options)  # pyright: ignore[reportAttributeAccessIssue]

            logger.debug("ObStore backend initialized for %s", store_uri)
        except Exception as exc:
            msg = f"Failed to initialize obstore backend for {store_uri}"
            raise StorageOperationFailedError(msg) from exc

    def _resolve_path(self, path: str) -> str:
        """Resolve a caller-supplied path relative to ``base_path``.

        For ``file://`` stores, absolute paths are made relative to the
        LocalStore root. This method must be applied exactly once per
        user-supplied path (see the Arrow fallbacks below).
        """
        if self.store_uri.startswith("file://") and path.startswith("/"):
            # LocalStore paths are relative to its root; drop the leading slash.
            return path.lstrip("/")

        if self.base_path:
            # Avoid double slashes between base and relative path.
            return f"{self.base_path.rstrip('/')}/{path.lstrip('/')}"
        return path

    @staticmethod
    def _item_path(item: Any) -> str:
        """Extract the object path from a listing entry (``path``, ``key``, or str)."""
        if hasattr(item, "path"):
            return str(item.path)
        if hasattr(item, "key"):
            return str(item.key)
        return str(item)

    @staticmethod
    def _metadata_to_dict(metadata: Any, resolved_path: str) -> dict[str, Any]:
        """Convert an obstore ObjectMeta-like object into a plain dict."""
        result: dict[str, Any] = {"path": resolved_path, "exists": True}
        for attr in ("size", "last_modified", "e_tag", "version"):
            if hasattr(metadata, attr):
                result[attr] = getattr(metadata, attr)
        # Include custom metadata if the store reports any.
        custom_metadata = getattr(metadata, "metadata", None)
        if custom_metadata:
            result["custom_metadata"] = custom_metadata
        return result

    @staticmethod
    def _coerce_decimal64(table: ArrowTable) -> ArrowTable:
        """Cast decimal64 columns to decimal128.

        PyArrow cannot write decimal64 columns to Parquet, so they are widened
        before serialization. Returns the table unchanged when no decimal64
        columns are present.
        """
        import re

        import pyarrow as pa

        new_fields = []
        needs_conversion = False
        for field in table.schema:
            match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
            if match:
                precision, scale = int(match.group(1)), int(match.group(2))
                new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
                needs_conversion = True
            else:
                new_fields.append(field)

        if needs_conversion:
            return table.cast(pa.schema(new_fields))
        return table

    @property
    def backend_type(self) -> str:
        """Return backend type identifier."""
        return "obstore"

    # Implementation of abstract methods from ObjectStoreBase

    def read_bytes(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
        """Read bytes using obstore."""
        try:
            resolved_path = self._resolve_path(path)
            result = self.store.get(resolved_path)
            return result.bytes()  # type: ignore[no-any-return] # pyright: ignore[reportReturnType]
        except Exception as exc:
            msg = f"Failed to read bytes from {path}"
            raise StorageOperationFailedError(msg) from exc

    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Write bytes using obstore."""
        try:
            resolved_path = self._resolve_path(path)
            self.store.put(resolved_path, data)
        except Exception as exc:
            msg = f"Failed to write bytes to {path}"
            raise StorageOperationFailedError(msg) from exc

    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read text using obstore."""
        return self.read_bytes(path, **kwargs).decode(encoding)

    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write text using obstore."""
        self.write_bytes(path, data.encode(encoding), **kwargs)

    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
        """List objects using obstore, sorted for deterministic output."""
        resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""

        if not recursive:
            items = self.store.list_with_delimiter(resolved_prefix)  # pyright: ignore
        else:
            items = self.store.list(resolved_prefix)

        return sorted(self._item_path(item) for item in items)

    def exists(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
        """Check if object exists using obstore."""
        try:
            self.store.head(self._resolve_path(path))
        except Exception:
            return False
        return True

    def delete(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Delete object using obstore."""
        try:
            self.store.delete(self._resolve_path(path))
        except Exception as exc:
            msg = f"Failed to delete {path}"
            raise StorageOperationFailedError(msg) from exc

    def copy(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Copy object using obstore."""
        try:
            self.store.copy(self._resolve_path(source), self._resolve_path(destination))
        except Exception as exc:
            msg = f"Failed to copy {source} to {destination}"
            raise StorageOperationFailedError(msg) from exc

    def move(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Move object using obstore."""
        try:
            self.store.rename(self._resolve_path(source), self._resolve_path(destination))
        except Exception as exc:
            msg = f"Failed to move {source} to {destination}"
            raise StorageOperationFailedError(msg) from exc

    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Find objects matching pattern using obstore.

        Note: obstore does not support server-side globbing. This implementation
        lists all objects and filters them client-side, which may be inefficient
        for large buckets.
        """
        from pathlib import PurePosixPath

        resolved_pattern = self._resolve_path(pattern)
        all_objects = self.list_objects(recursive=True, **kwargs)

        if "**" in pattern:
            # PurePosixPath.match handles ** patterns; fnmatch does not.
            candidate_patterns = [resolved_pattern]
            if pattern.startswith("**/"):
                # "**/x.ext" should also match "x.ext" at the root.
                candidate_patterns.append(pattern[3:])
            return [
                obj
                for obj in all_objects
                if any(PurePosixPath(obj).match(p) for p in candidate_patterns)
            ]
        # Simple patterns go through fnmatch.
        return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]

    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
        """Get object metadata using obstore.

        Returns ``{"path": ..., "exists": False}`` when the object is missing.
        """
        resolved_path = self._resolve_path(path)
        try:
            metadata = self.store.head(resolved_path)
        except Exception:
            # Object doesn't exist.
            return {"path": resolved_path, "exists": False}
        return self._metadata_to_dict(metadata, resolved_path)

    def is_object(self, path: str) -> bool:
        """Check if path is an object: it exists and doesn't end with '/'."""
        resolved_path = self._resolve_path(path)
        return self.exists(path) and not resolved_path.endswith("/")

    def is_path(self, path: str) -> bool:
        """Check if path is a prefix/directory-like entry."""
        resolved_path = self._resolve_path(path)

        # A path/prefix either ends with / or has objects under it.
        if resolved_path.endswith("/"):
            return True

        try:
            return len(self.list_objects(prefix=path, recursive=False)) > 0
        except Exception:
            return False

    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
        """Read Arrow table using obstore, falling back to Parquet-over-bytes."""
        try:
            resolved_path = self._resolve_path(path)
            if hasattr(self.store, "read_arrow"):
                # Native Arrow support takes the store-resolved path.
                return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
            # Fall back to reading as Parquet via bytes. Pass the ORIGINAL
            # path: read_bytes resolves it itself (passing resolved_path here
            # would apply base_path twice).
            import io

            import pyarrow.parquet as pq

            buffer = io.BytesIO(self.read_bytes(path))
            return pq.read_table(buffer, **kwargs)
        except Exception as exc:
            msg = f"Failed to read Arrow table from {path}"
            raise StorageOperationFailedError(msg) from exc

    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Write Arrow table using obstore, falling back to Parquet-over-bytes."""
        try:
            resolved_path = self._resolve_path(path)
            if hasattr(self.store, "write_arrow"):
                self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
            else:
                import io

                import pyarrow.parquet as pq

                buffer = io.BytesIO()
                # Widen decimal64 columns first (unsupported in Parquet).
                pq.write_table(self._coerce_decimal64(table), buffer, **kwargs)
                # Pass the ORIGINAL path: write_bytes resolves it itself.
                self.write_bytes(path, buffer.getvalue())
        except Exception as exc:
            msg = f"Failed to write Arrow table to {path}"
            raise StorageOperationFailedError(msg) from exc

    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
        """Stream Arrow record batches using obstore.

        Yields:
            Iterator of Arrow record batches from matching objects.
        """
        try:
            resolved_pattern = self._resolve_path(pattern)
            yield from self.store.stream_arrow(resolved_pattern, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
        except Exception as exc:
            msg = f"Failed to stream Arrow data for pattern {pattern}"
            raise StorageOperationFailedError(msg) from exc

    # Async implementations using obstore's native async methods where available.

    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
        """Async read bytes using native obstore async."""
        resolved_path = self._resolve_path(path)
        result = await self.store.get_async(resolved_path)
        return cast("bytes", result.bytes())  # pyright: ignore[reportReturnType]

    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Async write bytes using native obstore async."""
        resolved_path = self._resolve_path(path)
        await self.store.put_async(resolved_path, data)

    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
        """Async list objects using native obstore async."""
        resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""

        # store.list_async returns an async iterator; use the same entry
        # extraction as the sync path for consistent results.
        objects = [self._item_path(item) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]

        # Manual depth filtering for non-recursive listing: obstore lacks an
        # async version of list_with_delimiter.
        if not recursive and resolved_prefix:
            base_depth = resolved_prefix.count("/")
            objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]

        return sorted(objects)

    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Async read text using native obstore async."""
        data = await self.read_bytes_async(path, **kwargs)
        return data.decode(encoding)

    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Async write text using native obstore async."""
        await self.write_bytes_async(path, data.encode(encoding), **kwargs)

    async def exists_async(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
        """Async check if object exists using native obstore async."""
        resolved_path = self._resolve_path(path)
        try:
            await self.store.head_async(resolved_path)
        except Exception:
            return False
        return True

    async def delete_async(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Async delete object using native obstore async."""
        await self.store.delete_async(self._resolve_path(path))

    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Async copy object using native obstore async."""
        await self.store.copy_async(self._resolve_path(source), self._resolve_path(destination))

    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
        """Async move object using native obstore async."""
        await self.store.rename_async(self._resolve_path(source), self._resolve_path(destination))

    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
        """Async get object metadata using native obstore async.

        Unlike the sync variant, errors propagate to the caller (matches the
        original behavior).
        """
        resolved_path = self._resolve_path(path)
        metadata = await self.store.head_async(resolved_path)
        return self._metadata_to_dict(metadata, resolved_path)

    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
        """Async read Arrow table using native obstore async."""
        resolved_path = self._resolve_path(path)
        return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]

    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Async write Arrow table using native obstore async when available."""
        resolved_path = self._resolve_path(path)
        if hasattr(self.store, "write_arrow_async"):
            await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
        else:
            # Fall back to writing as Parquet via bytes.
            import io

            import pyarrow.parquet as pq

            buffer = io.BytesIO()
            pq.write_table(table, buffer, **kwargs)
            # Pass the ORIGINAL path: write_bytes_async resolves it itself.
            await self.write_bytes_async(path, buffer.getvalue())

    async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
        """Async stream Arrow record batches from objects matching pattern."""
        resolved_pattern = self._resolve_path(pattern)
        async for batch in self.store.stream_arrow_async(resolved_pattern, **kwargs):  # pyright: ignore[reportAttributeAccessIssue]
            yield batch
@@ -0,0 +1,170 @@
1
+ from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
2
+
3
+ if TYPE_CHECKING:
4
+ from collections.abc import AsyncIterator, Iterator
5
+
6
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
7
+
8
+ __all__ = ("ObjectStoreProtocol",)
9
+
10
+
11
@runtime_checkable
class ObjectStoreProtocol(Protocol):
    """Structural contract for object storage backends.

    Every backend exposes the same path-oriented API, in both a synchronous
    form and an asynchronous form (methods suffixed ``_async``), so callers
    can use any backend interchangeably. Implementations provide built-in
    instrumentation support.
    """

    def __init__(self, uri: str, **kwargs: Any) -> None:
        return

    # --- Core byte/text operations (sync) ---

    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
        """Return the raw bytes stored at *path*."""
        return b""

    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Store *data* at *path*."""
        return

    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Return the object at *path* decoded as text."""
        return ""

    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Encode *data* and store it at *path*."""
        return

    # --- Object lifecycle operations ---

    def exists(self, path: str, **kwargs: Any) -> bool:
        """Report whether an object is present at *path*."""
        return False

    def delete(self, path: str, **kwargs: Any) -> None:
        """Remove the object at *path*."""
        return

    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
        """Duplicate the object at *source* to *destination*."""
        return

    def move(self, source: str, destination: str, **kwargs: Any) -> None:
        """Relocate the object at *source* to *destination*."""
        return

    # --- Listing operations ---

    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Return object paths under *prefix*."""
        return []

    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Return object paths matching the glob *pattern*."""
        return []

    # --- Path classification ---

    def is_object(self, path: str) -> bool:
        """Report whether *path* names a concrete object."""
        return False

    def is_path(self, path: str) -> bool:
        """Report whether *path* names a directory-like prefix."""
        return False

    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Return metadata describing the object at *path*."""
        return {}

    # --- Arrow operations ---

    def read_arrow(self, path: str, **kwargs: Any) -> "ArrowTable":
        """Load an Arrow table from *path*.

        Backends with native Arrow support (e.g. obstore) should use it here.
        """
        raise NotImplementedError("Arrow reading not implemented")

    def write_arrow(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
        """Persist *table* at *path*.

        Backends with native Arrow support (e.g. obstore) should use it here.
        """
        raise NotImplementedError("Arrow writing not implemented")

    def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
        """Yield Arrow record batches from objects matching *pattern*.

        Backends with native streaming support should use it here.
        """
        raise NotImplementedError("Arrow streaming not implemented")

    # --- Async counterparts ---

    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
        """Async variant of :meth:`read_bytes`."""
        raise NotImplementedError("Async operations not implemented")

    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Async variant of :meth:`write_bytes`."""
        raise NotImplementedError("Async operations not implemented")

    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Async variant of :meth:`read_text`."""
        raise NotImplementedError("Async operations not implemented")

    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Async variant of :meth:`write_text`."""
        raise NotImplementedError("Async operations not implemented")

    async def exists_async(self, path: str, **kwargs: Any) -> bool:
        """Async variant of :meth:`exists`."""
        raise NotImplementedError("Async operations not implemented")

    async def delete_async(self, path: str, **kwargs: Any) -> None:
        """Async variant of :meth:`delete`."""
        raise NotImplementedError("Async operations not implemented")

    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Async variant of :meth:`list_objects`."""
        raise NotImplementedError("Async operations not implemented")

    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async variant of :meth:`copy`."""
        raise NotImplementedError("Async operations not implemented")

    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async variant of :meth:`move`."""
        raise NotImplementedError("Async operations not implemented")

    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Async variant of :meth:`get_metadata`."""
        raise NotImplementedError("Async operations not implemented")

    async def read_arrow_async(self, path: str, **kwargs: Any) -> "ArrowTable":
        """Async variant of :meth:`read_arrow`."""
        raise NotImplementedError("Async arrow reading not implemented")

    async def write_arrow_async(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
        """Async variant of :meth:`write_arrow`."""
        raise NotImplementedError("Async arrow writing not implemented")

    async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
        """Async variant of :meth:`stream_arrow`."""
        raise NotImplementedError("Async arrow streaming not implemented")