sqlspec-0.16.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic.

Files changed (148)
  1. 51ff5a9eadfdefd49f98__mypyc.cpython-311-aarch64-linux-gnu.so +0 -0
  2. sqlspec/__init__.py +92 -0
  3. sqlspec/__main__.py +12 -0
  4. sqlspec/__metadata__.py +14 -0
  5. sqlspec/_serialization.py +77 -0
  6. sqlspec/_sql.py +1780 -0
  7. sqlspec/_typing.py +680 -0
  8. sqlspec/adapters/__init__.py +0 -0
  9. sqlspec/adapters/adbc/__init__.py +5 -0
  10. sqlspec/adapters/adbc/_types.py +12 -0
  11. sqlspec/adapters/adbc/config.py +361 -0
  12. sqlspec/adapters/adbc/driver.py +512 -0
  13. sqlspec/adapters/aiosqlite/__init__.py +19 -0
  14. sqlspec/adapters/aiosqlite/_types.py +13 -0
  15. sqlspec/adapters/aiosqlite/config.py +253 -0
  16. sqlspec/adapters/aiosqlite/driver.py +248 -0
  17. sqlspec/adapters/asyncmy/__init__.py +19 -0
  18. sqlspec/adapters/asyncmy/_types.py +12 -0
  19. sqlspec/adapters/asyncmy/config.py +180 -0
  20. sqlspec/adapters/asyncmy/driver.py +274 -0
  21. sqlspec/adapters/asyncpg/__init__.py +21 -0
  22. sqlspec/adapters/asyncpg/_types.py +17 -0
  23. sqlspec/adapters/asyncpg/config.py +229 -0
  24. sqlspec/adapters/asyncpg/driver.py +344 -0
  25. sqlspec/adapters/bigquery/__init__.py +18 -0
  26. sqlspec/adapters/bigquery/_types.py +12 -0
  27. sqlspec/adapters/bigquery/config.py +298 -0
  28. sqlspec/adapters/bigquery/driver.py +558 -0
  29. sqlspec/adapters/duckdb/__init__.py +22 -0
  30. sqlspec/adapters/duckdb/_types.py +12 -0
  31. sqlspec/adapters/duckdb/config.py +504 -0
  32. sqlspec/adapters/duckdb/driver.py +368 -0
  33. sqlspec/adapters/oracledb/__init__.py +32 -0
  34. sqlspec/adapters/oracledb/_types.py +14 -0
  35. sqlspec/adapters/oracledb/config.py +317 -0
  36. sqlspec/adapters/oracledb/driver.py +538 -0
  37. sqlspec/adapters/psqlpy/__init__.py +16 -0
  38. sqlspec/adapters/psqlpy/_types.py +11 -0
  39. sqlspec/adapters/psqlpy/config.py +214 -0
  40. sqlspec/adapters/psqlpy/driver.py +530 -0
  41. sqlspec/adapters/psycopg/__init__.py +32 -0
  42. sqlspec/adapters/psycopg/_types.py +17 -0
  43. sqlspec/adapters/psycopg/config.py +426 -0
  44. sqlspec/adapters/psycopg/driver.py +796 -0
  45. sqlspec/adapters/sqlite/__init__.py +15 -0
  46. sqlspec/adapters/sqlite/_types.py +11 -0
  47. sqlspec/adapters/sqlite/config.py +240 -0
  48. sqlspec/adapters/sqlite/driver.py +294 -0
  49. sqlspec/base.py +571 -0
  50. sqlspec/builder/__init__.py +62 -0
  51. sqlspec/builder/_base.py +473 -0
  52. sqlspec/builder/_column.py +320 -0
  53. sqlspec/builder/_ddl.py +1346 -0
  54. sqlspec/builder/_ddl_utils.py +103 -0
  55. sqlspec/builder/_delete.py +76 -0
  56. sqlspec/builder/_insert.py +256 -0
  57. sqlspec/builder/_merge.py +71 -0
  58. sqlspec/builder/_parsing_utils.py +140 -0
  59. sqlspec/builder/_select.py +170 -0
  60. sqlspec/builder/_update.py +188 -0
  61. sqlspec/builder/mixins/__init__.py +55 -0
  62. sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
  63. sqlspec/builder/mixins/_delete_operations.py +41 -0
  64. sqlspec/builder/mixins/_insert_operations.py +244 -0
  65. sqlspec/builder/mixins/_join_operations.py +122 -0
  66. sqlspec/builder/mixins/_merge_operations.py +476 -0
  67. sqlspec/builder/mixins/_order_limit_operations.py +135 -0
  68. sqlspec/builder/mixins/_pivot_operations.py +153 -0
  69. sqlspec/builder/mixins/_select_operations.py +603 -0
  70. sqlspec/builder/mixins/_update_operations.py +187 -0
  71. sqlspec/builder/mixins/_where_clause.py +621 -0
  72. sqlspec/cli.py +247 -0
  73. sqlspec/config.py +395 -0
  74. sqlspec/core/__init__.py +63 -0
  75. sqlspec/core/cache.cpython-311-aarch64-linux-gnu.so +0 -0
  76. sqlspec/core/cache.py +871 -0
  77. sqlspec/core/compiler.cpython-311-aarch64-linux-gnu.so +0 -0
  78. sqlspec/core/compiler.py +417 -0
  79. sqlspec/core/filters.cpython-311-aarch64-linux-gnu.so +0 -0
  80. sqlspec/core/filters.py +830 -0
  81. sqlspec/core/hashing.cpython-311-aarch64-linux-gnu.so +0 -0
  82. sqlspec/core/hashing.py +310 -0
  83. sqlspec/core/parameters.cpython-311-aarch64-linux-gnu.so +0 -0
  84. sqlspec/core/parameters.py +1237 -0
  85. sqlspec/core/result.cpython-311-aarch64-linux-gnu.so +0 -0
  86. sqlspec/core/result.py +677 -0
  87. sqlspec/core/splitter.cpython-311-aarch64-linux-gnu.so +0 -0
  88. sqlspec/core/splitter.py +819 -0
  89. sqlspec/core/statement.cpython-311-aarch64-linux-gnu.so +0 -0
  90. sqlspec/core/statement.py +676 -0
  91. sqlspec/driver/__init__.py +19 -0
  92. sqlspec/driver/_async.py +502 -0
  93. sqlspec/driver/_common.py +631 -0
  94. sqlspec/driver/_sync.py +503 -0
  95. sqlspec/driver/mixins/__init__.py +6 -0
  96. sqlspec/driver/mixins/_result_tools.py +193 -0
  97. sqlspec/driver/mixins/_sql_translator.py +86 -0
  98. sqlspec/exceptions.py +193 -0
  99. sqlspec/extensions/__init__.py +0 -0
  100. sqlspec/extensions/aiosql/__init__.py +10 -0
  101. sqlspec/extensions/aiosql/adapter.py +461 -0
  102. sqlspec/extensions/litestar/__init__.py +6 -0
  103. sqlspec/extensions/litestar/_utils.py +52 -0
  104. sqlspec/extensions/litestar/cli.py +48 -0
  105. sqlspec/extensions/litestar/config.py +92 -0
  106. sqlspec/extensions/litestar/handlers.py +260 -0
  107. sqlspec/extensions/litestar/plugin.py +145 -0
  108. sqlspec/extensions/litestar/providers.py +454 -0
  109. sqlspec/loader.cpython-311-aarch64-linux-gnu.so +0 -0
  110. sqlspec/loader.py +760 -0
  111. sqlspec/migrations/__init__.py +35 -0
  112. sqlspec/migrations/base.py +414 -0
  113. sqlspec/migrations/commands.py +443 -0
  114. sqlspec/migrations/loaders.py +402 -0
  115. sqlspec/migrations/runner.py +213 -0
  116. sqlspec/migrations/tracker.py +140 -0
  117. sqlspec/migrations/utils.py +129 -0
  118. sqlspec/protocols.py +407 -0
  119. sqlspec/py.typed +0 -0
  120. sqlspec/storage/__init__.py +23 -0
  121. sqlspec/storage/backends/__init__.py +0 -0
  122. sqlspec/storage/backends/base.py +163 -0
  123. sqlspec/storage/backends/fsspec.py +386 -0
  124. sqlspec/storage/backends/obstore.py +459 -0
  125. sqlspec/storage/capabilities.py +102 -0
  126. sqlspec/storage/registry.py +239 -0
  127. sqlspec/typing.py +299 -0
  128. sqlspec/utils/__init__.py +3 -0
  129. sqlspec/utils/correlation.py +150 -0
  130. sqlspec/utils/deprecation.py +106 -0
  131. sqlspec/utils/fixtures.cpython-311-aarch64-linux-gnu.so +0 -0
  132. sqlspec/utils/fixtures.py +58 -0
  133. sqlspec/utils/logging.py +127 -0
  134. sqlspec/utils/module_loader.py +89 -0
  135. sqlspec/utils/serializers.py +4 -0
  136. sqlspec/utils/singleton.py +32 -0
  137. sqlspec/utils/sync_tools.cpython-311-aarch64-linux-gnu.so +0 -0
  138. sqlspec/utils/sync_tools.py +237 -0
  139. sqlspec/utils/text.cpython-311-aarch64-linux-gnu.so +0 -0
  140. sqlspec/utils/text.py +96 -0
  141. sqlspec/utils/type_guards.cpython-311-aarch64-linux-gnu.so +0 -0
  142. sqlspec/utils/type_guards.py +1139 -0
  143. sqlspec-0.16.1.dist-info/METADATA +365 -0
  144. sqlspec-0.16.1.dist-info/RECORD +148 -0
  145. sqlspec-0.16.1.dist-info/WHEEL +7 -0
  146. sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
  147. sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
  148. sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0
sqlspec/storage/backends/obstore.py
@@ -0,0 +1,459 @@
+"""Object storage backend using obstore.
+
+Implements the ObjectStoreProtocol using obstore,
+providing native support for S3, GCS, Azure, and local file storage
+with Arrow support.
+"""
+
+from __future__ import annotations
+
+import fnmatch
+import logging
+from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
+
+from mypy_extensions import mypyc_attr
+
+from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
+from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
+from sqlspec.typing import OBSTORE_INSTALLED
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator, Iterator
+    from pathlib import Path
+
+    from sqlspec.typing import ArrowRecordBatch, ArrowTable
+
+__all__ = ("ObStoreBackend",)
+
+logger = logging.getLogger(__name__)
+
+
+class _AsyncArrowIterator:
+    """Helper class to work around mypyc's lack of async generator support."""
+
+    def __init__(self, store: Any, pattern: str, **kwargs: Any) -> None:
+        self.store = store
+        self.pattern = pattern
+        self.kwargs = kwargs
+        self._iterator: Any | None = None
+
+    def __aiter__(self) -> _AsyncArrowIterator:
+        return self
+
+    async def __anext__(self) -> ArrowRecordBatch:
+        if self._iterator is None:
+            self._iterator = self.store.stream_arrow_async(self.pattern, **self.kwargs)
+        if self._iterator is not None:
+            return cast("ArrowRecordBatch", await self._iterator.__anext__())
+        raise StopAsyncIteration
+
+
+DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
+class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
+    """Object storage backend using obstore.
+
+    Uses obstore's Rust-based implementation for storage operations,
+    providing native support for AWS S3, Google Cloud Storage, Azure Blob Storage,
+    local filesystem, and HTTP endpoints.
+
+    Includes native Arrow support.
+    """
+
+    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=True,
+        supports_streaming=True,
+        supports_async=True,
+        supports_batch_operations=True,
+        supports_multipart_upload=True,
+        supports_compression=True,
+        is_cloud_native=True,
+        has_low_latency=True,
+    )
+
+    __slots__ = ("_path_cache", "base_path", "protocol", "store", "store_options", "store_uri")
+
+    def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
+        """Initialize obstore backend.
+
+        Args:
+            store_uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
+            base_path: Base path prefix for all operations
+            **store_options: Additional options for obstore configuration
+        """
+
+        if not OBSTORE_INSTALLED:
+            raise MissingDependencyError(package="obstore", install_package="obstore")
+
+        try:
+            self.store_uri = store_uri
+            self.base_path = base_path.rstrip("/") if base_path else ""
+            self.store_options = store_options
+            self.store: Any
+            self._path_cache: dict[str, str] = {}
+            self.protocol = store_uri.split("://", 1)[0] if "://" in store_uri else "file"
+
+            if store_uri.startswith("memory://"):
+                from obstore.store import MemoryStore
+
+                self.store = MemoryStore()
+            elif store_uri.startswith("file://"):
+                from obstore.store import LocalStore
+
+                self.store = LocalStore("/")
+            else:
+                from obstore.store import from_url
+
+                self.store = from_url(store_uri, **store_options)  # pyright: ignore[reportAttributeAccessIssue]
+
+            logger.debug("ObStore backend initialized for %s", store_uri)
+
+        except Exception as exc:
+            msg = f"Failed to initialize obstore backend for {store_uri}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def _resolve_path(self, path: str | Path) -> str:
+        """Resolve path relative to base_path."""
+        path_str = str(path)
+        if path_str.startswith("file://"):
+            path_str = path_str.removeprefix("file://")
+        if self.store_uri.startswith("file://") and path_str.startswith("/"):
+            return path_str.lstrip("/")
+        if self.base_path:
+            clean_base = self.base_path.rstrip("/")
+            clean_path = path_str.lstrip("/")
+            return f"{clean_base}/{clean_path}"
+        return path_str
+
+    @property
+    def backend_type(self) -> str:
+        """Return backend type identifier."""
+        return "obstore"
+
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+        """Read bytes using obstore."""
+        try:
+            result = self.store.get(self._resolve_path(path))
+            return cast("bytes", result.bytes().to_bytes())
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write bytes using obstore."""
+        try:
+            self.store.put(self._resolve_path(path), data)
+        except Exception as exc:
+            msg = f"Failed to write bytes to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text using obstore."""
+        return self.read_bytes(path, **kwargs).decode(encoding)
+
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text using obstore."""
+        self.write_bytes(path, data.encode(encoding), **kwargs)
+
+    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
+        """List objects using obstore."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+            items = (
+                self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
+            )
+            return sorted(str(getattr(item, "path", getattr(item, "key", str(item)))) for item in items)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+        """Check if object exists using obstore."""
+        try:
+            self.store.head(self._resolve_path(path))
+        except Exception:
+            return False
+        return True
+
+    def delete(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Delete object using obstore."""
+        try:
+            self.store.delete(self._resolve_path(path))
+        except Exception as exc:
+            msg = f"Failed to delete {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Copy object using obstore."""
+        try:
+            self.store.copy(self._resolve_path(source), self._resolve_path(destination))
+        except Exception as exc:
+            msg = f"Failed to copy {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Move object using obstore."""
+        try:
+            self.store.rename(self._resolve_path(source), self._resolve_path(destination))
+        except Exception as exc:
+            msg = f"Failed to move {source} to {destination}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
+        """Find objects matching pattern using obstore.
+
+        Lists all objects and filters them client-side using the pattern.
+        """
+        from pathlib import PurePosixPath
+
+        resolved_pattern = self._resolve_path(pattern)
+        all_objects = self.list_objects(recursive=True, **kwargs)
+
+        if "**" in pattern:
+            matching_objects = []
+
+            if pattern.startswith("**/"):
+                suffix_pattern = pattern[3:]
+
+                for obj in all_objects:
+                    obj_path = PurePosixPath(obj)
+                    if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
+                        matching_objects.append(obj)
+            else:
+                for obj in all_objects:
+                    obj_path = PurePosixPath(obj)
+                    if obj_path.match(resolved_pattern):
+                        matching_objects.append(obj)
+
+            return matching_objects
+        return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
+
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+        """Get object metadata using obstore."""
+        resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
+        try:
+            metadata = self.store.head(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": getattr(metadata, "size", None),
+                    "last_modified": getattr(metadata, "last_modified", None),
+                    "e_tag": getattr(metadata, "e_tag", None),
+                    "version": getattr(metadata, "version", None),
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
+
+    def is_object(self, path: str | Path) -> bool:
+        """Check if path is an object using obstore."""
+        resolved_path = self._resolve_path(path)
+        return self.exists(path) and not resolved_path.endswith("/")
+
+    def is_path(self, path: str | Path) -> bool:
+        """Check if path is a prefix/directory using obstore."""
+        resolved_path = self._resolve_path(path)
+
+        if resolved_path.endswith("/"):
+            return True
+
+        try:
+            objects = self.list_objects(prefix=str(path), recursive=True)
+            return len(objects) > 0
+        except Exception:
+            return False
+
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table using obstore."""
+        try:
+            resolved_path = self._resolve_path(path)
+            if hasattr(self.store, "read_arrow"):
+                return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+
+            import io
+
+            import pyarrow.parquet as pq
+
+            data = self.read_bytes(resolved_path)
+            buffer = io.BytesIO(data)
+            return pq.read_table(buffer, **kwargs)
+        except Exception as exc:
+            msg = f"Failed to read Arrow table from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table using obstore."""
+        try:
+            resolved_path = self._resolve_path(path)
+            if hasattr(self.store, "write_arrow"):
+                self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+            else:
+                import io
+
+                import pyarrow as pa
+                import pyarrow.parquet as pq
+
+                buffer = io.BytesIO()
+
+                schema = table.schema
+                if any(str(f.type).startswith("decimal64") for f in schema):
+                    new_fields = []
+                    for field in schema:
+                        if str(field.type).startswith("decimal64"):
+                            import re
+
+                            match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
+                            if match:
+                                precision, scale = int(match.group(1)), int(match.group(2))
+                                new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
+                            else:
+                                new_fields.append(field)  # pragma: no cover
+                        else:
+                            new_fields.append(field)
+                    table = table.cast(pa.schema(new_fields))
+
+                pq.write_table(table, buffer, **kwargs)
+                buffer.seek(0)
+                self.write_bytes(resolved_path, buffer.read())
+        except Exception as exc:
+            msg = f"Failed to write Arrow table to {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
+        """Stream Arrow record batches using obstore.
+
+        Yields:
+            Iterator of Arrow record batches from matching objects.
+        """
+        try:
+            resolved_pattern = self._resolve_path(pattern)
+            yield from self.store.stream_arrow(resolved_pattern, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to stream Arrow data for pattern {pattern}"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+        """Read bytes from storage asynchronously."""
+        try:
+            resolved_path = self._resolve_path(path)
+            result = await self.store.get_async(resolved_path)
+            bytes_obj = await result.bytes_async()
+            return bytes_obj.to_bytes()  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write bytes to storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        await self.store.put_async(resolved_path, data)
+
+    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
+        """List objects in storage asynchronously."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+
+            objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
+
+            if not recursive and resolved_prefix:
+                base_depth = resolved_prefix.count("/")
+                objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
+
+            return sorted(objects)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
+
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from storage asynchronously."""
+        data = await self.read_bytes_async(path, **kwargs)
+        return data.decode(encoding)
+
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Write text to storage asynchronously."""
+        encoded_data = data.encode(encoding)
+        await self.write_bytes_async(path, encoded_data, **kwargs)
+
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+        """Check if object exists in storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        try:
+            await self.store.head_async(resolved_path)
+        except Exception:
+            return False
+        return True
+
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Delete object from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        await self.store.delete_async(resolved_path)
+
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Copy object in storage asynchronously."""
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        await self.store.copy_async(source_path, dest_path)
+
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        """Move object in storage asynchronously."""
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        await self.store.rename_async(source_path, dest_path)
+
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+        """Get object metadata from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
+        try:
+            metadata = await self.store.head_async(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": metadata.size,
+                    "last_modified": metadata.last_modified,
+                    "e_tag": metadata.e_tag,
+                    "version": metadata.version,
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
+
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
+        """Read Arrow table from storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]

+    async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
+        """Write Arrow table to storage asynchronously."""
+        resolved_path = self._resolve_path(path)
+        if hasattr(self.store, "write_arrow_async"):
+            await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
+        else:
+            import io
+
+            import pyarrow.parquet as pq
+
+            buffer = io.BytesIO()
+            pq.write_table(table, buffer, **kwargs)
+            buffer.seek(0)
+            await self.write_bytes_async(resolved_path, buffer.read())
+
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
+        resolved_pattern = self._resolve_path(pattern)
+        return _AsyncArrowIterator(self.store, resolved_pattern, **kwargs)
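
The file above (sqlspec/storage/backends/obstore.py) is the package's obstore-backed storage implementation. As a quick orientation for readers of this diff, the following sketch shows how the backend might be exercised; it is illustrative only and not part of the wheel. It assumes obstore is installed and uses the in-memory store branch of the constructor; the base path and object names are placeholders.

# Illustrative sketch only -- not shipped in this release.
from sqlspec.storage.backends.obstore import ObStoreBackend

backend = ObStoreBackend("memory://", base_path="demo")    # placeholder base path

backend.write_text("reports/summary.txt", "hello")         # resolved to "demo/reports/summary.txt"
text = backend.read_text("reports/summary.txt")            # round-trips the bytes written above
found = backend.exists("reports/summary.txt")               # True once the object is stored
keys = backend.list_objects()                                # keys under the "demo" base path
matches = backend.glob("**/*.txt")                           # client-side pattern filtering, per glob() above
meta = backend.get_metadata("reports/summary.txt")          # dict with "path", "exists", "size", ...
backend.delete("reports/summary.txt")

The async variants (read_bytes_async, write_bytes_async, and so on) follow the same shapes and delegate to obstore's *_async calls, as shown in the diff.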
sqlspec/storage/capabilities.py
@@ -0,0 +1,102 @@
+"""Storage backend capability system.
+
+This module provides a centralized way to track and query storage backend capabilities.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar
+
+from mypy_extensions import mypyc_attr
+
+__all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+@dataclass
+class StorageCapabilities:
+    """Tracks capabilities of a storage backend."""
+
+    supports_read: bool = True
+    supports_write: bool = True
+    supports_delete: bool = True
+    supports_list: bool = True
+    supports_exists: bool = True
+    supports_copy: bool = True
+    supports_move: bool = True
+    supports_metadata: bool = True
+
+    supports_arrow: bool = False
+    supports_streaming: bool = False
+    supports_async: bool = False
+    supports_batch_operations: bool = False
+    supports_multipart_upload: bool = False
+    supports_compression: bool = False
+
+    supports_s3_select: bool = False
+    supports_gcs_compose: bool = False
+    supports_azure_snapshots: bool = False
+
+    is_remote: bool = True
+    is_cloud_native: bool = False
+    has_low_latency: bool = False
+
+    @classmethod
+    def local_filesystem(cls) -> "StorageCapabilities":
+        """Capabilities for local filesystem backend."""
+        return cls(
+            is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+        )
+
+    @classmethod
+    def s3_compatible(cls) -> "StorageCapabilities":
+        """Capabilities for S3-compatible backends."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_s3_select=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def gcs(cls) -> "StorageCapabilities":
+        """Capabilities for Google Cloud Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_gcs_compose=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def azure_blob(cls) -> "StorageCapabilities":
+        """Capabilities for Azure Blob Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_azure_snapshots=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
+class HasStorageCapabilities:
+    """Mixin for storage backends that expose their capabilities."""
+
+    __slots__ = ()
+
+    capabilities: ClassVar[StorageCapabilities]
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls.capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls.capabilities
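
The flags defined in this file are what backends such as ObStoreBackend (earlier in this diff) declare on their capabilities ClassVar. A small illustrative sketch of the query side, not part of the package itself:

# Illustrative sketch only -- querying the flags defined above.
from sqlspec.storage.backends.obstore import ObStoreBackend
from sqlspec.storage.capabilities import StorageCapabilities

ObStoreBackend.has_capability("supports_arrow")       # True: set on the backend's ClassVar
ObStoreBackend.has_capability("supports_s3_select")   # False: dataclass defaults apply for unset flags
caps = ObStoreBackend.get_capabilities()              # the backend's StorageCapabilities instance

local = StorageCapabilities.local_filesystem()        # preset: is_remote=False, has_low_latency=True
s3 = StorageCapabilities.s3_compatible()              # preset: multipart upload and S3 Select enabled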