sqlspec 0.12.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sqlspec might be problematic.

Files changed (113)
  1. sqlspec/_sql.py +21 -180
  2. sqlspec/adapters/adbc/config.py +10 -12
  3. sqlspec/adapters/adbc/driver.py +120 -118
  4. sqlspec/adapters/aiosqlite/config.py +3 -3
  5. sqlspec/adapters/aiosqlite/driver.py +116 -141
  6. sqlspec/adapters/asyncmy/config.py +3 -4
  7. sqlspec/adapters/asyncmy/driver.py +123 -135
  8. sqlspec/adapters/asyncpg/config.py +3 -7
  9. sqlspec/adapters/asyncpg/driver.py +98 -140
  10. sqlspec/adapters/bigquery/config.py +4 -5
  11. sqlspec/adapters/bigquery/driver.py +231 -181
  12. sqlspec/adapters/duckdb/config.py +3 -6
  13. sqlspec/adapters/duckdb/driver.py +132 -124
  14. sqlspec/adapters/oracledb/config.py +6 -5
  15. sqlspec/adapters/oracledb/driver.py +242 -259
  16. sqlspec/adapters/psqlpy/config.py +3 -7
  17. sqlspec/adapters/psqlpy/driver.py +118 -93
  18. sqlspec/adapters/psycopg/config.py +34 -30
  19. sqlspec/adapters/psycopg/driver.py +342 -214
  20. sqlspec/adapters/sqlite/config.py +3 -3
  21. sqlspec/adapters/sqlite/driver.py +150 -104
  22. sqlspec/config.py +0 -4
  23. sqlspec/driver/_async.py +89 -98
  24. sqlspec/driver/_common.py +52 -17
  25. sqlspec/driver/_sync.py +81 -105
  26. sqlspec/driver/connection.py +207 -0
  27. sqlspec/driver/mixins/_csv_writer.py +91 -0
  28. sqlspec/driver/mixins/_pipeline.py +38 -49
  29. sqlspec/driver/mixins/_result_utils.py +27 -9
  30. sqlspec/driver/mixins/_storage.py +149 -216
  31. sqlspec/driver/mixins/_type_coercion.py +3 -4
  32. sqlspec/driver/parameters.py +138 -0
  33. sqlspec/exceptions.py +10 -2
  34. sqlspec/extensions/aiosql/adapter.py +0 -10
  35. sqlspec/extensions/litestar/handlers.py +0 -1
  36. sqlspec/extensions/litestar/plugin.py +0 -3
  37. sqlspec/extensions/litestar/providers.py +0 -14
  38. sqlspec/loader.py +31 -118
  39. sqlspec/protocols.py +542 -0
  40. sqlspec/service/__init__.py +3 -2
  41. sqlspec/service/_util.py +147 -0
  42. sqlspec/service/base.py +1116 -9
  43. sqlspec/statement/builder/__init__.py +42 -32
  44. sqlspec/statement/builder/_ddl_utils.py +0 -10
  45. sqlspec/statement/builder/_parsing_utils.py +10 -4
  46. sqlspec/statement/builder/base.py +70 -23
  47. sqlspec/statement/builder/column.py +283 -0
  48. sqlspec/statement/builder/ddl.py +102 -65
  49. sqlspec/statement/builder/delete.py +23 -7
  50. sqlspec/statement/builder/insert.py +29 -15
  51. sqlspec/statement/builder/merge.py +4 -4
  52. sqlspec/statement/builder/mixins/_aggregate_functions.py +113 -14
  53. sqlspec/statement/builder/mixins/_common_table_expr.py +0 -1
  54. sqlspec/statement/builder/mixins/_delete_from.py +1 -1
  55. sqlspec/statement/builder/mixins/_from.py +10 -8
  56. sqlspec/statement/builder/mixins/_group_by.py +0 -1
  57. sqlspec/statement/builder/mixins/_insert_from_select.py +0 -1
  58. sqlspec/statement/builder/mixins/_insert_values.py +0 -2
  59. sqlspec/statement/builder/mixins/_join.py +20 -13
  60. sqlspec/statement/builder/mixins/_limit_offset.py +3 -3
  61. sqlspec/statement/builder/mixins/_merge_clauses.py +3 -4
  62. sqlspec/statement/builder/mixins/_order_by.py +2 -2
  63. sqlspec/statement/builder/mixins/_pivot.py +4 -7
  64. sqlspec/statement/builder/mixins/_select_columns.py +6 -5
  65. sqlspec/statement/builder/mixins/_unpivot.py +6 -9
  66. sqlspec/statement/builder/mixins/_update_from.py +2 -1
  67. sqlspec/statement/builder/mixins/_update_set.py +11 -8
  68. sqlspec/statement/builder/mixins/_where.py +61 -34
  69. sqlspec/statement/builder/select.py +32 -17
  70. sqlspec/statement/builder/update.py +25 -11
  71. sqlspec/statement/filters.py +39 -14
  72. sqlspec/statement/parameter_manager.py +220 -0
  73. sqlspec/statement/parameters.py +210 -79
  74. sqlspec/statement/pipelines/__init__.py +166 -23
  75. sqlspec/statement/pipelines/analyzers/_analyzer.py +22 -25
  76. sqlspec/statement/pipelines/context.py +35 -39
  77. sqlspec/statement/pipelines/transformers/__init__.py +2 -3
  78. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +19 -187
  79. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +667 -43
  80. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +76 -0
  81. sqlspec/statement/pipelines/validators/_dml_safety.py +33 -18
  82. sqlspec/statement/pipelines/validators/_parameter_style.py +87 -14
  83. sqlspec/statement/pipelines/validators/_performance.py +38 -23
  84. sqlspec/statement/pipelines/validators/_security.py +39 -62
  85. sqlspec/statement/result.py +37 -129
  86. sqlspec/statement/splitter.py +0 -12
  87. sqlspec/statement/sql.py +885 -379
  88. sqlspec/statement/sql_compiler.py +140 -0
  89. sqlspec/storage/__init__.py +10 -2
  90. sqlspec/storage/backends/fsspec.py +82 -35
  91. sqlspec/storage/backends/obstore.py +66 -49
  92. sqlspec/storage/capabilities.py +101 -0
  93. sqlspec/storage/registry.py +56 -83
  94. sqlspec/typing.py +6 -434
  95. sqlspec/utils/cached_property.py +25 -0
  96. sqlspec/utils/correlation.py +0 -2
  97. sqlspec/utils/logging.py +0 -6
  98. sqlspec/utils/sync_tools.py +0 -4
  99. sqlspec/utils/text.py +0 -5
  100. sqlspec/utils/type_guards.py +892 -0
  101. {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/METADATA +1 -1
  102. sqlspec-0.13.0.dist-info/RECORD +150 -0
  103. sqlspec/statement/builder/protocols.py +0 -20
  104. sqlspec/statement/pipelines/base.py +0 -315
  105. sqlspec/statement/pipelines/result_types.py +0 -41
  106. sqlspec/statement/pipelines/transformers/_remove_comments.py +0 -66
  107. sqlspec/statement/pipelines/transformers/_remove_hints.py +0 -81
  108. sqlspec/statement/pipelines/validators/base.py +0 -67
  109. sqlspec/storage/protocol.py +0 -170
  110. sqlspec-0.12.1.dist-info/RECORD +0 -145
  111. {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/WHEEL +0 -0
  112. {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/LICENSE +0 -0
  113. {sqlspec-0.12.1.dist-info → sqlspec-0.13.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/storage/backends/obstore.py

@@ -9,14 +9,16 @@ from __future__ import annotations
 
 import fnmatch
 import logging
-from typing import TYPE_CHECKING, Any, cast
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
 from sqlspec.storage.backends.base import ObjectStoreBase
+from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
 from sqlspec.typing import OBSTORE_INSTALLED
 
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator, Iterator
+    from pathlib import Path
 
     from sqlspec.typing import ArrowRecordBatch, ArrowTable
 
@@ -25,7 +27,7 @@ __all__ = ("ObStoreBackend",)
 logger = logging.getLogger(__name__)
 
 
-class ObStoreBackend(ObjectStoreBase):
+class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
     """High-performance object storage backend using obstore.
 
     This backend leverages obstore's Rust-based implementation for maximum
@@ -39,6 +41,18 @@ class ObStoreBackend(ObjectStoreBase):
     Features native Arrow support and ~9x better performance than fsspec.
     """
 
+    # ObStore has excellent native capabilities
+    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
+        supports_arrow=True,
+        supports_streaming=True,
+        supports_async=True,
+        supports_batch_operations=True,
+        supports_multipart_upload=True,
+        supports_compression=True,
+        is_cloud_native=True,
+        has_low_latency=True,
+    )
+
     def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
         """Initialize obstore backend.
 
@@ -57,14 +71,12 @@ class ObStoreBackend(ObjectStoreBase):
         self.store_options = store_options
         self.store: Any  # Will be set based on store_uri
 
-        # Initialize obstore instance
         if store_uri.startswith("memory://"):
             # MemoryStore doesn't use from_url - create directly
             from obstore.store import MemoryStore
 
             self.store = MemoryStore()
         elif store_uri.startswith("file://"):
-            # For file:// URIs, use LocalStore with root directory
             from obstore.store import LocalStore
 
             # LocalStore works with directory paths, so we use root
@@ -83,19 +95,18 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to initialize obstore backend for {store_uri}"
             raise StorageOperationFailedError(msg) from exc
 
-    def _resolve_path(self, path: str) -> str:
+    def _resolve_path(self, path: str | Path) -> str:
         """Resolve path relative to base_path."""
+        path_str = str(path)
         # For file:// URIs, the path passed in is already absolute
-        if self.store_uri.startswith("file://") and path.startswith("/"):
-            # Remove leading slash for LocalStore (it's relative to its root)
-            return path.lstrip("/")
+        if self.store_uri.startswith("file://") and path_str.startswith("/"):
+            return path_str.lstrip("/")
 
         if self.base_path:
-            # Ensure no double slashes by stripping trailing slash from base_path
             clean_base = self.base_path.rstrip("/")
-            clean_path = path.lstrip("/")
+            clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
-        return path
+        return path_str
 
     @property
     def backend_type(self) -> str:
@@ -104,17 +115,25 @@ class ObStoreBackend(ObjectStoreBase):
 
     # Implementation of abstract methods from ObjectStoreBase
 
-    def read_bytes(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Read bytes using obstore."""
         try:
             resolved_path = self._resolve_path(path)
             result = self.store.get(resolved_path)
-            return result.bytes()  # type: ignore[no-any-return] # pyright: ignore[reportReturnType]
+            bytes_data = result.bytes()
+            if hasattr(bytes_data, "__bytes__"):
+                return bytes(bytes_data)
+            if hasattr(bytes_data, "tobytes"):
+                return bytes_data.tobytes()  # type: ignore[no-any-return]
+            if isinstance(bytes_data, bytes):
+                return bytes_data
+            # Try to convert to bytes
+            return bytes(bytes_data)
         except Exception as exc:
             msg = f"Failed to read bytes from {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Write bytes using obstore."""
         try:
             resolved_path = self._resolve_path(path)
@@ -123,12 +142,12 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to write bytes to {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text using obstore."""
         data = self.read_bytes(path, **kwargs)
         return data.decode(encoding)
 
-    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text using obstore."""
         encoded_data = data.encode(encoding)
         self.write_bytes(path, encoded_data, **kwargs)
@@ -153,7 +172,7 @@ class ObStoreBackend(ObjectStoreBase):
 
         return sorted(objects)
 
-    def exists(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Check if object exists using obstore."""
         try:
             self.store.head(self._resolve_path(path))
@@ -161,7 +180,7 @@ class ObStoreBackend(ObjectStoreBase):
             return False
         return True
 
-    def delete(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def delete(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Delete object using obstore."""
         try:
             self.store.delete(self._resolve_path(path))
@@ -169,7 +188,7 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to delete {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def copy(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Copy object using obstore."""
         try:
             self.store.copy(self._resolve_path(source), self._resolve_path(destination))
@@ -177,7 +196,7 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to copy {source} to {destination}"
             raise StorageOperationFailedError(msg) from exc
 
-    def move(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Move object using obstore."""
         try:
             self.store.rename(self._resolve_path(source), self._resolve_path(destination))
@@ -198,17 +217,14 @@ class ObStoreBackend(ObjectStoreBase):
         resolved_pattern = self._resolve_path(pattern)
         all_objects = self.list_objects(recursive=True, **kwargs)
 
-        # For complex patterns with **, use PurePosixPath
         if "**" in pattern:
             matching_objects = []
 
             # Special case: **/*.ext should also match *.ext in root
             if pattern.startswith("**/"):
-                # Get the suffix pattern
                 suffix_pattern = pattern[3:]  # Remove **/
 
                 for obj in all_objects:
-                    # Check if object ends with the suffix pattern
                     obj_path = PurePosixPath(obj)
                     # Try both the full pattern and just the suffix
                     if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
@@ -224,7 +240,7 @@ class ObStoreBackend(ObjectStoreBase):
         # Use standard fnmatch for simple patterns
         return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
 
-    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
         """Get object metadata using obstore."""
         resolved_path = self._resolve_path(path)
         try:
@@ -245,13 +261,13 @@ class ObStoreBackend(ObjectStoreBase):
         else:
             return result
 
-    def is_object(self, path: str) -> bool:
+    def is_object(self, path: str | Path) -> bool:
         """Check if path is an object using obstore."""
         resolved_path = self._resolve_path(path)
         # An object exists and doesn't end with /
         return self.exists(path) and not resolved_path.endswith("/")
 
-    def is_path(self, path: str) -> bool:
+    def is_path(self, path: str | Path) -> bool:
         """Check if path is a prefix/directory using obstore."""
         resolved_path = self._resolve_path(path)
 
@@ -259,18 +275,16 @@ class ObStoreBackend(ObjectStoreBase):
         if resolved_path.endswith("/"):
             return True
 
-        # Check if there are any objects with this prefix
         try:
-            objects = self.list_objects(prefix=path, recursive=False)
+            objects = self.list_objects(prefix=str(path), recursive=False)
             return len(objects) > 0
         except Exception:
            return False
 
-    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
         """Read Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "read_arrow"):
                 return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
             # Fall back to reading as Parquet via bytes
@@ -285,11 +299,10 @@ class ObStoreBackend(ObjectStoreBase):
             msg = f"Failed to read Arrow table from {path}"
             raise StorageOperationFailedError(msg) from exc
 
-    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+    def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
         """Write Arrow table using obstore."""
         try:
             resolved_path = self._resolve_path(path)
-            # Check if the store has native Arrow support
             if hasattr(self.store, "write_arrow"):
                 self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
             else:
@@ -309,7 +322,6 @@ class ObStoreBackend(ObjectStoreBase):
 
                 for field in schema:
                     if str(field.type).startswith("decimal64"):
-                        # Convert decimal64 to decimal128
                         import re
 
                         match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
@@ -350,13 +362,21 @@ class ObStoreBackend(ObjectStoreBase):
     # Private async implementations for instrumentation support
    # These are called by the base class async methods after instrumentation
 
-    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Private async read bytes using native obstore async if available."""
         resolved_path = self._resolve_path(path)
         result = await self.store.get_async(resolved_path)
-        return cast("bytes", result.bytes())  # pyright: ignore[reportReturnType]
-
-    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+        bytes_data = result.bytes()
+        if hasattr(bytes_data, "__bytes__"):
+            return bytes(bytes_data)
+        if hasattr(bytes_data, "tobytes"):
+            return bytes_data.tobytes()  # type: ignore[no-any-return]
+        if isinstance(bytes_data, bytes):
+            return bytes_data
+        # Try to convert to bytes
+        return bytes(bytes_data)
+
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Private async write bytes using native obstore async."""
         resolved_path = self._resolve_path(path)
         await self.store.put_async(resolved_path, data)
@@ -379,17 +399,17 @@ class ObStoreBackend(ObjectStoreBase):
     # Implement all other required abstract async methods
     # ObStore provides native async for most operations
 
-    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Async read text using native obstore async."""
         data = await self.read_bytes_async(path, **kwargs)
         return data.decode(encoding)
 
-    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async write text using native obstore async."""
         encoded_data = data.encode(encoding)
         await self.write_bytes_async(path, encoded_data, **kwargs)
 
-    async def exists_async(self, path: str, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Async check if object exists using native obstore async."""
         resolved_path = self._resolve_path(path)
         try:
@@ -398,32 +418,30 @@ class ObStoreBackend(ObjectStoreBase):
             return False
         return True
 
-    async def delete_async(self, path: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async delete object using native obstore async."""
         resolved_path = self._resolve_path(path)
         await self.store.delete_async(resolved_path)
 
-    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async copy object using native obstore async."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.copy_async(source_path, dest_path)
 
-    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Async move object using native obstore async."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.rename_async(source_path, dest_path)
 
-    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
         """Async get object metadata using native obstore async."""
         resolved_path = self._resolve_path(path)
         metadata = await self.store.head_async(resolved_path)
 
-        # Convert obstore ObjectMeta to dict
         result = {"path": resolved_path, "exists": True}
 
-        # Extract metadata attributes if available
         for attr in ["size", "last_modified", "e_tag", "version"]:
             if hasattr(metadata, attr):
                 result[attr] = getattr(metadata, attr)
@@ -436,15 +454,14 @@ class ObStoreBackend(ObjectStoreBase):
 
         return result
 
-    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
         """Async read Arrow table using native obstore async."""
         resolved_path = self._resolve_path(path)
         return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
 
-    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
+    async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
         """Async write Arrow table using native obstore async."""
         resolved_path = self._resolve_path(path)
-        # Check if the store has native async Arrow support
         if hasattr(self.store, "write_arrow_async"):
             await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
         else:
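
The net effect of the obstore.py changes above is that every path-taking method now accepts str | Path, and the backend advertises its feature set through the capabilities attribute inherited from HasStorageCapabilities. A minimal usage sketch, assuming obstore is installed and using the memory:// store handled in __init__ (not an example shipped with the package):

from pathlib import Path

from sqlspec.storage.backends.obstore import ObStoreBackend

backend = ObStoreBackend("memory://")

# Path objects are now accepted anywhere a plain string path was before.
backend.write_text(Path("reports/summary.txt"), "hello")
print(backend.read_text("reports/summary.txt"))  # -> hello

# Capability checks come from the HasStorageCapabilities mixin.
print(ObStoreBackend.has_capability("supports_arrow"))      # True (declared above)
print(ObStoreBackend.has_capability("supports_s3_select"))  # False (default)
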
sqlspec/storage/capabilities.py

@@ -0,0 +1,101 @@
+"""Storage backend capability system.
+
+This module provides a centralized way to track and query storage backend capabilities.
+"""
+
+from dataclasses import dataclass
+from typing import ClassVar
+
+__all__ = ("HasStorageCapabilities", "StorageCapabilities")
+
+
+@dataclass
+class StorageCapabilities:
+    """Tracks capabilities of a storage backend."""
+
+    # Basic operations
+    supports_read: bool = True
+    supports_write: bool = True
+    supports_delete: bool = True
+    supports_list: bool = True
+    supports_exists: bool = True
+    supports_copy: bool = True
+    supports_move: bool = True
+    supports_metadata: bool = True
+
+    # Advanced operations
+    supports_arrow: bool = False
+    supports_streaming: bool = False
+    supports_async: bool = False
+    supports_batch_operations: bool = False
+    supports_multipart_upload: bool = False
+    supports_compression: bool = False
+
+    # Protocol-specific features
+    supports_s3_select: bool = False
+    supports_gcs_compose: bool = False
+    supports_azure_snapshots: bool = False
+
+    # Performance characteristics
+    is_remote: bool = True
+    is_cloud_native: bool = False
+    has_low_latency: bool = False
+
+    @classmethod
+    def local_filesystem(cls) -> "StorageCapabilities":
+        """Capabilities for local filesystem backend."""
+        return cls(
+            is_remote=False, has_low_latency=True, supports_arrow=True, supports_streaming=True, supports_async=True
+        )
+
+    @classmethod
+    def s3_compatible(cls) -> "StorageCapabilities":
+        """Capabilities for S3-compatible backends."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_s3_select=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def gcs(cls) -> "StorageCapabilities":
+        """Capabilities for Google Cloud Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_gcs_compose=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+    @classmethod
+    def azure_blob(cls) -> "StorageCapabilities":
+        """Capabilities for Azure Blob Storage."""
+        return cls(
+            is_cloud_native=True,
+            supports_multipart_upload=True,
+            supports_azure_snapshots=True,
+            supports_arrow=True,
+            supports_streaming=True,
+            supports_async=True,
+        )
+
+
+class HasStorageCapabilities:
+    """Mixin for storage backends that expose their capabilities."""
+
+    capabilities: ClassVar[StorageCapabilities]
+
+    @classmethod
+    def has_capability(cls, capability: str) -> bool:
+        """Check if backend has a specific capability."""
+        return getattr(cls.capabilities, capability, False)
+
+    @classmethod
+    def get_capabilities(cls) -> StorageCapabilities:
+        """Get all capabilities for this backend."""
+        return cls.capabilities
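
The new module is consumed by declaring a capabilities ClassVar on a backend class and querying it through the mixin's classmethods. A hedged sketch follows; MyS3Backend is a hypothetical class invented purely for illustration, and only StorageCapabilities and HasStorageCapabilities come from the code above:

from typing import ClassVar

from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities


class MyS3Backend(HasStorageCapabilities):
    """Hypothetical backend used only to illustrate the capability API."""

    # Reuse the S3 preset rather than enumerating every flag by hand.
    capabilities: ClassVar[StorageCapabilities] = StorageCapabilities.s3_compatible()


# has_capability() goes through getattr(), so unknown flag names safely return False.
print(MyS3Backend.has_capability("supports_multipart_upload"))  # True
print(MyS3Backend.has_capability("supports_azure_snapshots"))   # False
print(MyS3Backend.has_capability("not_a_real_flag"))            # False

caps = MyS3Backend.get_capabilities()
print(caps.is_cloud_native, caps.supports_arrow)  # True True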