sqlspec 0.14.1__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (159)
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +1 -1
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +480 -121
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -260
  10. sqlspec/adapters/adbc/driver.py +462 -367
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +18 -65
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +8 -11
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +34 -18
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +19 -9
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +25 -38
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +15 -16
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +210 -137
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +830 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +666 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +164 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/cli.py +1 -1
  90. sqlspec/extensions/litestar/config.py +0 -1
  91. sqlspec/extensions/litestar/handlers.py +15 -26
  92. sqlspec/extensions/litestar/plugin.py +18 -16
  93. sqlspec/extensions/litestar/providers.py +17 -52
  94. sqlspec/loader.py +424 -105
  95. sqlspec/migrations/__init__.py +12 -0
  96. sqlspec/migrations/base.py +92 -68
  97. sqlspec/migrations/commands.py +24 -106
  98. sqlspec/migrations/loaders.py +402 -0
  99. sqlspec/migrations/runner.py +49 -51
  100. sqlspec/migrations/tracker.py +31 -44
  101. sqlspec/migrations/utils.py +64 -24
  102. sqlspec/protocols.py +7 -183
  103. sqlspec/storage/__init__.py +1 -1
  104. sqlspec/storage/backends/base.py +37 -40
  105. sqlspec/storage/backends/fsspec.py +136 -112
  106. sqlspec/storage/backends/obstore.py +138 -160
  107. sqlspec/storage/capabilities.py +5 -4
  108. sqlspec/storage/registry.py +57 -106
  109. sqlspec/typing.py +136 -115
  110. sqlspec/utils/__init__.py +2 -3
  111. sqlspec/utils/correlation.py +0 -3
  112. sqlspec/utils/deprecation.py +6 -6
  113. sqlspec/utils/fixtures.py +6 -6
  114. sqlspec/utils/logging.py +0 -2
  115. sqlspec/utils/module_loader.py +7 -12
  116. sqlspec/utils/singleton.py +0 -1
  117. sqlspec/utils/sync_tools.py +17 -38
  118. sqlspec/utils/text.py +12 -51
  119. sqlspec/utils/type_guards.py +443 -232
  120. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/METADATA +7 -2
  121. sqlspec-0.16.0.dist-info/RECORD +134 -0
  122. sqlspec/adapters/adbc/transformers.py +0 -108
  123. sqlspec/driver/connection.py +0 -207
  124. sqlspec/driver/mixins/_cache.py +0 -114
  125. sqlspec/driver/mixins/_csv_writer.py +0 -91
  126. sqlspec/driver/mixins/_pipeline.py +0 -508
  127. sqlspec/driver/mixins/_query_tools.py +0 -796
  128. sqlspec/driver/mixins/_result_utils.py +0 -138
  129. sqlspec/driver/mixins/_storage.py +0 -912
  130. sqlspec/driver/mixins/_type_coercion.py +0 -128
  131. sqlspec/driver/parameters.py +0 -138
  132. sqlspec/statement/__init__.py +0 -21
  133. sqlspec/statement/builder/_merge.py +0 -95
  134. sqlspec/statement/cache.py +0 -50
  135. sqlspec/statement/filters.py +0 -625
  136. sqlspec/statement/parameters.py +0 -956
  137. sqlspec/statement/pipelines/__init__.py +0 -210
  138. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  139. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  140. sqlspec/statement/pipelines/context.py +0 -109
  141. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  142. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  143. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  144. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  145. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  146. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  147. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  148. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  149. sqlspec/statement/pipelines/validators/_security.py +0 -967
  150. sqlspec/statement/result.py +0 -435
  151. sqlspec/statement/sql.py +0 -1774
  152. sqlspec/utils/cached_property.py +0 -25
  153. sqlspec/utils/statement_hashing.py +0 -203
  154. sqlspec-0.14.1.dist-info/RECORD +0 -145
  155. /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/entry_points.txt +0 -0
  158. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/LICENSE +0 -0
  159. {sqlspec-0.14.1.dist-info → sqlspec-0.16.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/storage/backends/obstore.py

@@ -1,15 +1,17 @@
-"""High-performance object storage using obstore.
+"""Object storage backend using obstore.
 
-This backend implements the ObjectStoreProtocol using obstore,
+Implements the ObjectStoreProtocol using obstore,
 providing native support for S3, GCS, Azure, and local file storage
-with excellent performance characteristics and native Arrow support.
+with Arrow support.
 """
 
 from __future__ import annotations
 
 import fnmatch
 import logging
-from typing import TYPE_CHECKING, Any, ClassVar
+from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
+
+from mypy_extensions import mypyc_attr
 
 from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
 from sqlspec.storage.backends.base import ObjectStoreBase
@@ -27,21 +29,40 @@ __all__ = ("ObStoreBackend",)
 logger = logging.getLogger(__name__)
 
 
+class _AsyncArrowIterator:
+    """Helper class to work around mypyc's lack of async generator support."""
+
+    def __init__(self, store: Any, pattern: str, **kwargs: Any) -> None:
+        self.store = store
+        self.pattern = pattern
+        self.kwargs = kwargs
+        self._iterator: Any | None = None
+
+    def __aiter__(self) -> _AsyncArrowIterator:
+        return self
+
+    async def __anext__(self) -> ArrowRecordBatch:
+        if self._iterator is None:
+            self._iterator = self.store.stream_arrow_async(self.pattern, **self.kwargs)
+        if self._iterator is not None:
+            return cast("ArrowRecordBatch", await self._iterator.__anext__())
+        raise StopAsyncIteration
+
+
+DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
+
+
+@mypyc_attr(allow_interpreted_subclasses=True)
 class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
-    """High-performance object storage backend using obstore.
+    """Object storage backend using obstore.
 
-    This backend leverages obstore's Rust-based implementation for maximum
-    performance, providing native support for:
-    - AWS S3 and S3-compatible stores
-    - Google Cloud Storage
-    - Azure Blob Storage
-    - Local filesystem
-    - HTTP endpoints
+    Uses obstore's Rust-based implementation for storage operations,
+    providing native support for AWS S3, Google Cloud Storage, Azure Blob Storage,
+    local filesystem, and HTTP endpoints.
 
-    Features native Arrow support and ~9x better performance than fsspec.
+    Includes native Arrow support.
     """
 
-    # ObStore has excellent native capabilities
     capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
         supports_arrow=True,
         supports_streaming=True,
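
The new _AsyncArrowIterator above exists because mypyc cannot compile async generators: the old async-generator method is replaced by a plain method returning an object that implements __aiter__ and __anext__ by hand. A minimal, self-contained sketch of the same pattern (the countdown class is hypothetical, not part of sqlspec):

import asyncio


class _CountdownIterator:
    """Hand-rolled async iterator: the shape mypyc can compile in place of an async generator."""

    def __init__(self, start: int) -> None:
        self.current = start

    def __aiter__(self) -> "_CountdownIterator":
        return self

    async def __anext__(self) -> int:
        if self.current <= 0:
            raise StopAsyncIteration
        self.current -= 1
        return self.current + 1


async def main() -> None:
    # Consumed exactly like an async generator would be.
    async for value in _CountdownIterator(3):
        print(value)  # 3, 2, 1


asyncio.run(main())
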
@@ -53,6 +74,8 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
         has_low_latency=True,
     )
 
+    __slots__ = ("_path_cache", "base_path", "protocol", "store", "store_options", "store_uri")
+
     def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
         """Initialize obstore backend.
 
@@ -69,26 +92,23 @@
             self.store_uri = store_uri
             self.base_path = base_path.rstrip("/") if base_path else ""
             self.store_options = store_options
-            self.store: Any  # Will be set based on store_uri
+            self.store: Any
+            self._path_cache: dict[str, str] = {}
+            self.protocol = store_uri.split("://", 1)[0] if "://" in store_uri else "file"
 
             if store_uri.startswith("memory://"):
-                # MemoryStore doesn't use from_url - create directly
                 from obstore.store import MemoryStore
 
                 self.store = MemoryStore()
             elif store_uri.startswith("file://"):
                 from obstore.store import LocalStore
 
-                # LocalStore works with directory paths, so we use root
                 self.store = LocalStore("/")
-                # The full path will be handled in _resolve_path
             else:
-                # Use obstore's from_url for automatic URI parsing
                 from obstore.store import from_url
 
                 self.store = from_url(store_uri, **store_options)  # pyright: ignore[reportAttributeAccessIssue]
 
-            # Log successful initialization
             logger.debug("ObStore backend initialized for %s", store_uri)
 
         except Exception as exc:
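
The constructor dispatches on the URI scheme: memory:// builds a MemoryStore, file:// a LocalStore rooted at "/", and everything else goes through obstore.store.from_url. A hedged usage sketch; the s3:// option names are assumptions about obstore's configuration keys, not verified here:

from sqlspec.storage.backends.obstore import ObStoreBackend

# In-memory store, handy for tests; no credentials needed.
mem = ObStoreBackend("memory://")

# Local filesystem; LocalStore("/") is used and full paths are handled by _resolve_path.
local = ObStoreBackend("file:///tmp/sqlspec-demo")

# Remote stores are parsed by obstore's from_url(); uncomment with real credentials.
# s3 = ObStoreBackend("s3://my-bucket", base_path="exports", region="us-east-1")
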
@@ -98,10 +118,10 @@
     def _resolve_path(self, path: str | Path) -> str:
         """Resolve path relative to base_path."""
         path_str = str(path)
-        # For file:// URIs, the path passed in is already absolute
+        if path_str.startswith("file://"):
+            path_str = path_str.removeprefix("file://")
         if self.store_uri.startswith("file://") and path_str.startswith("/"):
             return path_str.lstrip("/")
-
         if self.base_path:
             clean_base = self.base_path.rstrip("/")
             clean_path = path_str.lstrip("/")
@@ -113,22 +133,11 @@
         """Return backend type identifier."""
         return "obstore"
 
-    # Implementation of abstract methods from ObjectStoreBase
-
     def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
         """Read bytes using obstore."""
         try:
-            resolved_path = self._resolve_path(path)
-            result = self.store.get(resolved_path)
-            bytes_data = result.bytes()
-            if hasattr(bytes_data, "__bytes__"):
-                return bytes(bytes_data)
-            if hasattr(bytes_data, "tobytes"):
-                return bytes_data.tobytes()  # type: ignore[no-any-return]
-            if isinstance(bytes_data, bytes):
-                return bytes_data
-            # Try to convert to bytes
-            return bytes(bytes_data)
+            result = self.store.get(self._resolve_path(path))
+            return cast("bytes", result.bytes().to_bytes())
         except Exception as exc:
             msg = f"Failed to read bytes from {path}"
             raise StorageOperationFailedError(msg) from exc
@@ -136,41 +145,30 @@
     def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
         """Write bytes using obstore."""
         try:
-            resolved_path = self._resolve_path(path)
-            self.store.put(resolved_path, data)
+            self.store.put(self._resolve_path(path), data)
         except Exception as exc:
             msg = f"Failed to write bytes to {path}"
             raise StorageOperationFailedError(msg) from exc
 
     def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text using obstore."""
-        data = self.read_bytes(path, **kwargs)
-        return data.decode(encoding)
+        return self.read_bytes(path, **kwargs).decode(encoding)
 
     def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text using obstore."""
-        encoded_data = data.encode(encoding)
-        self.write_bytes(path, encoded_data, **kwargs)
+        self.write_bytes(path, data.encode(encoding), **kwargs)
 
     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
         """List objects using obstore."""
-        resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
-        objects: list[str] = []
-
-        def _get_item_path(item: Any) -> str:
-            """Extract path from item, trying path attribute first, then key."""
-            if hasattr(item, "path"):
-                return str(item.path)
-            if hasattr(item, "key"):
-                return str(item.key)
-            return str(item)
-
-        if not recursive:
-            objects.extend(_get_item_path(item) for item in self.store.list_with_delimiter(resolved_prefix))  # pyright: ignore
-        else:
-            objects.extend(_get_item_path(item) for item in self.store.list(resolved_prefix))
-
-        return sorted(objects)
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
+            items = (
+                self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
+            )
+            return sorted(str(getattr(item, "path", getattr(item, "key", str(item)))) for item in items)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
 
     def exists(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
         """Check if object exists using obstore."""
@@ -207,56 +205,52 @@
     def glob(self, pattern: str, **kwargs: Any) -> list[str]:
         """Find objects matching pattern using obstore.
 
-        Note: obstore does not support server-side globbing. This implementation
-        lists all objects and filters them client-side, which may be inefficient
-        for large buckets.
+        Lists all objects and filters them client-side using the pattern.
         """
         from pathlib import PurePosixPath
 
-        # List all objects and filter by pattern
         resolved_pattern = self._resolve_path(pattern)
         all_objects = self.list_objects(recursive=True, **kwargs)
 
         if "**" in pattern:
             matching_objects = []
 
-            # Special case: **/*.ext should also match *.ext in root
             if pattern.startswith("**/"):
-                suffix_pattern = pattern[3:]  # Remove **/
+                suffix_pattern = pattern[3:]
 
                 for obj in all_objects:
                     obj_path = PurePosixPath(obj)
-                    # Try both the full pattern and just the suffix
                     if obj_path.match(resolved_pattern) or obj_path.match(suffix_pattern):
                         matching_objects.append(obj)
             else:
-                # Standard ** pattern matching
                 for obj in all_objects:
                     obj_path = PurePosixPath(obj)
                     if obj_path.match(resolved_pattern):
                         matching_objects.append(obj)
 
             return matching_objects
-        # Use standard fnmatch for simple patterns
         return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
 
     def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
         """Get object metadata using obstore."""
         resolved_path = self._resolve_path(path)
+        result: dict[str, Any] = {}
         try:
             metadata = self.store.head(resolved_path)
-            result = {"path": resolved_path, "exists": True}
-            for attr in ("size", "last_modified", "e_tag", "version"):
-                if hasattr(metadata, attr):
-                    result[attr] = getattr(metadata, attr)
-
-            # Include custom metadata if available
-            if hasattr(metadata, "metadata"):
-                custom_metadata = getattr(metadata, "metadata", None)
-                if custom_metadata:
-                    result["custom_metadata"] = custom_metadata
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": getattr(metadata, "size", None),
+                    "last_modified": getattr(metadata, "last_modified", None),
+                    "e_tag": getattr(metadata, "e_tag", None),
+                    "version": getattr(metadata, "version", None),
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
+
         except Exception:
-            # Object doesn't exist
             return {"path": resolved_path, "exists": False}
         else:
             return result
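
The **/ special case in glob() compensates for PurePosixPath.match treating each pattern component separately: on Python 3.12 and earlier, **/*.sql alone never matches a root-level init.sql, which is why the suffix pattern is also tried (Python 3.13 made ** in match() recursive, so both checks succeed there). A standalone illustration of the 3.12-style behavior:

from pathlib import PurePosixPath

print(PurePosixPath("init.sql").match("**/*.sql"))      # False: pattern expects 2+ components
print(PurePosixPath("init.sql").match("*.sql"))         # True: the suffix pattern catches it
print(PurePosixPath("a/b/init.sql").match("**/*.sql"))  # True
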
@@ -264,19 +258,17 @@
     def is_object(self, path: str | Path) -> bool:
         """Check if path is an object using obstore."""
         resolved_path = self._resolve_path(path)
-        # An object exists and doesn't end with /
         return self.exists(path) and not resolved_path.endswith("/")
 
     def is_path(self, path: str | Path) -> bool:
         """Check if path is a prefix/directory using obstore."""
         resolved_path = self._resolve_path(path)
 
-        # A path/prefix either ends with / or has objects under it
         if resolved_path.endswith("/"):
             return True
 
         try:
-            objects = self.list_objects(prefix=str(path), recursive=False)
+            objects = self.list_objects(prefix=str(path), recursive=True)
             return len(objects) > 0
         except Exception:
             return False
@@ -287,7 +279,7 @@
         resolved_path = self._resolve_path(path)
         if hasattr(self.store, "read_arrow"):
             return self.store.read_arrow(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
-        # Fall back to reading as Parquet via bytes
+
         import io
 
         import pyarrow.parquet as pq
@@ -306,7 +298,6 @@
         if hasattr(self.store, "write_arrow"):
             self.store.write_arrow(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
         else:
-            # Fall back to writing as Parquet via bytes
             import io
 
             import pyarrow as pa
@@ -314,30 +305,22 @@
             buffer = io.BytesIO()
 
-            # Check for decimal64 columns and convert to decimal128
-            # PyArrow doesn't support decimal64 in Parquet files
             schema = table.schema
-            needs_conversion = False
-            new_fields = []
-
-            for field in schema:
-                if str(field.type).startswith("decimal64"):
-                    import re
-
-                    match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
-                    if match:
-                        precision, scale = int(match.group(1)), int(match.group(2))
-                        new_field = pa.field(field.name, pa.decimal128(precision, scale))
-                        new_fields.append(new_field)
-                        needs_conversion = True
+            if any(str(f.type).startswith("decimal64") for f in schema):
+                new_fields = []
+                for field in schema:
+                    if str(field.type).startswith("decimal64"):
+                        import re
+
+                        match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
+                        if match:
+                            precision, scale = int(match.group(1)), int(match.group(2))
+                            new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
+                        else:
+                            new_fields.append(field)  # pragma: no cover
                     else:
                         new_fields.append(field)
-                else:
-                    new_fields.append(field)
-
-            if needs_conversion:
-                new_schema = pa.schema(new_fields)
-                table = table.cast(new_schema)
+                table = table.cast(pa.schema(new_fields))
 
             pq.write_table(table, buffer, **kwargs)
             buffer.seek(0)
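
The hunk above compacts the decimal handling: Parquet writing does not accept decimal64 fields, so any such column is re-typed to decimal128 before pq.write_table. A standalone sketch of the same cast; unlike the diff's regex on str(field.type), it reads the type's precision/scale attributes, and pa.decimal64 itself requires a recent PyArrow (an assumption about your installed version):

import pyarrow as pa

schema = pa.schema([pa.field("amount", pa.decimal64(10, 2)), pa.field("name", pa.string())])
fields = [
    # Re-type decimal64 columns to decimal128 with the same precision/scale.
    pa.field(f.name, pa.decimal128(f.type.precision, f.type.scale))
    if str(f.type).startswith("decimal64")
    else f
    for f in schema
]
print(pa.schema(fields))  # amount: decimal128(10, 2), name: string
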
@@ -359,58 +342,50 @@
             msg = f"Failed to stream Arrow data for pattern {pattern}"
             raise StorageOperationFailedError(msg) from exc
 
-    # Private async implementations for instrumentation support
-    # These are called by the base class async methods after instrumentation
-
     async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:  # pyright: ignore[reportUnusedParameter]
-        """Private async read bytes using native obstore async if available."""
-        resolved_path = self._resolve_path(path)
-        result = await self.store.get_async(resolved_path)
-        bytes_data = result.bytes()
-        if hasattr(bytes_data, "__bytes__"):
-            return bytes(bytes_data)
-        if hasattr(bytes_data, "tobytes"):
-            return bytes_data.tobytes()  # type: ignore[no-any-return]
-        if isinstance(bytes_data, bytes):
-            return bytes_data
-        # Try to convert to bytes
-        return bytes(bytes_data)
+        """Read bytes from storage asynchronously."""
+        try:
+            resolved_path = self._resolve_path(path)
+            result = await self.store.get_async(resolved_path)
+            bytes_obj = await result.bytes_async()
+            return bytes_obj.to_bytes()  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
+        except Exception as exc:
+            msg = f"Failed to read bytes from {path}"
+            raise StorageOperationFailedError(msg) from exc
 
     async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """Private async write bytes using native obstore async."""
+        """Write bytes to storage asynchronously."""
         resolved_path = self._resolve_path(path)
         await self.store.put_async(resolved_path, data)
 
     async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:  # pyright: ignore[reportUnusedParameter]
-        """Private async list objects using native obstore async if available."""
-        resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
-
-        # Note: store.list_async returns an async iterator
-        objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
+        """List objects in storage asynchronously."""
+        try:
+            resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
 
-        # Manual filtering for non-recursive if needed as obstore lacks an
-        # async version of list_with_delimiter.
-        if not recursive and resolved_prefix:
-            base_depth = resolved_prefix.count("/")
-            objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
+            objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)]  # pyright: ignore[reportAttributeAccessIssue]
 
-        return sorted(objects)
+            if not recursive and resolved_prefix:
+                base_depth = resolved_prefix.count("/")
+                objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
 
-    # Implement all other required abstract async methods
-    # ObStore provides native async for most operations
+            return sorted(objects)
+        except Exception as exc:
+            msg = f"Failed to list objects with prefix '{prefix}'"
+            raise StorageOperationFailedError(msg) from exc
 
     async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
-        """Async read text using native obstore async."""
+        """Read text from storage asynchronously."""
         data = await self.read_bytes_async(path, **kwargs)
         return data.decode(encoding)
 
     async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """Async write text using native obstore async."""
+        """Write text to storage asynchronously."""
        encoded_data = data.encode(encoding)
         await self.write_bytes_async(path, encoded_data, **kwargs)
 
     async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:  # pyright: ignore[reportUnusedParameter]
-        """Async check if object exists using native obstore async."""
+        """Check if object exists in storage asynchronously."""
         resolved_path = self._resolve_path(path)
         try:
             await self.store.head_async(resolved_path)
@@ -419,53 +394,57 @@
         return True
 
     async def delete_async(self, path: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """Async delete object using native obstore async."""
+        """Delete object from storage asynchronously."""
         resolved_path = self._resolve_path(path)
         await self.store.delete_async(resolved_path)
 
     async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """Async copy object using native obstore async."""
+        """Copy object in storage asynchronously."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.copy_async(source_path, dest_path)
 
     async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:  # pyright: ignore[reportUnusedParameter]
-        """Async move object using native obstore async."""
+        """Move object in storage asynchronously."""
         source_path = self._resolve_path(source)
         dest_path = self._resolve_path(destination)
         await self.store.rename_async(source_path, dest_path)
 
     async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:  # pyright: ignore[reportUnusedParameter]
-        """Async get object metadata using native obstore async."""
+        """Get object metadata from storage asynchronously."""
         resolved_path = self._resolve_path(path)
-        metadata = await self.store.head_async(resolved_path)
-
-        result = {"path": resolved_path, "exists": True}
-
-        for attr in ["size", "last_modified", "e_tag", "version"]:
-            if hasattr(metadata, attr):
-                result[attr] = getattr(metadata, attr)
-
-        # Include custom metadata if available
-        if hasattr(metadata, "metadata"):
-            custom_metadata = getattr(metadata, "metadata", None)
-            if custom_metadata:
-                result["custom_metadata"] = custom_metadata
+        result: dict[str, Any] = {}
+        try:
+            metadata = await self.store.head_async(resolved_path)
+            result.update(
+                {
+                    "path": resolved_path,
+                    "exists": True,
+                    "size": metadata.size,
+                    "last_modified": metadata.last_modified,
+                    "e_tag": metadata.e_tag,
+                    "version": metadata.version,
+                }
+            )
+            if hasattr(metadata, "metadata") and metadata.metadata:
+                result["custom_metadata"] = metadata.metadata
 
-        return result
+        except Exception:
+            return {"path": resolved_path, "exists": False}
+        else:
+            return result
 
     async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
-        """Async read Arrow table using native obstore async."""
+        """Read Arrow table from storage asynchronously."""
         resolved_path = self._resolve_path(path)
         return await self.store.read_arrow_async(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
 
     async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
-        """Async write Arrow table using native obstore async."""
+        """Write Arrow table to storage asynchronously."""
         resolved_path = self._resolve_path(path)
         if hasattr(self.store, "write_arrow_async"):
             await self.store.write_arrow_async(resolved_path, table, **kwargs)  # pyright: ignore[reportAttributeAccessIssue]
         else:
-            # Fall back to writing as Parquet via bytes
            import io
 
             import pyarrow.parquet as pq
475
454
  buffer.seek(0)
476
455
  await self.write_bytes_async(resolved_path, buffer.read())
477
456
 
478
- async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
457
+ def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
479
458
  resolved_pattern = self._resolve_path(pattern)
480
- async for batch in self.store.stream_arrow_async(resolved_pattern, **kwargs): # pyright: ignore[reportAttributeAccessIssue]
481
- yield batch
459
+ return _AsyncArrowIterator(self.store, resolved_pattern, **kwargs)
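
Because stream_arrow_async is now a plain method returning _AsyncArrowIterator rather than an async generator, the calling convention is unchanged: async for drives the returned object exactly as it drove the generator. A hedged sketch; the bucket and pattern are hypothetical, and streaming requires a store that exposes obstore's Arrow streaming API:

import asyncio

from sqlspec.storage.backends.obstore import ObStoreBackend


async def count_rows() -> int:
    backend = ObStoreBackend("s3://my-bucket", base_path="exports")  # hypothetical bucket
    total = 0
    async for batch in backend.stream_arrow_async("*.parquet"):
        total += batch.num_rows  # each item is an Arrow RecordBatch
    return total


# asyncio.run(count_rows())  # needs real credentials, so left commented out
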
sqlspec/storage/capabilities.py

@@ -6,6 +6,8 @@ This module provides a centralized way to track and query storage backend capabilities.
 from dataclasses import dataclass
 from typing import ClassVar
 
+from mypy_extensions import mypyc_attr
+
 __all__ = ("HasStorageCapabilities", "StorageCapabilities")
 
 
@@ -13,7 +15,6 @@ __all__ = ("HasStorageCapabilities", "StorageCapabilities")
 class StorageCapabilities:
     """Tracks capabilities of a storage backend."""
 
-    # Basic operations
     supports_read: bool = True
     supports_write: bool = True
     supports_delete: bool = True
@@ -23,7 +24,6 @@ class StorageCapabilities:
     supports_move: bool = True
     supports_metadata: bool = True
 
-    # Advanced operations
     supports_arrow: bool = False
     supports_streaming: bool = False
     supports_async: bool = False
@@ -31,12 +31,10 @@ class StorageCapabilities:
     supports_multipart_upload: bool = False
     supports_compression: bool = False
 
-    # Protocol-specific features
     supports_s3_select: bool = False
     supports_gcs_compose: bool = False
     supports_azure_snapshots: bool = False
 
-    # Performance characteristics
     is_remote: bool = True
     is_cloud_native: bool = False
     has_low_latency: bool = False
@@ -85,9 +83,12 @@ class StorageCapabilities:
     )
 
 
+@mypyc_attr(allow_interpreted_subclasses=True)
 class HasStorageCapabilities:
     """Mixin for storage backends that expose their capabilities."""
 
+    __slots__ = ()
+
     capabilities: ClassVar[StorageCapabilities]
 
     @classmethod
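
Since capabilities is a ClassVar on the mixin, callers can feature-detect without instantiating a backend; per the obstore diff above, ObStoreBackend advertises Arrow and streaming support. A small sketch:

from sqlspec.storage.backends.obstore import ObStoreBackend

caps = ObStoreBackend.capabilities
if caps.supports_arrow and caps.supports_streaming:
    print("backend can stream Arrow record batches")
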