sqlspec 0.14.0__py3-none-any.whl → 0.15.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (158)
  1. sqlspec/__init__.py +50 -25
  2. sqlspec/__main__.py +12 -0
  3. sqlspec/__metadata__.py +1 -3
  4. sqlspec/_serialization.py +1 -2
  5. sqlspec/_sql.py +256 -120
  6. sqlspec/_typing.py +278 -142
  7. sqlspec/adapters/adbc/__init__.py +4 -3
  8. sqlspec/adapters/adbc/_types.py +12 -0
  9. sqlspec/adapters/adbc/config.py +115 -248
  10. sqlspec/adapters/adbc/driver.py +462 -353
  11. sqlspec/adapters/aiosqlite/__init__.py +18 -3
  12. sqlspec/adapters/aiosqlite/_types.py +13 -0
  13. sqlspec/adapters/aiosqlite/config.py +199 -129
  14. sqlspec/adapters/aiosqlite/driver.py +230 -269
  15. sqlspec/adapters/asyncmy/__init__.py +18 -3
  16. sqlspec/adapters/asyncmy/_types.py +12 -0
  17. sqlspec/adapters/asyncmy/config.py +80 -168
  18. sqlspec/adapters/asyncmy/driver.py +260 -225
  19. sqlspec/adapters/asyncpg/__init__.py +19 -4
  20. sqlspec/adapters/asyncpg/_types.py +17 -0
  21. sqlspec/adapters/asyncpg/config.py +82 -181
  22. sqlspec/adapters/asyncpg/driver.py +285 -383
  23. sqlspec/adapters/bigquery/__init__.py +17 -3
  24. sqlspec/adapters/bigquery/_types.py +12 -0
  25. sqlspec/adapters/bigquery/config.py +191 -258
  26. sqlspec/adapters/bigquery/driver.py +474 -646
  27. sqlspec/adapters/duckdb/__init__.py +14 -3
  28. sqlspec/adapters/duckdb/_types.py +12 -0
  29. sqlspec/adapters/duckdb/config.py +415 -351
  30. sqlspec/adapters/duckdb/driver.py +343 -413
  31. sqlspec/adapters/oracledb/__init__.py +19 -5
  32. sqlspec/adapters/oracledb/_types.py +14 -0
  33. sqlspec/adapters/oracledb/config.py +123 -379
  34. sqlspec/adapters/oracledb/driver.py +507 -560
  35. sqlspec/adapters/psqlpy/__init__.py +13 -3
  36. sqlspec/adapters/psqlpy/_types.py +11 -0
  37. sqlspec/adapters/psqlpy/config.py +93 -254
  38. sqlspec/adapters/psqlpy/driver.py +505 -234
  39. sqlspec/adapters/psycopg/__init__.py +19 -5
  40. sqlspec/adapters/psycopg/_types.py +17 -0
  41. sqlspec/adapters/psycopg/config.py +143 -403
  42. sqlspec/adapters/psycopg/driver.py +706 -872
  43. sqlspec/adapters/sqlite/__init__.py +14 -3
  44. sqlspec/adapters/sqlite/_types.py +11 -0
  45. sqlspec/adapters/sqlite/config.py +202 -118
  46. sqlspec/adapters/sqlite/driver.py +264 -303
  47. sqlspec/base.py +105 -9
  48. sqlspec/{statement/builder → builder}/__init__.py +12 -14
  49. sqlspec/{statement/builder → builder}/_base.py +120 -55
  50. sqlspec/{statement/builder → builder}/_column.py +17 -6
  51. sqlspec/{statement/builder → builder}/_ddl.py +46 -79
  52. sqlspec/{statement/builder → builder}/_ddl_utils.py +5 -10
  53. sqlspec/{statement/builder → builder}/_delete.py +6 -25
  54. sqlspec/{statement/builder → builder}/_insert.py +6 -64
  55. sqlspec/builder/_merge.py +56 -0
  56. sqlspec/{statement/builder → builder}/_parsing_utils.py +3 -10
  57. sqlspec/{statement/builder → builder}/_select.py +11 -56
  58. sqlspec/{statement/builder → builder}/_update.py +12 -18
  59. sqlspec/{statement/builder → builder}/mixins/__init__.py +10 -14
  60. sqlspec/{statement/builder → builder}/mixins/_cte_and_set_ops.py +48 -59
  61. sqlspec/{statement/builder → builder}/mixins/_insert_operations.py +22 -16
  62. sqlspec/{statement/builder → builder}/mixins/_join_operations.py +1 -3
  63. sqlspec/{statement/builder → builder}/mixins/_merge_operations.py +3 -5
  64. sqlspec/{statement/builder → builder}/mixins/_order_limit_operations.py +3 -3
  65. sqlspec/{statement/builder → builder}/mixins/_pivot_operations.py +4 -8
  66. sqlspec/{statement/builder → builder}/mixins/_select_operations.py +21 -36
  67. sqlspec/{statement/builder → builder}/mixins/_update_operations.py +3 -14
  68. sqlspec/{statement/builder → builder}/mixins/_where_clause.py +52 -79
  69. sqlspec/cli.py +4 -5
  70. sqlspec/config.py +180 -133
  71. sqlspec/core/__init__.py +63 -0
  72. sqlspec/core/cache.py +873 -0
  73. sqlspec/core/compiler.py +396 -0
  74. sqlspec/core/filters.py +828 -0
  75. sqlspec/core/hashing.py +310 -0
  76. sqlspec/core/parameters.py +1209 -0
  77. sqlspec/core/result.py +664 -0
  78. sqlspec/{statement → core}/splitter.py +321 -191
  79. sqlspec/core/statement.py +651 -0
  80. sqlspec/driver/__init__.py +7 -10
  81. sqlspec/driver/_async.py +387 -176
  82. sqlspec/driver/_common.py +527 -289
  83. sqlspec/driver/_sync.py +390 -172
  84. sqlspec/driver/mixins/__init__.py +2 -19
  85. sqlspec/driver/mixins/_result_tools.py +168 -0
  86. sqlspec/driver/mixins/_sql_translator.py +6 -3
  87. sqlspec/exceptions.py +5 -252
  88. sqlspec/extensions/aiosql/adapter.py +93 -96
  89. sqlspec/extensions/litestar/config.py +0 -1
  90. sqlspec/extensions/litestar/handlers.py +15 -26
  91. sqlspec/extensions/litestar/plugin.py +16 -14
  92. sqlspec/extensions/litestar/providers.py +17 -52
  93. sqlspec/loader.py +424 -105
  94. sqlspec/migrations/__init__.py +12 -0
  95. sqlspec/migrations/base.py +92 -68
  96. sqlspec/migrations/commands.py +24 -106
  97. sqlspec/migrations/loaders.py +402 -0
  98. sqlspec/migrations/runner.py +49 -51
  99. sqlspec/migrations/tracker.py +31 -44
  100. sqlspec/migrations/utils.py +64 -24
  101. sqlspec/protocols.py +7 -183
  102. sqlspec/storage/__init__.py +1 -1
  103. sqlspec/storage/backends/base.py +37 -40
  104. sqlspec/storage/backends/fsspec.py +136 -112
  105. sqlspec/storage/backends/obstore.py +138 -160
  106. sqlspec/storage/capabilities.py +5 -4
  107. sqlspec/storage/registry.py +57 -106
  108. sqlspec/typing.py +136 -115
  109. sqlspec/utils/__init__.py +2 -3
  110. sqlspec/utils/correlation.py +0 -3
  111. sqlspec/utils/deprecation.py +6 -6
  112. sqlspec/utils/fixtures.py +6 -6
  113. sqlspec/utils/logging.py +0 -2
  114. sqlspec/utils/module_loader.py +7 -12
  115. sqlspec/utils/singleton.py +0 -1
  116. sqlspec/utils/sync_tools.py +16 -37
  117. sqlspec/utils/text.py +12 -51
  118. sqlspec/utils/type_guards.py +443 -232
  119. {sqlspec-0.14.0.dist-info → sqlspec-0.15.0.dist-info}/METADATA +7 -2
  120. sqlspec-0.15.0.dist-info/RECORD +134 -0
  121. sqlspec-0.15.0.dist-info/entry_points.txt +2 -0
  122. sqlspec/driver/connection.py +0 -207
  123. sqlspec/driver/mixins/_cache.py +0 -114
  124. sqlspec/driver/mixins/_csv_writer.py +0 -91
  125. sqlspec/driver/mixins/_pipeline.py +0 -508
  126. sqlspec/driver/mixins/_query_tools.py +0 -796
  127. sqlspec/driver/mixins/_result_utils.py +0 -138
  128. sqlspec/driver/mixins/_storage.py +0 -912
  129. sqlspec/driver/mixins/_type_coercion.py +0 -128
  130. sqlspec/driver/parameters.py +0 -138
  131. sqlspec/statement/__init__.py +0 -21
  132. sqlspec/statement/builder/_merge.py +0 -95
  133. sqlspec/statement/cache.py +0 -50
  134. sqlspec/statement/filters.py +0 -625
  135. sqlspec/statement/parameters.py +0 -996
  136. sqlspec/statement/pipelines/__init__.py +0 -210
  137. sqlspec/statement/pipelines/analyzers/__init__.py +0 -9
  138. sqlspec/statement/pipelines/analyzers/_analyzer.py +0 -646
  139. sqlspec/statement/pipelines/context.py +0 -115
  140. sqlspec/statement/pipelines/transformers/__init__.py +0 -7
  141. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +0 -88
  142. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +0 -1247
  143. sqlspec/statement/pipelines/transformers/_remove_comments_and_hints.py +0 -76
  144. sqlspec/statement/pipelines/validators/__init__.py +0 -23
  145. sqlspec/statement/pipelines/validators/_dml_safety.py +0 -290
  146. sqlspec/statement/pipelines/validators/_parameter_style.py +0 -370
  147. sqlspec/statement/pipelines/validators/_performance.py +0 -714
  148. sqlspec/statement/pipelines/validators/_security.py +0 -967
  149. sqlspec/statement/result.py +0 -435
  150. sqlspec/statement/sql.py +0 -1774
  151. sqlspec/utils/cached_property.py +0 -25
  152. sqlspec/utils/statement_hashing.py +0 -203
  153. sqlspec-0.14.0.dist-info/RECORD +0 -143
  154. sqlspec-0.14.0.dist-info/entry_points.txt +0 -2
  155. /sqlspec/{statement/builder → builder}/mixins/_delete_operations.py +0 -0
  156. {sqlspec-0.14.0.dist-info → sqlspec-0.15.0.dist-info}/WHEEL +0 -0
  157. {sqlspec-0.14.0.dist-info → sqlspec-0.15.0.dist-info}/licenses/LICENSE +0 -0
  158. {sqlspec-0.14.0.dist-info → sqlspec-0.15.0.dist-info}/licenses/NOTICE +0 -0
@@ -1,166 +1,163 @@
1
1
  """Base class for storage backends."""
2
2
 
3
- from __future__ import annotations
4
-
5
3
  from abc import ABC, abstractmethod
6
- from typing import TYPE_CHECKING, Any
4
+ from collections.abc import AsyncIterator, Iterator
5
+ from typing import Any
7
6
 
8
- if TYPE_CHECKING:
9
- from collections.abc import AsyncIterator, Iterator
7
+ from mypy_extensions import mypyc_attr
10
8
 
11
- from sqlspec.typing import ArrowRecordBatch, ArrowTable
9
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
12
10
 
13
11
  __all__ = ("ObjectStoreBase",)
14
12
 
15
13
 
14
+ @mypyc_attr(allow_interpreted_subclasses=True)
16
15
  class ObjectStoreBase(ABC):
17
- """Base class for instrumented storage backends."""
16
+ """Base class for storage backends."""
17
+
18
+ __slots__ = ()
18
19
 
19
- # Sync Operations
20
20
  @abstractmethod
21
21
  def read_bytes(self, path: str, **kwargs: Any) -> bytes:
22
- """Actual implementation of read_bytes in subclasses."""
22
+ """Read bytes from storage."""
23
23
  raise NotImplementedError
24
24
 
25
25
  @abstractmethod
26
26
  def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
27
- """Actual implementation of write_bytes in subclasses."""
27
+ """Write bytes to storage."""
28
28
  raise NotImplementedError
29
29
 
30
30
  @abstractmethod
31
31
  def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
32
- """Actual implementation of read_text in subclasses."""
32
+ """Read text from storage."""
33
33
  raise NotImplementedError
34
34
 
35
35
  @abstractmethod
36
36
  def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
37
- """Actual implementation of write_text in subclasses."""
37
+ """Write text to storage."""
38
38
  raise NotImplementedError
39
39
 
40
40
  @abstractmethod
41
41
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
42
- """Actual implementation of list_objects in subclasses."""
42
+ """List objects in storage."""
43
43
  raise NotImplementedError
44
44
 
45
45
  @abstractmethod
46
46
  def exists(self, path: str, **kwargs: Any) -> bool:
47
- """Actual implementation of exists in subclasses."""
47
+ """Check if object exists in storage."""
48
48
  raise NotImplementedError
49
49
 
50
50
  @abstractmethod
51
51
  def delete(self, path: str, **kwargs: Any) -> None:
52
- """Actual implementation of delete in subclasses."""
52
+ """Delete object from storage."""
53
53
  raise NotImplementedError
54
54
 
55
55
  @abstractmethod
56
56
  def copy(self, source: str, destination: str, **kwargs: Any) -> None:
57
- """Actual implementation of copy in subclasses."""
57
+ """Copy object within storage."""
58
58
  raise NotImplementedError
59
59
 
60
60
  @abstractmethod
61
61
  def move(self, source: str, destination: str, **kwargs: Any) -> None:
62
- """Actual implementation of move in subclasses."""
62
+ """Move object within storage."""
63
63
  raise NotImplementedError
64
64
 
65
65
  @abstractmethod
66
66
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
67
- """Actual implementation of glob in subclasses."""
67
+ """Find objects matching pattern."""
68
68
  raise NotImplementedError
69
69
 
70
70
  @abstractmethod
71
71
  def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
72
- """Actual implementation of get_metadata in subclasses."""
72
+ """Get object metadata from storage."""
73
73
  raise NotImplementedError
74
74
 
75
75
  @abstractmethod
76
76
  def is_object(self, path: str) -> bool:
77
- """Actual implementation of is_object in subclasses."""
77
+ """Check if path points to an object."""
78
78
  raise NotImplementedError
79
79
 
80
80
  @abstractmethod
81
81
  def is_path(self, path: str) -> bool:
82
- """Actual implementation of is_path in subclasses."""
82
+ """Check if path points to a directory."""
83
83
  raise NotImplementedError
84
84
 
85
85
  @abstractmethod
86
86
  def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
87
- """Actual implementation of read_arrow in subclasses."""
87
+ """Read Arrow table from storage."""
88
88
  raise NotImplementedError
89
89
 
90
90
  @abstractmethod
91
91
  def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
92
- """Actual implementation of write_arrow in subclasses."""
92
+ """Write Arrow table to storage."""
93
93
  raise NotImplementedError
94
94
 
95
95
  @abstractmethod
96
96
  def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
97
- """Actual implementation of stream_arrow in subclasses."""
97
+ """Stream Arrow record batches from storage."""
98
98
  raise NotImplementedError
99
99
 
100
- # Abstract async methods that subclasses must implement
101
- # Backends can either provide native async implementations or wrap sync methods
102
-
103
100
  @abstractmethod
104
101
  async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
105
- """Actual async implementation of read_bytes in subclasses."""
102
+ """Read bytes from storage asynchronously."""
106
103
  raise NotImplementedError
107
104
 
108
105
  @abstractmethod
109
106
  async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
110
- """Actual async implementation of write_bytes in subclasses."""
107
+ """Write bytes to storage asynchronously."""
111
108
  raise NotImplementedError
112
109
 
113
110
  @abstractmethod
114
111
  async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
115
- """Actual async implementation of read_text in subclasses."""
112
+ """Read text from storage asynchronously."""
116
113
  raise NotImplementedError
117
114
 
118
115
  @abstractmethod
119
116
  async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
120
- """Actual async implementation of write_text in subclasses."""
117
+ """Write text to storage asynchronously."""
121
118
  raise NotImplementedError
122
119
 
123
120
  @abstractmethod
124
121
  async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
125
- """Actual async implementation of list_objects in subclasses."""
122
+ """List objects in storage asynchronously."""
126
123
  raise NotImplementedError
127
124
 
128
125
  @abstractmethod
129
126
  async def exists_async(self, path: str, **kwargs: Any) -> bool:
130
- """Actual async implementation of exists in subclasses."""
127
+ """Check if object exists in storage asynchronously."""
131
128
  raise NotImplementedError
132
129
 
133
130
  @abstractmethod
134
131
  async def delete_async(self, path: str, **kwargs: Any) -> None:
135
- """Actual async implementation of delete in subclasses."""
132
+ """Delete object from storage asynchronously."""
136
133
  raise NotImplementedError
137
134
 
138
135
  @abstractmethod
139
136
  async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
140
- """Actual async implementation of copy in subclasses."""
137
+ """Copy object within storage asynchronously."""
141
138
  raise NotImplementedError
142
139
 
143
140
  @abstractmethod
144
141
  async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
145
- """Actual async implementation of move in subclasses."""
142
+ """Move object within storage asynchronously."""
146
143
  raise NotImplementedError
147
144
 
148
145
  @abstractmethod
149
146
  async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
150
- """Actual async implementation of get_metadata in subclasses."""
147
+ """Get object metadata from storage asynchronously."""
151
148
  raise NotImplementedError
152
149
 
153
150
  @abstractmethod
154
151
  async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
155
- """Actual async implementation of read_arrow in subclasses."""
152
+ """Read Arrow table from storage asynchronously."""
156
153
  raise NotImplementedError
157
154
 
158
155
  @abstractmethod
159
156
  async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
160
- """Actual async implementation of write_arrow in subclasses."""
157
+ """Write Arrow table to storage asynchronously."""
161
158
  raise NotImplementedError
162
159
 
163
160
  @abstractmethod
164
161
  def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
165
- """Actual async implementation of stream_arrow in subclasses."""
162
+ """Stream Arrow record batches from storage asynchronously."""
166
163
  raise NotImplementedError
@@ -1,10 +1,8 @@
1
- # pyright: ignore=reportUnknownVariableType
2
1
  import logging
3
- from io import BytesIO
4
2
  from pathlib import Path
5
- from typing import TYPE_CHECKING, Any, ClassVar, Union
3
+ from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
6
4
 
7
- from sqlspec.exceptions import MissingDependencyError
5
+ from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
8
6
  from sqlspec.storage.backends.base import ObjectStoreBase
9
7
  from sqlspec.storage.capabilities import StorageCapabilities
10
8
  from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
@@ -21,34 +19,51 @@ __all__ = ("FSSpecBackend",)
21
19
 
22
20
  logger = logging.getLogger(__name__)
23
21
 
24
- # Constants for URI validation
25
- URI_PARTS_MIN_COUNT = 2
26
- """Minimum number of parts in a valid cloud storage URI (bucket/path)."""
27
22
 
28
- AZURE_URI_PARTS_MIN_COUNT = 2
29
- """Minimum number of parts in an Azure URI (account/container)."""
23
+ class _ArrowStreamer:
24
+ def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
25
+ self.backend = backend
26
+ self.pattern = pattern
27
+ self.kwargs = kwargs
28
+ self.paths_iterator: Optional[Iterator[str]] = None
29
+ self.batch_iterator: Optional[Iterator[ArrowRecordBatch]] = None
30
30
 
31
- AZURE_URI_BLOB_INDEX = 2
32
- """Index of blob name in Azure URI parts."""
31
+ def __aiter__(self) -> "_ArrowStreamer":
32
+ return self
33
33
 
34
+ async def _initialize(self) -> None:
35
+ """Initialize the paths iterator."""
36
+ if self.paths_iterator is None:
37
+ paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
38
+ self.paths_iterator = iter(paths)
34
39
 
35
- def _join_path(prefix: str, path: str) -> str:
36
- if not prefix:
37
- return path
38
- prefix = prefix.rstrip("/")
39
- path = path.lstrip("/")
40
- return f"{prefix}/{path}"
40
+ async def __anext__(self) -> "ArrowRecordBatch":
41
+ await self._initialize()
42
+
43
+ if self.batch_iterator:
44
+ try:
45
+ return next(self.batch_iterator)
46
+ except StopIteration:
47
+ self.batch_iterator = None
48
+
49
+ if self.paths_iterator:
50
+ try:
51
+ path = next(self.paths_iterator)
52
+ self.batch_iterator = await async_(self.backend._stream_file_batches)(path)
53
+ return await self.__anext__()
54
+ except StopIteration:
55
+ raise StopAsyncIteration
56
+ raise StopAsyncIteration
41
57
 
42
58
 
43
59
  class FSSpecBackend(ObjectStoreBase):
44
- """Extended protocol support via fsspec.
60
+ """Storage backend using fsspec.
45
61
 
46
- This backend implements the ObjectStoreProtocol using fsspec,
47
- providing support for extended protocols not covered by obstore
48
- and offering fallback capabilities.
62
+ Implements the ObjectStoreProtocol using fsspec,
63
+ providing support for various protocols including HTTP, HTTPS, FTP,
64
+ and cloud storage services.
49
65
  """
50
66
 
51
- # FSSpec supports most operations but varies by underlying filesystem
52
67
  _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
53
68
  supports_arrow=PYARROW_INSTALLED,
54
69
  supports_streaming=PYARROW_INSTALLED,
@@ -75,7 +90,6 @@ class FSSpecBackend(ObjectStoreBase):
75
90
  self.protocol = getattr(fs, "protocol", "unknown")
76
91
  self._fs_uri = f"{self.protocol}://"
77
92
 
78
- # Set instance-level capabilities based on detected protocol
79
93
  self._instance_capabilities = self._detect_capabilities()
80
94
 
81
95
  super().__init__()
@@ -145,17 +159,24 @@ class FSSpecBackend(ObjectStoreBase):
145
159
  def base_uri(self) -> str:
146
160
  return self._fs_uri
147
161
 
148
- # Core Operations (sync)
149
162
  def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
150
163
  """Read bytes from an object."""
151
- resolved_path = self._resolve_path(path)
152
- return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
164
+ try:
165
+ resolved_path = self._resolve_path(path)
166
+ return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
167
+ except Exception as exc:
168
+ msg = f"Failed to read bytes from {path}"
169
+ raise StorageOperationFailedError(msg) from exc
153
170
 
154
171
  def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
155
172
  """Write bytes to an object."""
156
- resolved_path = self._resolve_path(path)
157
- with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
158
- f.write(data) # pyright: ignore
173
+ try:
174
+ resolved_path = self._resolve_path(path)
175
+ with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
176
+ f.write(data) # pyright: ignore
177
+ except Exception as exc:
178
+ msg = f"Failed to write bytes to {path}"
179
+ raise StorageOperationFailedError(msg) from exc
159
180
 
160
181
  def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
161
182
  """Read text from an object."""
@@ -166,7 +187,6 @@ class FSSpecBackend(ObjectStoreBase):
166
187
  """Write text to an object."""
167
188
  self.write_bytes(path, data.encode(encoding), **kwargs)
168
189
 
169
- # Object Operations
170
190
  def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
171
191
  """Check if an object exists."""
172
192
  resolved_path = self._resolve_path(path)
@@ -174,66 +194,81 @@ class FSSpecBackend(ObjectStoreBase):
174
194
 
175
195
  def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
176
196
  """Delete an object."""
177
- resolved_path = self._resolve_path(path)
178
- self.fs.rm(resolved_path, **kwargs)
197
+ try:
198
+ resolved_path = self._resolve_path(path)
199
+ self.fs.rm(resolved_path, **kwargs)
200
+ except Exception as exc:
201
+ msg = f"Failed to delete {path}"
202
+ raise StorageOperationFailedError(msg) from exc
179
203
 
180
204
  def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
181
205
  """Copy an object."""
182
- source_path = self._resolve_path(source)
183
- dest_path = self._resolve_path(destination)
184
- self.fs.copy(source_path, dest_path, **kwargs)
206
+ try:
207
+ source_path = self._resolve_path(source)
208
+ dest_path = self._resolve_path(destination)
209
+ self.fs.copy(source_path, dest_path, **kwargs)
210
+ except Exception as exc:
211
+ msg = f"Failed to copy {source} to {destination}"
212
+ raise StorageOperationFailedError(msg) from exc
185
213
 
186
214
  def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
187
215
  """Move an object."""
188
- source_path = self._resolve_path(source)
189
- dest_path = self._resolve_path(destination)
190
- self.fs.mv(source_path, dest_path, **kwargs)
216
+ try:
217
+ source_path = self._resolve_path(source)
218
+ dest_path = self._resolve_path(destination)
219
+ self.fs.mv(source_path, dest_path, **kwargs)
220
+ except Exception as exc:
221
+ msg = f"Failed to move {source} to {destination}"
222
+ raise StorageOperationFailedError(msg) from exc
191
223
 
192
- # Arrow Operations
193
224
  def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
194
225
  """Read an Arrow table from storage."""
195
226
  if not PYARROW_INSTALLED:
196
227
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
228
+ try:
229
+ import pyarrow.parquet as pq
197
230
 
198
- import pyarrow.parquet as pq
199
-
200
- resolved_path = self._resolve_path(path)
201
- with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
202
- return pq.read_table(f)
231
+ resolved_path = self._resolve_path(path)
232
+ with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
233
+ return pq.read_table(f)
234
+ except Exception as exc:
235
+ msg = f"Failed to read Arrow table from {path}"
236
+ raise StorageOperationFailedError(msg) from exc
203
237
 
204
238
  def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
205
239
  """Write an Arrow table to storage."""
206
240
  if not PYARROW_INSTALLED:
207
241
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
242
+ try:
243
+ import pyarrow.parquet as pq
208
244
 
209
- import pyarrow.parquet as pq
210
-
211
- resolved_path = self._resolve_path(path)
212
- with self.fs.open(resolved_path, mode="wb") as f:
213
- pq.write_table(table, f, **kwargs) # pyright: ignore
245
+ resolved_path = self._resolve_path(path)
246
+ with self.fs.open(resolved_path, mode="wb") as f:
247
+ pq.write_table(table, f, **kwargs) # pyright: ignore
248
+ except Exception as exc:
249
+ msg = f"Failed to write Arrow table to {path}"
250
+ raise StorageOperationFailedError(msg) from exc
214
251
 
215
- # Listing Operations
216
252
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
217
253
  """List objects with optional prefix."""
218
- resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path
219
-
220
- # Use fs.glob for listing files
221
- if recursive:
222
- pattern = f"{resolved_prefix}/**" if resolved_prefix else "**"
223
- else:
224
- pattern = f"{resolved_prefix}/*" if resolved_prefix else "*"
225
-
226
- paths = [str(path) for path in self.fs.glob(pattern, **kwargs) if not self.fs.isdir(path)]
227
- return sorted(paths)
254
+ try:
255
+ resolved_prefix = self._resolve_path(prefix)
256
+ if recursive:
257
+ return sorted(self.fs.find(resolved_prefix, **kwargs))
258
+ return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
259
+ except Exception as exc:
260
+ msg = f"Failed to list objects with prefix '{prefix}'"
261
+ raise StorageOperationFailedError(msg) from exc
228
262
 
229
263
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
230
264
  """Find objects matching a glob pattern."""
231
- resolved_pattern = self._resolve_path(pattern)
232
- # Use fsspec's native glob
233
- paths = [str(path) for path in self.fs.glob(resolved_pattern, **kwargs) if not self.fs.isdir(path)]
234
- return sorted(paths)
265
+ try:
266
+ resolved_pattern = self._resolve_path(pattern)
267
+ return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
268
+ except Exception as exc:
269
+ msg = f"Failed to glob with pattern '{pattern}'"
270
+ raise StorageOperationFailedError(msg) from exc
235
271
 
236
- # Path Operations
237
272
  def is_object(self, path: str) -> bool:
238
273
  """Check if path points to an object."""
239
274
  resolved_path = self._resolve_path(path)
@@ -246,23 +281,29 @@ class FSSpecBackend(ObjectStoreBase):
246
281
 
247
282
  def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
248
283
  """Get object metadata."""
249
- info = self.fs.info(self._resolve_path(path), **kwargs)
250
-
251
- if isinstance(info, dict):
252
- return info
253
-
254
- # Try to get dict representation
255
284
  try:
256
- return vars(info) # type: ignore[no-any-return]
257
- except AttributeError:
258
- pass
259
-
260
- resolved_path = self._resolve_path(path)
285
+ resolved_path = self._resolve_path(path)
286
+ info = self.fs.info(resolved_path, **kwargs)
287
+ if isinstance(info, dict):
288
+ return {
289
+ "path": resolved_path,
290
+ "exists": True,
291
+ "size": info.get("size"),
292
+ "last_modified": info.get("mtime"),
293
+ "type": info.get("type", "file"),
294
+ }
295
+
296
+ except FileNotFoundError:
297
+ return {"path": self._resolve_path(path), "exists": False}
298
+ except Exception as exc:
299
+ msg = f"Failed to get metadata for {path}"
300
+ raise StorageOperationFailedError(msg) from exc
261
301
  return {
262
302
  "path": resolved_path,
263
- "exists": self.fs.exists(resolved_path),
264
- "size": getattr(info, "size", None),
265
- "type": getattr(info, "type", "file"),
303
+ "exists": True,
304
+ "size": info.size,
305
+ "last_modified": info.mtime,
306
+ "type": info.type,
266
307
  }
267
308
 
268
309
  def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
@@ -278,85 +319,68 @@ class FSSpecBackend(ObjectStoreBase):
278
319
  if not PYARROW_INSTALLED:
279
320
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
280
321
 
281
- # Stream each file as record batches
282
322
  for obj_path in self.glob(pattern, **kwargs):
283
323
  yield from self._stream_file_batches(obj_path)
284
324
 
285
325
  async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
286
- """Async read bytes. Wraps the sync implementation."""
326
+ """Read bytes from storage asynchronously."""
287
327
  return await async_(self.read_bytes)(path, **kwargs)
288
328
 
289
329
  async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
290
- """Async write bytes. Wraps the sync implementation."""
330
+ """Write bytes to storage asynchronously."""
291
331
  return await async_(self.write_bytes)(path, data, **kwargs)
292
332
 
293
- async def _stream_file_batches_async(self, obj_path: Union[str, Path]) -> "AsyncIterator[ArrowRecordBatch]":
294
- import pyarrow.parquet as pq
295
-
296
- data = await self.read_bytes_async(obj_path)
297
- parquet_file = pq.ParquetFile(BytesIO(data))
298
- for batch in parquet_file.iter_batches():
299
- yield batch
300
-
301
- async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
302
- """Async stream Arrow record batches.
303
-
304
- This implementation provides file-level async streaming. Each file is
305
- read into memory before its batches are processed.
333
+ def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
334
+ """Stream Arrow record batches from storage asynchronously.
306
335
 
307
336
  Args:
308
337
  pattern: The glob pattern to match.
309
338
  **kwargs: Additional arguments to pass to the glob method.
310
339
 
311
- Yields:
340
+ Returns:
312
341
  AsyncIterator of Arrow record batches
313
342
  """
314
343
  if not PYARROW_INSTALLED:
315
344
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
316
345
 
317
- paths = await async_(self.glob)(pattern, **kwargs)
318
-
319
- # Stream batches from each path
320
- for path in paths:
321
- async for batch in self._stream_file_batches_async(path):
322
- yield batch
346
+ return _ArrowStreamer(self, pattern, **kwargs)
323
347
 
324
348
  async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
325
- """Async read text. Wraps the sync implementation."""
349
+ """Read text from storage asynchronously."""
326
350
  return await async_(self.read_text)(path, encoding, **kwargs)
327
351
 
328
352
  async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
329
- """Async write text. Wraps the sync implementation."""
353
+ """Write text to storage asynchronously."""
330
354
  await async_(self.write_text)(path, data, encoding, **kwargs)
331
355
 
332
356
  async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
333
- """Async list objects. Wraps the sync implementation."""
357
+ """List objects in storage asynchronously."""
334
358
  return await async_(self.list_objects)(prefix, recursive, **kwargs)
335
359
 
336
360
  async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
337
- """Async exists check. Wraps the sync implementation."""
361
+ """Check if object exists in storage asynchronously."""
338
362
  return await async_(self.exists)(path, **kwargs)
339
363
 
340
364
  async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
341
- """Async delete. Wraps the sync implementation."""
365
+ """Delete object from storage asynchronously."""
342
366
  await async_(self.delete)(path, **kwargs)
343
367
 
344
368
  async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
345
- """Async copy. Wraps the sync implementation."""
369
+ """Copy object in storage asynchronously."""
346
370
  await async_(self.copy)(source, destination, **kwargs)
347
371
 
348
372
  async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
349
- """Async move. Wraps the sync implementation."""
373
+ """Move object in storage asynchronously."""
350
374
  await async_(self.move)(source, destination, **kwargs)
351
375
 
352
376
  async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
353
- """Async get metadata. Wraps the sync implementation."""
377
+ """Get object metadata from storage asynchronously."""
354
378
  return await async_(self.get_metadata)(path, **kwargs)
355
379
 
356
380
  async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
357
- """Async read Arrow. Wraps the sync implementation."""
381
+ """Read Arrow table from storage asynchronously."""
358
382
  return await async_(self.read_arrow)(path, **kwargs)
359
383
 
360
384
  async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
361
- """Async write Arrow. Wraps the sync implementation."""
385
+ """Write Arrow table to storage asynchronously."""
362
386
  await async_(self.write_arrow)(path, table, **kwargs)