sqlspec 0.16.1__cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (148) hide show
  1. 51ff5a9eadfdefd49f98__mypyc.cpython-39-aarch64-linux-gnu.so +0 -0
  2. sqlspec/__init__.py +92 -0
  3. sqlspec/__main__.py +12 -0
  4. sqlspec/__metadata__.py +14 -0
  5. sqlspec/_serialization.py +77 -0
  6. sqlspec/_sql.py +1780 -0
  7. sqlspec/_typing.py +680 -0
  8. sqlspec/adapters/__init__.py +0 -0
  9. sqlspec/adapters/adbc/__init__.py +5 -0
  10. sqlspec/adapters/adbc/_types.py +12 -0
  11. sqlspec/adapters/adbc/config.py +361 -0
  12. sqlspec/adapters/adbc/driver.py +512 -0
  13. sqlspec/adapters/aiosqlite/__init__.py +19 -0
  14. sqlspec/adapters/aiosqlite/_types.py +13 -0
  15. sqlspec/adapters/aiosqlite/config.py +253 -0
  16. sqlspec/adapters/aiosqlite/driver.py +248 -0
  17. sqlspec/adapters/asyncmy/__init__.py +19 -0
  18. sqlspec/adapters/asyncmy/_types.py +12 -0
  19. sqlspec/adapters/asyncmy/config.py +180 -0
  20. sqlspec/adapters/asyncmy/driver.py +274 -0
  21. sqlspec/adapters/asyncpg/__init__.py +21 -0
  22. sqlspec/adapters/asyncpg/_types.py +17 -0
  23. sqlspec/adapters/asyncpg/config.py +229 -0
  24. sqlspec/adapters/asyncpg/driver.py +344 -0
  25. sqlspec/adapters/bigquery/__init__.py +18 -0
  26. sqlspec/adapters/bigquery/_types.py +12 -0
  27. sqlspec/adapters/bigquery/config.py +298 -0
  28. sqlspec/adapters/bigquery/driver.py +558 -0
  29. sqlspec/adapters/duckdb/__init__.py +22 -0
  30. sqlspec/adapters/duckdb/_types.py +12 -0
  31. sqlspec/adapters/duckdb/config.py +504 -0
  32. sqlspec/adapters/duckdb/driver.py +368 -0
  33. sqlspec/adapters/oracledb/__init__.py +32 -0
  34. sqlspec/adapters/oracledb/_types.py +14 -0
  35. sqlspec/adapters/oracledb/config.py +317 -0
  36. sqlspec/adapters/oracledb/driver.py +538 -0
  37. sqlspec/adapters/psqlpy/__init__.py +16 -0
  38. sqlspec/adapters/psqlpy/_types.py +11 -0
  39. sqlspec/adapters/psqlpy/config.py +214 -0
  40. sqlspec/adapters/psqlpy/driver.py +530 -0
  41. sqlspec/adapters/psycopg/__init__.py +32 -0
  42. sqlspec/adapters/psycopg/_types.py +17 -0
  43. sqlspec/adapters/psycopg/config.py +426 -0
  44. sqlspec/adapters/psycopg/driver.py +796 -0
  45. sqlspec/adapters/sqlite/__init__.py +15 -0
  46. sqlspec/adapters/sqlite/_types.py +11 -0
  47. sqlspec/adapters/sqlite/config.py +240 -0
  48. sqlspec/adapters/sqlite/driver.py +294 -0
  49. sqlspec/base.py +571 -0
  50. sqlspec/builder/__init__.py +62 -0
  51. sqlspec/builder/_base.py +473 -0
  52. sqlspec/builder/_column.py +320 -0
  53. sqlspec/builder/_ddl.py +1346 -0
  54. sqlspec/builder/_ddl_utils.py +103 -0
  55. sqlspec/builder/_delete.py +76 -0
  56. sqlspec/builder/_insert.py +256 -0
  57. sqlspec/builder/_merge.py +71 -0
  58. sqlspec/builder/_parsing_utils.py +140 -0
  59. sqlspec/builder/_select.py +170 -0
  60. sqlspec/builder/_update.py +188 -0
  61. sqlspec/builder/mixins/__init__.py +55 -0
  62. sqlspec/builder/mixins/_cte_and_set_ops.py +222 -0
  63. sqlspec/builder/mixins/_delete_operations.py +41 -0
  64. sqlspec/builder/mixins/_insert_operations.py +244 -0
  65. sqlspec/builder/mixins/_join_operations.py +122 -0
  66. sqlspec/builder/mixins/_merge_operations.py +476 -0
  67. sqlspec/builder/mixins/_order_limit_operations.py +135 -0
  68. sqlspec/builder/mixins/_pivot_operations.py +153 -0
  69. sqlspec/builder/mixins/_select_operations.py +603 -0
  70. sqlspec/builder/mixins/_update_operations.py +187 -0
  71. sqlspec/builder/mixins/_where_clause.py +621 -0
  72. sqlspec/cli.py +247 -0
  73. sqlspec/config.py +395 -0
  74. sqlspec/core/__init__.py +63 -0
  75. sqlspec/core/cache.cpython-39-aarch64-linux-gnu.so +0 -0
  76. sqlspec/core/cache.py +871 -0
  77. sqlspec/core/compiler.cpython-39-aarch64-linux-gnu.so +0 -0
  78. sqlspec/core/compiler.py +417 -0
  79. sqlspec/core/filters.cpython-39-aarch64-linux-gnu.so +0 -0
  80. sqlspec/core/filters.py +830 -0
  81. sqlspec/core/hashing.cpython-39-aarch64-linux-gnu.so +0 -0
  82. sqlspec/core/hashing.py +310 -0
  83. sqlspec/core/parameters.cpython-39-aarch64-linux-gnu.so +0 -0
  84. sqlspec/core/parameters.py +1237 -0
  85. sqlspec/core/result.cpython-39-aarch64-linux-gnu.so +0 -0
  86. sqlspec/core/result.py +677 -0
  87. sqlspec/core/splitter.cpython-39-aarch64-linux-gnu.so +0 -0
  88. sqlspec/core/splitter.py +819 -0
  89. sqlspec/core/statement.cpython-39-aarch64-linux-gnu.so +0 -0
  90. sqlspec/core/statement.py +676 -0
  91. sqlspec/driver/__init__.py +19 -0
  92. sqlspec/driver/_async.py +502 -0
  93. sqlspec/driver/_common.py +631 -0
  94. sqlspec/driver/_sync.py +503 -0
  95. sqlspec/driver/mixins/__init__.py +6 -0
  96. sqlspec/driver/mixins/_result_tools.py +193 -0
  97. sqlspec/driver/mixins/_sql_translator.py +86 -0
  98. sqlspec/exceptions.py +193 -0
  99. sqlspec/extensions/__init__.py +0 -0
  100. sqlspec/extensions/aiosql/__init__.py +10 -0
  101. sqlspec/extensions/aiosql/adapter.py +461 -0
  102. sqlspec/extensions/litestar/__init__.py +6 -0
  103. sqlspec/extensions/litestar/_utils.py +52 -0
  104. sqlspec/extensions/litestar/cli.py +48 -0
  105. sqlspec/extensions/litestar/config.py +92 -0
  106. sqlspec/extensions/litestar/handlers.py +260 -0
  107. sqlspec/extensions/litestar/plugin.py +145 -0
  108. sqlspec/extensions/litestar/providers.py +454 -0
  109. sqlspec/loader.cpython-39-aarch64-linux-gnu.so +0 -0
  110. sqlspec/loader.py +760 -0
  111. sqlspec/migrations/__init__.py +35 -0
  112. sqlspec/migrations/base.py +414 -0
  113. sqlspec/migrations/commands.py +443 -0
  114. sqlspec/migrations/loaders.py +402 -0
  115. sqlspec/migrations/runner.py +213 -0
  116. sqlspec/migrations/tracker.py +140 -0
  117. sqlspec/migrations/utils.py +129 -0
  118. sqlspec/protocols.py +407 -0
  119. sqlspec/py.typed +0 -0
  120. sqlspec/storage/__init__.py +23 -0
  121. sqlspec/storage/backends/__init__.py +0 -0
  122. sqlspec/storage/backends/base.py +163 -0
  123. sqlspec/storage/backends/fsspec.py +386 -0
  124. sqlspec/storage/backends/obstore.py +459 -0
  125. sqlspec/storage/capabilities.py +102 -0
  126. sqlspec/storage/registry.py +239 -0
  127. sqlspec/typing.py +299 -0
  128. sqlspec/utils/__init__.py +3 -0
  129. sqlspec/utils/correlation.py +150 -0
  130. sqlspec/utils/deprecation.py +106 -0
  131. sqlspec/utils/fixtures.cpython-39-aarch64-linux-gnu.so +0 -0
  132. sqlspec/utils/fixtures.py +58 -0
  133. sqlspec/utils/logging.py +127 -0
  134. sqlspec/utils/module_loader.py +89 -0
  135. sqlspec/utils/serializers.py +4 -0
  136. sqlspec/utils/singleton.py +32 -0
  137. sqlspec/utils/sync_tools.cpython-39-aarch64-linux-gnu.so +0 -0
  138. sqlspec/utils/sync_tools.py +237 -0
  139. sqlspec/utils/text.cpython-39-aarch64-linux-gnu.so +0 -0
  140. sqlspec/utils/text.py +96 -0
  141. sqlspec/utils/type_guards.cpython-39-aarch64-linux-gnu.so +0 -0
  142. sqlspec/utils/type_guards.py +1139 -0
  143. sqlspec-0.16.1.dist-info/METADATA +365 -0
  144. sqlspec-0.16.1.dist-info/RECORD +148 -0
  145. sqlspec-0.16.1.dist-info/WHEEL +7 -0
  146. sqlspec-0.16.1.dist-info/entry_points.txt +2 -0
  147. sqlspec-0.16.1.dist-info/licenses/LICENSE +21 -0
  148. sqlspec-0.16.1.dist-info/licenses/NOTICE +29 -0
File without changes
@@ -0,0 +1,163 @@
1
+ """Base class for storage backends."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import AsyncIterator, Iterator
5
+ from typing import Any
6
+
7
+ from mypy_extensions import mypyc_attr
8
+
9
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
10
+
11
+ __all__ = ("ObjectStoreBase",)
12
+
13
+
14
@mypyc_attr(allow_interpreted_subclasses=True)
class ObjectStoreBase(ABC):
    """Abstract interface for storage backends.

    Declares the full synchronous and asynchronous object-store surface
    (byte/text I/O, listing, metadata, copy/move, and Arrow table /
    record-batch I/O) that every concrete backend must implement. All
    methods raise ``NotImplementedError`` here; concrete backends such as
    the fsspec- or obstore-based implementations override them.
    """

    # No instance state at this level; subclasses declare their own slots.
    __slots__ = ()

    @abstractmethod
    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
        """Read the object at ``path`` and return its raw bytes."""
        raise NotImplementedError

    @abstractmethod
    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Write ``data`` to the object at ``path``, creating or replacing it."""
        raise NotImplementedError

    @abstractmethod
    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read the object at ``path`` and decode it as text using ``encoding``."""
        raise NotImplementedError

    @abstractmethod
    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Encode ``data`` with ``encoding`` and write it to the object at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List object paths under ``prefix``; descend into sub-prefixes when ``recursive``."""
        raise NotImplementedError

    @abstractmethod
    def exists(self, path: str, **kwargs: Any) -> bool:
        """Return ``True`` if an object exists at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def delete(self, path: str, **kwargs: Any) -> None:
        """Delete the object at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
        """Copy the object at ``source`` to ``destination`` within the same store."""
        raise NotImplementedError

    @abstractmethod
    def move(self, source: str, destination: str, **kwargs: Any) -> None:
        """Move the object at ``source`` to ``destination`` within the same store."""
        raise NotImplementedError

    @abstractmethod
    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Return paths of objects whose names match the glob ``pattern``."""
        raise NotImplementedError

    @abstractmethod
    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Return a metadata mapping (size, modification time, etc.) for ``path``."""
        raise NotImplementedError

    @abstractmethod
    def is_object(self, path: str) -> bool:
        """Return ``True`` if ``path`` refers to a concrete object (not a prefix)."""
        raise NotImplementedError

    @abstractmethod
    def is_path(self, path: str) -> bool:
        """Return ``True`` if ``path`` refers to a directory-like prefix."""
        raise NotImplementedError

    @abstractmethod
    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
        """Read the object at ``path`` and return it as an Arrow table."""
        raise NotImplementedError

    @abstractmethod
    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Write the Arrow ``table`` to the object at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
        """Yield Arrow record batches from every object matching ``pattern``."""
        raise NotImplementedError

    @abstractmethod
    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
        """Async variant of :meth:`read_bytes`."""
        raise NotImplementedError

    @abstractmethod
    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Async variant of :meth:`write_bytes`."""
        raise NotImplementedError

    @abstractmethod
    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Async variant of :meth:`read_text`."""
        raise NotImplementedError

    @abstractmethod
    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Async variant of :meth:`write_text`."""
        raise NotImplementedError

    @abstractmethod
    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Async variant of :meth:`list_objects`."""
        raise NotImplementedError

    @abstractmethod
    async def exists_async(self, path: str, **kwargs: Any) -> bool:
        """Async variant of :meth:`exists`."""
        raise NotImplementedError

    @abstractmethod
    async def delete_async(self, path: str, **kwargs: Any) -> None:
        """Async variant of :meth:`delete`."""
        raise NotImplementedError

    @abstractmethod
    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async variant of :meth:`copy`."""
        raise NotImplementedError

    @abstractmethod
    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async variant of :meth:`move`."""
        raise NotImplementedError

    @abstractmethod
    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Async variant of :meth:`get_metadata`."""
        raise NotImplementedError

    @abstractmethod
    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
        """Async variant of :meth:`read_arrow`."""
        raise NotImplementedError

    @abstractmethod
    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Async variant of :meth:`write_arrow`."""
        raise NotImplementedError

    @abstractmethod
    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
        """Async variant of :meth:`stream_arrow`.

        NOTE: declared ``def`` (not ``async def``) because it returns an
        async iterator directly rather than awaiting anything itself.
        """
        raise NotImplementedError
@@ -0,0 +1,386 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
4
+
5
+ from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
6
+ from sqlspec.storage.backends.base import ObjectStoreBase
7
+ from sqlspec.storage.capabilities import StorageCapabilities
8
+ from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
9
+ from sqlspec.utils.sync_tools import async_
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import AsyncIterator, Iterator
13
+
14
+ from fsspec import AbstractFileSystem
15
+
16
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
17
+
18
+ __all__ = ("FSSpecBackend",)
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class _ArrowStreamer:
    """Async iterator that streams Arrow record batches from files matching a glob pattern.

    Resolves ``pattern`` via the backend's synchronous :meth:`glob` (run in a
    worker through ``async_`` so the event loop is not blocked), then yields
    record batches file by file.
    """

    def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
        self.backend = backend
        self.pattern = pattern
        self.kwargs = kwargs
        # Lazily created on first __anext__ call.
        self.paths_iterator: Optional[Iterator[str]] = None
        # Batches of the file currently being consumed, or None between files.
        self.batch_iterator: Optional[Iterator[ArrowRecordBatch]] = None

    def __aiter__(self) -> "_ArrowStreamer":
        return self

    async def _initialize(self) -> None:
        """Resolve the glob pattern once and cache the resulting paths iterator."""
        if self.paths_iterator is None:
            paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
            self.paths_iterator = iter(paths)

    async def __anext__(self) -> "ArrowRecordBatch":
        """Return the next record batch, advancing to the next file as needed.

        Uses an explicit loop instead of recursion so that a long run of
        files yielding no batches cannot exhaust the recursion limit.

        Raises:
            StopAsyncIteration: When all matched files are exhausted.
        """
        await self._initialize()

        while True:
            if self.batch_iterator is not None:
                try:
                    return next(self.batch_iterator)
                except StopIteration:
                    # Current file exhausted; fall through to the next path.
                    self.batch_iterator = None

            if self.paths_iterator is None:
                raise StopAsyncIteration

            try:
                path = next(self.paths_iterator)
            except StopIteration:
                raise StopAsyncIteration from None

            self.batch_iterator = await async_(self.backend._stream_file_batches)(path)
57
+
58
+
59
class FSSpecBackend(ObjectStoreBase):
    """Storage backend using fsspec.

    Implements the ObjectStoreProtocol using fsspec,
    providing support for various protocols including HTTP, HTTPS, FTP,
    and cloud storage services.
    """

    # Conservative defaults used when the concrete protocol is unknown.
    _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
        supports_arrow=PYARROW_INSTALLED,
        supports_streaming=PYARROW_INSTALLED,
        supports_async=True,
        supports_compression=True,
        is_remote=True,
        is_cloud_native=False,
    )

    def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
        """Initialize the backend.

        Args:
            fs: Either an fsspec filesystem instance, or a URI string such as
                ``"s3://bucket"`` whose scheme selects the filesystem.
            base_path: Optional prefix prepended to every path.

        Raises:
            MissingDependencyError: If fsspec is not installed.
        """
        if not FSSPEC_INSTALLED:
            raise MissingDependencyError(package="fsspec", install_package="fsspec")

        self.base_path = base_path.rstrip("/") if base_path else ""

        if isinstance(fs, str):
            import fsspec

            # A URI string: the scheme (text before "://") picks the filesystem.
            self.fs = fsspec.filesystem(fs.split("://")[0])
            self.protocol = fs.split("://")[0]
            self._fs_uri = fs
        else:
            self.fs = fs
            self.protocol = getattr(fs, "protocol", "unknown")
            self._fs_uri = f"{self.protocol}://"

        self._instance_capabilities = self._detect_capabilities()

        super().__init__()

    @classmethod
    def from_config(cls, config: "dict[str, Any]") -> "FSSpecBackend":
        """Build a backend from a config mapping with ``protocol``,
        optional ``fs_config`` kwargs, and optional ``base_path``."""
        protocol = config["protocol"]
        fs_config = config.get("fs_config", {})
        base_path = config.get("base_path", "")

        import fsspec

        fs_instance = fsspec.filesystem(protocol, **fs_config)

        return cls(fs=fs_instance, base_path=base_path)

    def _resolve_path(self, path: Union[str, Path]) -> str:
        """Resolve path relative to base_path."""
        path_str = str(path)
        if self.base_path:
            clean_base = self.base_path.rstrip("/")
            clean_path = path_str.lstrip("/")
            return f"{clean_base}/{clean_path}"
        return path_str

    def _detect_capabilities(self) -> StorageCapabilities:
        """Detect capabilities based on underlying filesystem protocol."""
        protocol = self.protocol.lower()

        if protocol in {"s3", "s3a", "s3n"}:
            return StorageCapabilities.s3_compatible()
        if protocol in {"gcs", "gs"}:
            return StorageCapabilities.gcs()
        if protocol in {"abfs", "az", "azure"}:
            return StorageCapabilities.azure_blob()
        if protocol in {"file", "local"}:
            return StorageCapabilities.local_filesystem()
        return StorageCapabilities(
            supports_arrow=PYARROW_INSTALLED,
            supports_streaming=PYARROW_INSTALLED,
            supports_async=True,
            supports_compression=True,
            is_remote=True,
            is_cloud_native=False,
        )

    @property
    def capabilities(self) -> StorageCapabilities:
        """Return instance-specific capabilities based on detected protocol."""
        return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)

    @classmethod
    def has_capability(cls, capability: str) -> bool:
        """Check if backend has a specific capability."""
        return getattr(cls._default_capabilities, capability, False)

    @classmethod
    def get_capabilities(cls) -> StorageCapabilities:
        """Get all capabilities for this backend."""
        return cls._default_capabilities

    @property
    def backend_type(self) -> str:
        """Identifier for this backend implementation."""
        return "fsspec"

    @property
    def base_uri(self) -> str:
        """The URI (or protocol prefix) this backend was created from."""
        return self._fs_uri

    def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
        """Read bytes from an object.

        Raises:
            StorageOperationFailedError: If the underlying read fails.
        """
        try:
            resolved_path = self._resolve_path(path)
            return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore
        except Exception as exc:
            msg = f"Failed to read bytes from {path}"
            raise StorageOperationFailedError(msg) from exc

    def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
        """Write bytes to an object.

        Raises:
            StorageOperationFailedError: If the underlying write fails.
        """
        try:
            resolved_path = self._resolve_path(path)
            with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
                f.write(data)  # pyright: ignore
        except Exception as exc:
            msg = f"Failed to write bytes to {path}"
            raise StorageOperationFailedError(msg) from exc

    def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read text from an object."""
        data = self.read_bytes(path, **kwargs)
        return data.decode(encoding)

    def write_text(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write text to an object."""
        self.write_bytes(path, data.encode(encoding), **kwargs)

    def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
        """Check if an object exists."""
        resolved_path = self._resolve_path(path)
        return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]

    def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Delete an object.

        Raises:
            StorageOperationFailedError: If the underlying delete fails.
        """
        try:
            resolved_path = self._resolve_path(path)
            self.fs.rm(resolved_path, **kwargs)
        except Exception as exc:
            msg = f"Failed to delete {path}"
            raise StorageOperationFailedError(msg) from exc

    def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
        """Copy an object.

        Raises:
            StorageOperationFailedError: If the underlying copy fails.
        """
        try:
            source_path = self._resolve_path(source)
            dest_path = self._resolve_path(destination)
            self.fs.copy(source_path, dest_path, **kwargs)
        except Exception as exc:
            msg = f"Failed to copy {source} to {destination}"
            raise StorageOperationFailedError(msg) from exc

    def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
        """Move an object.

        Raises:
            StorageOperationFailedError: If the underlying move fails.
        """
        try:
            source_path = self._resolve_path(source)
            dest_path = self._resolve_path(destination)
            self.fs.mv(source_path, dest_path, **kwargs)
        except Exception as exc:
            msg = f"Failed to move {source} to {destination}"
            raise StorageOperationFailedError(msg) from exc

    def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
        """Read an Arrow table (Parquet format) from storage.

        Raises:
            MissingDependencyError: If pyarrow is not installed.
            StorageOperationFailedError: If the underlying read fails.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
        try:
            import pyarrow.parquet as pq

            resolved_path = self._resolve_path(path)
            with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
                return pq.read_table(f)
        except Exception as exc:
            msg = f"Failed to read Arrow table from {path}"
            raise StorageOperationFailedError(msg) from exc

    def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
        """Write an Arrow table (Parquet format) to storage.

        Raises:
            MissingDependencyError: If pyarrow is not installed.
            StorageOperationFailedError: If the underlying write fails.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
        try:
            import pyarrow.parquet as pq

            resolved_path = self._resolve_path(path)
            with self.fs.open(resolved_path, mode="wb") as f:
                pq.write_table(table, f, **kwargs)  # pyright: ignore
        except Exception as exc:
            msg = f"Failed to write Arrow table to {path}"
            raise StorageOperationFailedError(msg) from exc

    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List objects with optional prefix.

        ``find`` walks recursively; ``ls`` lists only the immediate level.

        Raises:
            StorageOperationFailedError: If the underlying listing fails.
        """
        try:
            resolved_prefix = self._resolve_path(prefix)
            if recursive:
                return sorted(self.fs.find(resolved_prefix, **kwargs))
            return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
        except Exception as exc:
            msg = f"Failed to list objects with prefix '{prefix}'"
            raise StorageOperationFailedError(msg) from exc

    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Find objects matching a glob pattern.

        Raises:
            StorageOperationFailedError: If the underlying glob fails.
        """
        try:
            resolved_pattern = self._resolve_path(pattern)
            return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore
        except Exception as exc:
            msg = f"Failed to glob with pattern '{pattern}'"
            raise StorageOperationFailedError(msg) from exc

    def is_object(self, path: str) -> bool:
        """Check if path points to an object (exists and is not a directory)."""
        resolved_path = self._resolve_path(path)
        return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)

    def is_path(self, path: str) -> bool:
        """Check if path points to a prefix (directory-like)."""
        resolved_path = self._resolve_path(path)
        return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]

    def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
        """Get object metadata.

        ``fs.info`` usually returns a dict, but some filesystems return an
        object with attributes; both shapes are normalized to the same keys.

        Raises:
            StorageOperationFailedError: If the underlying info call fails.
        """
        try:
            resolved_path = self._resolve_path(path)
            info = self.fs.info(resolved_path, **kwargs)
            if isinstance(info, dict):
                return {
                    "path": resolved_path,
                    "exists": True,
                    "size": info.get("size"),
                    "last_modified": info.get("mtime"),
                    "type": info.get("type", "file"),
                }
            # Attribute-style info object. Kept inside the try block so any
            # missing-attribute failure is wrapped in StorageOperationFailedError
            # (previously this ran after the except clauses and escaped raw);
            # getattr defaults mirror the dict branch above.
            return {
                "path": resolved_path,
                "exists": True,
                "size": getattr(info, "size", None),
                "last_modified": getattr(info, "mtime", None),
                "type": getattr(info, "type", "file"),
            }
        except FileNotFoundError:
            return {"path": self._resolve_path(path), "exists": False}
        except Exception as exc:
            msg = f"Failed to get metadata for {path}"
            raise StorageOperationFailedError(msg) from exc

    def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
        """Yield Arrow record batches from a single Parquet file."""
        import pyarrow.parquet as pq

        with self.fs.open(obj_path, mode="rb") as f:
            parquet_file = pq.ParquetFile(f)  # pyright: ignore[reportArgumentType]
            yield from parquet_file.iter_batches()

    def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
        """Stream Arrow record batches from every object matching ``pattern``.

        Raises:
            MissingDependencyError: If fsspec or pyarrow is not installed.
        """
        if not FSSPEC_INSTALLED:
            raise MissingDependencyError(package="fsspec", install_package="fsspec")
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        for obj_path in self.glob(pattern, **kwargs):
            yield from self._stream_file_batches(obj_path)

    async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
        """Read bytes from storage asynchronously."""
        return await async_(self.read_bytes)(path, **kwargs)

    async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
        """Write bytes to storage asynchronously."""
        return await async_(self.write_bytes)(path, data, **kwargs)

    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
        """Stream Arrow record batches from storage asynchronously.

        Args:
            pattern: The glob pattern to match.
            **kwargs: Additional arguments to pass to the glob method.

        Returns:
            AsyncIterator of Arrow record batches

        Raises:
            MissingDependencyError: If pyarrow is not installed.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        return _ArrowStreamer(self, pattern, **kwargs)

    async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read text from storage asynchronously."""
        return await async_(self.read_text)(path, encoding, **kwargs)

    async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write text to storage asynchronously."""
        await async_(self.write_text)(path, data, encoding, **kwargs)

    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List objects in storage asynchronously."""
        return await async_(self.list_objects)(prefix, recursive, **kwargs)

    async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
        """Check if object exists in storage asynchronously."""
        return await async_(self.exists)(path, **kwargs)

    async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
        """Delete object from storage asynchronously."""
        await async_(self.delete)(path, **kwargs)

    async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
        """Copy object in storage asynchronously."""
        await async_(self.copy)(source, destination, **kwargs)

    async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
        """Move object in storage asynchronously."""
        await async_(self.move)(source, destination, **kwargs)

    async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
        """Get object metadata from storage asynchronously."""
        return await async_(self.get_metadata)(path, **kwargs)

    async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
        """Read Arrow table from storage asynchronously."""
        return await async_(self.read_arrow)(path, **kwargs)

    async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
        """Write Arrow table to storage asynchronously."""
        await async_(self.write_arrow)(path, table, **kwargs)