sqlspec 0.11.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (155) hide show
  1. sqlspec/__init__.py +16 -3
  2. sqlspec/_serialization.py +3 -10
  3. sqlspec/_sql.py +1147 -0
  4. sqlspec/_typing.py +343 -41
  5. sqlspec/adapters/adbc/__init__.py +2 -6
  6. sqlspec/adapters/adbc/config.py +474 -149
  7. sqlspec/adapters/adbc/driver.py +330 -621
  8. sqlspec/adapters/aiosqlite/__init__.py +2 -6
  9. sqlspec/adapters/aiosqlite/config.py +143 -57
  10. sqlspec/adapters/aiosqlite/driver.py +269 -431
  11. sqlspec/adapters/asyncmy/__init__.py +3 -8
  12. sqlspec/adapters/asyncmy/config.py +247 -202
  13. sqlspec/adapters/asyncmy/driver.py +218 -436
  14. sqlspec/adapters/asyncpg/__init__.py +4 -7
  15. sqlspec/adapters/asyncpg/config.py +329 -176
  16. sqlspec/adapters/asyncpg/driver.py +417 -487
  17. sqlspec/adapters/bigquery/__init__.py +2 -2
  18. sqlspec/adapters/bigquery/config.py +407 -0
  19. sqlspec/adapters/bigquery/driver.py +600 -553
  20. sqlspec/adapters/duckdb/__init__.py +4 -1
  21. sqlspec/adapters/duckdb/config.py +432 -321
  22. sqlspec/adapters/duckdb/driver.py +392 -406
  23. sqlspec/adapters/oracledb/__init__.py +3 -8
  24. sqlspec/adapters/oracledb/config.py +625 -0
  25. sqlspec/adapters/oracledb/driver.py +548 -921
  26. sqlspec/adapters/psqlpy/__init__.py +4 -7
  27. sqlspec/adapters/psqlpy/config.py +372 -203
  28. sqlspec/adapters/psqlpy/driver.py +197 -533
  29. sqlspec/adapters/psycopg/__init__.py +3 -8
  30. sqlspec/adapters/psycopg/config.py +741 -0
  31. sqlspec/adapters/psycopg/driver.py +734 -694
  32. sqlspec/adapters/sqlite/__init__.py +2 -6
  33. sqlspec/adapters/sqlite/config.py +146 -81
  34. sqlspec/adapters/sqlite/driver.py +242 -405
  35. sqlspec/base.py +220 -784
  36. sqlspec/config.py +354 -0
  37. sqlspec/driver/__init__.py +22 -0
  38. sqlspec/driver/_async.py +252 -0
  39. sqlspec/driver/_common.py +338 -0
  40. sqlspec/driver/_sync.py +261 -0
  41. sqlspec/driver/mixins/__init__.py +17 -0
  42. sqlspec/driver/mixins/_pipeline.py +523 -0
  43. sqlspec/driver/mixins/_result_utils.py +122 -0
  44. sqlspec/driver/mixins/_sql_translator.py +35 -0
  45. sqlspec/driver/mixins/_storage.py +993 -0
  46. sqlspec/driver/mixins/_type_coercion.py +131 -0
  47. sqlspec/exceptions.py +299 -7
  48. sqlspec/extensions/aiosql/__init__.py +10 -0
  49. sqlspec/extensions/aiosql/adapter.py +474 -0
  50. sqlspec/extensions/litestar/__init__.py +1 -6
  51. sqlspec/extensions/litestar/_utils.py +1 -5
  52. sqlspec/extensions/litestar/config.py +5 -6
  53. sqlspec/extensions/litestar/handlers.py +13 -12
  54. sqlspec/extensions/litestar/plugin.py +22 -24
  55. sqlspec/extensions/litestar/providers.py +37 -55
  56. sqlspec/loader.py +528 -0
  57. sqlspec/service/__init__.py +3 -0
  58. sqlspec/service/base.py +24 -0
  59. sqlspec/service/pagination.py +26 -0
  60. sqlspec/statement/__init__.py +21 -0
  61. sqlspec/statement/builder/__init__.py +54 -0
  62. sqlspec/statement/builder/_ddl_utils.py +119 -0
  63. sqlspec/statement/builder/_parsing_utils.py +135 -0
  64. sqlspec/statement/builder/base.py +328 -0
  65. sqlspec/statement/builder/ddl.py +1379 -0
  66. sqlspec/statement/builder/delete.py +80 -0
  67. sqlspec/statement/builder/insert.py +274 -0
  68. sqlspec/statement/builder/merge.py +95 -0
  69. sqlspec/statement/builder/mixins/__init__.py +65 -0
  70. sqlspec/statement/builder/mixins/_aggregate_functions.py +151 -0
  71. sqlspec/statement/builder/mixins/_case_builder.py +91 -0
  72. sqlspec/statement/builder/mixins/_common_table_expr.py +91 -0
  73. sqlspec/statement/builder/mixins/_delete_from.py +34 -0
  74. sqlspec/statement/builder/mixins/_from.py +61 -0
  75. sqlspec/statement/builder/mixins/_group_by.py +119 -0
  76. sqlspec/statement/builder/mixins/_having.py +35 -0
  77. sqlspec/statement/builder/mixins/_insert_from_select.py +48 -0
  78. sqlspec/statement/builder/mixins/_insert_into.py +36 -0
  79. sqlspec/statement/builder/mixins/_insert_values.py +69 -0
  80. sqlspec/statement/builder/mixins/_join.py +110 -0
  81. sqlspec/statement/builder/mixins/_limit_offset.py +53 -0
  82. sqlspec/statement/builder/mixins/_merge_clauses.py +405 -0
  83. sqlspec/statement/builder/mixins/_order_by.py +46 -0
  84. sqlspec/statement/builder/mixins/_pivot.py +82 -0
  85. sqlspec/statement/builder/mixins/_returning.py +37 -0
  86. sqlspec/statement/builder/mixins/_select_columns.py +60 -0
  87. sqlspec/statement/builder/mixins/_set_ops.py +122 -0
  88. sqlspec/statement/builder/mixins/_unpivot.py +80 -0
  89. sqlspec/statement/builder/mixins/_update_from.py +54 -0
  90. sqlspec/statement/builder/mixins/_update_set.py +91 -0
  91. sqlspec/statement/builder/mixins/_update_table.py +29 -0
  92. sqlspec/statement/builder/mixins/_where.py +374 -0
  93. sqlspec/statement/builder/mixins/_window_functions.py +86 -0
  94. sqlspec/statement/builder/protocols.py +20 -0
  95. sqlspec/statement/builder/select.py +206 -0
  96. sqlspec/statement/builder/update.py +178 -0
  97. sqlspec/statement/filters.py +571 -0
  98. sqlspec/statement/parameters.py +736 -0
  99. sqlspec/statement/pipelines/__init__.py +67 -0
  100. sqlspec/statement/pipelines/analyzers/__init__.py +9 -0
  101. sqlspec/statement/pipelines/analyzers/_analyzer.py +649 -0
  102. sqlspec/statement/pipelines/base.py +315 -0
  103. sqlspec/statement/pipelines/context.py +119 -0
  104. sqlspec/statement/pipelines/result_types.py +41 -0
  105. sqlspec/statement/pipelines/transformers/__init__.py +8 -0
  106. sqlspec/statement/pipelines/transformers/_expression_simplifier.py +256 -0
  107. sqlspec/statement/pipelines/transformers/_literal_parameterizer.py +623 -0
  108. sqlspec/statement/pipelines/transformers/_remove_comments.py +66 -0
  109. sqlspec/statement/pipelines/transformers/_remove_hints.py +81 -0
  110. sqlspec/statement/pipelines/validators/__init__.py +23 -0
  111. sqlspec/statement/pipelines/validators/_dml_safety.py +275 -0
  112. sqlspec/statement/pipelines/validators/_parameter_style.py +297 -0
  113. sqlspec/statement/pipelines/validators/_performance.py +703 -0
  114. sqlspec/statement/pipelines/validators/_security.py +990 -0
  115. sqlspec/statement/pipelines/validators/base.py +67 -0
  116. sqlspec/statement/result.py +527 -0
  117. sqlspec/statement/splitter.py +701 -0
  118. sqlspec/statement/sql.py +1198 -0
  119. sqlspec/storage/__init__.py +15 -0
  120. sqlspec/storage/backends/__init__.py +0 -0
  121. sqlspec/storage/backends/base.py +166 -0
  122. sqlspec/storage/backends/fsspec.py +315 -0
  123. sqlspec/storage/backends/obstore.py +464 -0
  124. sqlspec/storage/protocol.py +170 -0
  125. sqlspec/storage/registry.py +315 -0
  126. sqlspec/typing.py +157 -36
  127. sqlspec/utils/correlation.py +155 -0
  128. sqlspec/utils/deprecation.py +3 -6
  129. sqlspec/utils/fixtures.py +6 -11
  130. sqlspec/utils/logging.py +135 -0
  131. sqlspec/utils/module_loader.py +45 -43
  132. sqlspec/utils/serializers.py +4 -0
  133. sqlspec/utils/singleton.py +6 -8
  134. sqlspec/utils/sync_tools.py +15 -27
  135. sqlspec/utils/text.py +58 -26
  136. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/METADATA +97 -26
  137. sqlspec-0.12.0.dist-info/RECORD +145 -0
  138. sqlspec/adapters/bigquery/config/__init__.py +0 -3
  139. sqlspec/adapters/bigquery/config/_common.py +0 -40
  140. sqlspec/adapters/bigquery/config/_sync.py +0 -87
  141. sqlspec/adapters/oracledb/config/__init__.py +0 -9
  142. sqlspec/adapters/oracledb/config/_asyncio.py +0 -186
  143. sqlspec/adapters/oracledb/config/_common.py +0 -131
  144. sqlspec/adapters/oracledb/config/_sync.py +0 -186
  145. sqlspec/adapters/psycopg/config/__init__.py +0 -19
  146. sqlspec/adapters/psycopg/config/_async.py +0 -169
  147. sqlspec/adapters/psycopg/config/_common.py +0 -56
  148. sqlspec/adapters/psycopg/config/_sync.py +0 -168
  149. sqlspec/filters.py +0 -331
  150. sqlspec/mixins.py +0 -305
  151. sqlspec/statement.py +0 -378
  152. sqlspec-0.11.1.dist-info/RECORD +0 -69
  153. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/WHEEL +0 -0
  154. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/LICENSE +0 -0
  155. {sqlspec-0.11.1.dist-info → sqlspec-0.12.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,15 @@
1
+ """Storage abstraction layer for SQLSpec.
2
+
3
+ This module provides a flexible storage system with:
4
+ - Multiple backend support (local, fsspec, obstore)
5
+ - Lazy loading and configuration-based registration
6
+ - URI scheme-based automatic backend resolution
7
+ - Key-based named storage configurations
8
+ """
9
+
10
+ from sqlspec.storage.protocol import ObjectStoreProtocol
11
+ from sqlspec.storage.registry import StorageRegistry
12
+
13
+ storage_registry = StorageRegistry()
14
+
15
+ __all__ = ("ObjectStoreProtocol", "StorageRegistry", "storage_registry")
File without changes
@@ -0,0 +1,166 @@
1
+ """Base class for storage backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ if TYPE_CHECKING:
9
+ from collections.abc import AsyncIterator, Iterator
10
+
11
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
12
+
13
+ __all__ = ("ObjectStoreBase",)
14
+
15
+
16
class ObjectStoreBase(ABC):
    """Abstract contract that every storage backend has to fulfil.

    The class only declares operations; it carries no state and no default
    behaviour. Every method below is abstract, so a concrete backend must
    override the complete set before it can be instantiated. Most blocking
    operations have an ``*_async`` counterpart with the same parameters.
    """

    # ------------------------------------------------------------------
    # Blocking API: raw and text content
    # ------------------------------------------------------------------

    @abstractmethod
    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
        """Return the content stored at ``path`` as bytes."""
        raise NotImplementedError

    @abstractmethod
    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Store ``data`` at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Return the content stored at ``path`` as text in ``encoding``."""
        raise NotImplementedError

    @abstractmethod
    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Store the text ``data`` at ``path`` using ``encoding``."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Blocking API: object management
    # ------------------------------------------------------------------

    @abstractmethod
    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Return the object paths found under ``prefix``."""
        raise NotImplementedError

    @abstractmethod
    def exists(self, path: str, **kwargs: Any) -> bool:
        """Report whether something exists at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def delete(self, path: str, **kwargs: Any) -> None:
        """Remove the object at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
        """Copy the object at ``source`` to ``destination``."""
        raise NotImplementedError

    @abstractmethod
    def move(self, source: str, destination: str, **kwargs: Any) -> None:
        """Move the object at ``source`` to ``destination``."""
        raise NotImplementedError

    @abstractmethod
    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Return the object paths matching the glob ``pattern``."""
        raise NotImplementedError

    @abstractmethod
    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Return a metadata mapping for the object at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def is_object(self, path: str) -> bool:
        """Report whether ``path`` refers to an object."""
        raise NotImplementedError

    @abstractmethod
    def is_path(self, path: str) -> bool:
        """Report whether ``path`` refers to a path (prefix) rather than an object."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Blocking API: Arrow
    # ------------------------------------------------------------------

    @abstractmethod
    def read_arrow(self, path: str, **kwargs: Any) -> ArrowTable:
        """Load the object at ``path`` as an Arrow table."""
        raise NotImplementedError

    @abstractmethod
    def write_arrow(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Store the Arrow ``table`` at ``path``."""
        raise NotImplementedError

    @abstractmethod
    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
        """Iterate over Arrow record batches for objects matching ``pattern``."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Async API
    #
    # A backend may implement these natively or by wrapping the blocking
    # methods above; either way each one has to be overridden.
    # ------------------------------------------------------------------

    @abstractmethod
    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
        """Async counterpart of :meth:`read_bytes`."""
        raise NotImplementedError

    @abstractmethod
    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Async counterpart of :meth:`write_bytes`."""
        raise NotImplementedError

    @abstractmethod
    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Async counterpart of :meth:`read_text`."""
        raise NotImplementedError

    @abstractmethod
    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Async counterpart of :meth:`write_text`."""
        raise NotImplementedError

    @abstractmethod
    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Async counterpart of :meth:`list_objects`."""
        raise NotImplementedError

    @abstractmethod
    async def exists_async(self, path: str, **kwargs: Any) -> bool:
        """Async counterpart of :meth:`exists`."""
        raise NotImplementedError

    @abstractmethod
    async def delete_async(self, path: str, **kwargs: Any) -> None:
        """Async counterpart of :meth:`delete`."""
        raise NotImplementedError

    @abstractmethod
    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async counterpart of :meth:`copy`."""
        raise NotImplementedError

    @abstractmethod
    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async counterpart of :meth:`move`."""
        raise NotImplementedError

    @abstractmethod
    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Async counterpart of :meth:`get_metadata`."""
        raise NotImplementedError

    @abstractmethod
    async def read_arrow_async(self, path: str, **kwargs: Any) -> ArrowTable:
        """Async counterpart of :meth:`read_arrow`."""
        raise NotImplementedError

    @abstractmethod
    async def write_arrow_async(self, path: str, table: ArrowTable, **kwargs: Any) -> None:
        """Async counterpart of :meth:`write_arrow`."""
        raise NotImplementedError

    # Declared as a plain ``def`` on purpose: the return annotation is an
    # async iterator, so calling it hands back the iterator directly rather
    # than a coroutine that would have to be awaited first.
    @abstractmethod
    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
        """Async counterpart of :meth:`stream_arrow`."""
        raise NotImplementedError
@@ -0,0 +1,315 @@
1
+ # pyright: ignore=reportUnknownVariableType
2
+ import logging
3
+ from io import BytesIO
4
+ from typing import TYPE_CHECKING, Any, Union
5
+
6
+ from sqlspec.exceptions import MissingDependencyError
7
+ from sqlspec.storage.backends.base import ObjectStoreBase
8
+ from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
9
+ from sqlspec.utils.sync_tools import async_
10
+
11
+ if TYPE_CHECKING:
12
+ from collections.abc import AsyncIterator, Iterator
13
+
14
+ from fsspec import AbstractFileSystem
15
+
16
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
17
+
18
+ __all__ = ("FSSpecBackend",)
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Constants for URI validation
23
+ URI_PARTS_MIN_COUNT = 2
24
+ """Minimum number of parts in a valid cloud storage URI (bucket/path)."""
25
+
26
+ AZURE_URI_PARTS_MIN_COUNT = 2
27
+ """Minimum number of parts in an Azure URI (account/container)."""
28
+
29
+ AZURE_URI_BLOB_INDEX = 2
30
+ """Index of blob name in Azure URI parts."""
31
+
32
+
33
+ def _join_path(prefix: str, path: str) -> str:
34
+ if not prefix:
35
+ return path
36
+ prefix = prefix.rstrip("/")
37
+ path = path.lstrip("/")
38
+ return f"{prefix}/{path}"
39
+
40
+
41
class FSSpecBackend(ObjectStoreBase):
    """Storage backend that delegates every operation to an fsspec filesystem.

    Public ``path``/``pattern``/``prefix`` arguments are joined onto
    ``base_path`` (when one was configured) before they reach the filesystem.
    ``list_objects`` and ``glob`` return the strings reported by
    ``fs.glob`` for the already-joined pattern, so internal helpers that
    consume those results must not resolve them a second time.
    """

    def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
        """Create a backend around an fsspec filesystem.

        Args:
            fs: Either a URI/protocol string (everything before ``://`` is used
                as the fsspec protocol name) or an existing filesystem instance.
            base_path: Optional prefix joined in front of every path handed to
                this backend; trailing slashes are stripped.

        Raises:
            MissingDependencyError: If fsspec is not installed.
        """
        if not FSSPEC_INSTALLED:
            raise MissingDependencyError(package="fsspec", install_package="fsspec")

        self.base_path = base_path.rstrip("/") if base_path else ""

        if isinstance(fs, str):
            import fsspec

            protocol = fs.split("://")[0]
            self.fs = fsspec.filesystem(protocol)
            self.protocol = protocol
            self._fs_uri = fs
        else:
            self.fs = fs
            self.protocol = getattr(fs, "protocol", "unknown")
            # fsspec filesystems may expose ``protocol`` as a tuple of aliases
            # (e.g. ("file", "local")). Formatting that tuple straight into the
            # URI would produce "('file', 'local')://", so use the first alias.
            # ``self.protocol`` keeps whatever the filesystem reported.
            scheme = self.protocol
            if isinstance(scheme, (list, tuple)):
                scheme = scheme[0] if scheme else "unknown"
            self._fs_uri = f"{scheme}://"
        super().__init__()

    @classmethod
    def from_config(cls, config: "dict[str, Any]") -> "FSSpecBackend":
        """Build a backend from a configuration mapping.

        Args:
            config: Mapping with a required ``"protocol"`` key and optional
                ``"fs_config"`` (keyword arguments for ``fsspec.filesystem``)
                and ``"base_path"`` keys.

        Returns:
            A backend wrapping the newly created filesystem.

        Raises:
            KeyError: If ``"protocol"`` is missing from ``config``.
        """
        protocol = config["protocol"]
        fs_config = config.get("fs_config", {})
        base_path = config.get("base_path", "")

        # NOTE(review): unlike __init__, there is no FSSPEC_INSTALLED guard in
        # front of this import, so a missing fsspec surfaces here as the plain
        # ImportError raised by the import statement.
        import fsspec

        fs_instance = fsspec.filesystem(protocol, **fs_config)

        return cls(fs=fs_instance, base_path=base_path)

    def _resolve_path(self, path: str) -> str:
        """Return ``path`` joined onto ``base_path`` (unchanged when no base is set)."""
        return _join_path(self.base_path, path)

    @property
    def backend_type(self) -> str:
        """Fixed identifier of this backend: ``"fsspec"``."""
        return "fsspec"

    @property
    def base_uri(self) -> str:
        """URI recorded at construction time (the ``fs`` string, or ``<protocol>://``)."""
        return self._fs_uri

    # ------------------------------------------------------------------
    # Core operations (sync)
    # ------------------------------------------------------------------

    def read_bytes(self, path: str, **kwargs: Any) -> bytes:
        """Read the object at ``path`` via ``fs.cat``; ``kwargs`` are forwarded to it."""
        resolved_path = self._resolve_path(path)
        return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore

    def write_bytes(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Write ``data`` to ``path``; ``kwargs`` are forwarded to ``fs.open``."""
        resolved_path = self._resolve_path(path)
        with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
            f.write(data)  # pyright: ignore

    def read_text(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read the object at ``path`` and decode it with ``encoding``."""
        return self.read_bytes(path, **kwargs).decode(encoding)

    def write_text(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Encode ``data`` with ``encoding`` and write it to ``path``."""
        self.write_bytes(path, data.encode(encoding), **kwargs)

    # ------------------------------------------------------------------
    # Object operations
    # ------------------------------------------------------------------

    def exists(self, path: str, **kwargs: Any) -> bool:
        """Return the result of ``fs.exists`` for the resolved ``path``."""
        resolved_path = self._resolve_path(path)
        return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]

    def delete(self, path: str, **kwargs: Any) -> None:
        """Remove the resolved ``path`` via ``fs.rm``."""
        resolved_path = self._resolve_path(path)
        self.fs.rm(resolved_path, **kwargs)

    def copy(self, source: str, destination: str, **kwargs: Any) -> None:
        """Copy ``source`` to ``destination`` via ``fs.copy`` (both are resolved first)."""
        source_path = self._resolve_path(source)
        dest_path = self._resolve_path(destination)
        self.fs.copy(source_path, dest_path, **kwargs)

    def move(self, source: str, destination: str, **kwargs: Any) -> None:
        """Move ``source`` to ``destination`` via ``fs.mv`` (both are resolved first)."""
        source_path = self._resolve_path(source)
        dest_path = self._resolve_path(destination)
        self.fs.mv(source_path, dest_path, **kwargs)

    # ------------------------------------------------------------------
    # Arrow operations
    # ------------------------------------------------------------------

    def read_arrow(self, path: str, **kwargs: Any) -> "ArrowTable":
        """Read a Parquet object into an Arrow table.

        Args:
            path: Object path, relative to ``base_path``.
            **kwargs: Forwarded to ``fs.open`` (not to the Parquet reader).

        Returns:
            The table produced by ``pyarrow.parquet.read_table``.

        Raises:
            MissingDependencyError: If pyarrow is not installed.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        import pyarrow.parquet as pq

        resolved_path = self._resolve_path(path)
        with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
            return pq.read_table(f)

    def write_arrow(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
        """Write an Arrow table as Parquet.

        Args:
            path: Object path, relative to ``base_path``.
            table: Table to write.
            **kwargs: Forwarded to ``pyarrow.parquet.write_table`` (not to
                ``fs.open``; this differs from :meth:`read_arrow`).

        Raises:
            MissingDependencyError: If pyarrow is not installed.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        import pyarrow.parquet as pq

        resolved_path = self._resolve_path(path)
        with self.fs.open(resolved_path, mode="wb") as f:
            pq.write_table(table, f, **kwargs)  # pyright: ignore

    # ------------------------------------------------------------------
    # Listing operations
    # ------------------------------------------------------------------

    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List non-directory entries below ``prefix``.

        Args:
            prefix: Prefix relative to ``base_path``; empty means ``base_path``
                itself.
            recursive: Glob with ``**`` when true, ``*`` otherwise.
            **kwargs: Forwarded to ``fs.glob``.

        Returns:
            Sorted list of the matching paths as reported by ``fs.glob``.
        """
        resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path
        wildcard = "**" if recursive else "*"
        pattern = f"{resolved_prefix}/{wildcard}" if resolved_prefix else wildcard

        # Directories are filtered out so only objects are returned.
        paths = [str(path) for path in self.fs.glob(pattern, **kwargs) if not self.fs.isdir(path)]
        return sorted(paths)

    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Return the sorted non-directory matches of ``pattern`` (resolved against ``base_path``)."""
        resolved_pattern = self._resolve_path(pattern)
        paths = [str(path) for path in self.fs.glob(resolved_pattern, **kwargs) if not self.fs.isdir(path)]
        return sorted(paths)

    # ------------------------------------------------------------------
    # Path operations
    # ------------------------------------------------------------------

    def is_object(self, path: str) -> bool:
        """Return True when the resolved ``path`` exists and is not a directory."""
        resolved_path = self._resolve_path(path)
        return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)

    def is_path(self, path: str) -> bool:
        """Return True when the resolved ``path`` is a directory according to ``fs.isdir``."""
        resolved_path = self._resolve_path(path)
        return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]

    def get_metadata(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Return metadata for the object at ``path``.

        Args:
            path: Object path, relative to ``base_path``.
            **kwargs: Forwarded to ``fs.info``.

        Returns:
            The ``fs.info`` result when it is a dict; otherwise its instance
            attributes; otherwise a minimal dict with ``path``, ``exists``,
            ``size`` and ``type`` keys.
        """
        resolved_path = self._resolve_path(path)
        info = self.fs.info(resolved_path, **kwargs)

        if isinstance(info, dict):
            return info

        try:
            return vars(info)  # type: ignore[no-any-return]
        except (AttributeError, TypeError):
            # vars() raises TypeError (not AttributeError) for objects without
            # a __dict__; catching only AttributeError made the fallback below
            # unreachable for such objects.
            pass

        return {
            "path": resolved_path,
            "exists": self.fs.exists(resolved_path),
            "size": getattr(info, "size", None),
            "type": getattr(info, "type", "file"),
        }

    # ------------------------------------------------------------------
    # Arrow streaming
    # ------------------------------------------------------------------

    def _stream_file_batches(self, obj_path: str) -> "Iterator[ArrowRecordBatch]":
        """Yield the record batches of one Parquet file.

        ``obj_path`` is opened as given; it is not joined onto ``base_path``.
        """
        import pyarrow.parquet as pq

        with self.fs.open(obj_path, mode="rb") as f:
            parquet_file = pq.ParquetFile(f)  # pyright: ignore[reportArgumentType]
            yield from parquet_file.iter_batches()

    def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
        """Yield record batches from every Parquet file matching ``pattern``.

        Because this is a generator, the dependency checks run when iteration
        starts, not when the method is called.

        Args:
            pattern: Glob pattern, relative to ``base_path``.
            **kwargs: Forwarded to :meth:`glob`.

        Yields:
            Arrow record batches, file by file in sorted path order.

        Raises:
            MissingDependencyError: If fsspec or pyarrow is not installed.
        """
        if not FSSPEC_INSTALLED:
            raise MissingDependencyError(package="fsspec", install_package="fsspec")
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        for obj_path in self.glob(pattern, **kwargs):
            yield from self._stream_file_batches(obj_path)

    async def _stream_file_batches_async(self, obj_path: str) -> "AsyncIterator[ArrowRecordBatch]":
        """Yield the record batches of one Parquet file, reading it fully first.

        ``obj_path`` comes from :meth:`glob` and therefore already carries
        ``base_path``. It is read with ``fs.cat`` directly: going through
        ``read_bytes_async`` would resolve it again and prepend ``base_path``
        a second time, unlike the sync helper which opens the path as given.
        """
        import pyarrow.parquet as pq

        data = await async_(self.fs.cat)(obj_path)
        parquet_file = pq.ParquetFile(BytesIO(data))
        for batch in parquet_file.iter_batches():
            yield batch

    async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
        """Async stream Arrow record batches.

        Streaming is file-level: each matching file is read into memory
        before its batches are yielded.

        Args:
            pattern: The glob pattern to match, relative to ``base_path``.
            **kwargs: Additional arguments passed to :meth:`glob`.

        Yields:
            Arrow record batches, file by file in sorted path order.

        Raises:
            MissingDependencyError: If pyarrow is not installed.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

        paths = await async_(self.glob)(pattern, **kwargs)

        for path in paths:
            async for batch in self._stream_file_batches_async(path):
                yield batch

    # ------------------------------------------------------------------
    # Async wrappers: each one hands the sync method to ``async_``
    # ------------------------------------------------------------------

    async def read_bytes_async(self, path: str, **kwargs: Any) -> bytes:
        """Async read bytes. Wraps the sync implementation."""
        return await async_(self.read_bytes)(path, **kwargs)

    async def write_bytes_async(self, path: str, data: bytes, **kwargs: Any) -> None:
        """Async write bytes. Wraps the sync implementation."""
        await async_(self.write_bytes)(path, data, **kwargs)

    async def read_text_async(self, path: str, encoding: str = "utf-8", **kwargs: Any) -> str:
        """Async read text. Wraps the sync implementation."""
        return await async_(self.read_text)(path, encoding, **kwargs)

    async def write_text_async(self, path: str, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Async write text. Wraps the sync implementation."""
        await async_(self.write_text)(path, data, encoding, **kwargs)

    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """Async list objects. Wraps the sync implementation."""
        return await async_(self.list_objects)(prefix, recursive, **kwargs)

    async def exists_async(self, path: str, **kwargs: Any) -> bool:
        """Async exists check. Wraps the sync implementation."""
        return await async_(self.exists)(path, **kwargs)

    async def delete_async(self, path: str, **kwargs: Any) -> None:
        """Async delete. Wraps the sync implementation."""
        await async_(self.delete)(path, **kwargs)

    async def copy_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async copy. Wraps the sync implementation."""
        await async_(self.copy)(source, destination, **kwargs)

    async def move_async(self, source: str, destination: str, **kwargs: Any) -> None:
        """Async move. Wraps the sync implementation."""
        await async_(self.move)(source, destination, **kwargs)

    async def get_metadata_async(self, path: str, **kwargs: Any) -> dict[str, Any]:
        """Async get metadata. Wraps the sync implementation."""
        return await async_(self.get_metadata)(path, **kwargs)

    async def read_arrow_async(self, path: str, **kwargs: Any) -> "ArrowTable":
        """Async read Arrow. Wraps the sync implementation."""
        return await async_(self.read_arrow)(path, **kwargs)

    async def write_arrow_async(self, path: str, table: "ArrowTable", **kwargs: Any) -> None:
        """Async write Arrow. Wraps the sync implementation."""
        await async_(self.write_arrow)(path, table, **kwargs)