sqlspec 0.25.0__py3-none-any.whl → 0.27.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of sqlspec might be problematic.
Files changed (199)
  1. sqlspec/__init__.py +7 -15
  2. sqlspec/_serialization.py +256 -24
  3. sqlspec/_typing.py +71 -52
  4. sqlspec/adapters/adbc/_types.py +1 -1
  5. sqlspec/adapters/adbc/adk/__init__.py +5 -0
  6. sqlspec/adapters/adbc/adk/store.py +870 -0
  7. sqlspec/adapters/adbc/config.py +69 -12
  8. sqlspec/adapters/adbc/data_dictionary.py +340 -0
  9. sqlspec/adapters/adbc/driver.py +266 -58
  10. sqlspec/adapters/adbc/litestar/__init__.py +5 -0
  11. sqlspec/adapters/adbc/litestar/store.py +504 -0
  12. sqlspec/adapters/adbc/type_converter.py +153 -0
  13. sqlspec/adapters/aiosqlite/_types.py +1 -1
  14. sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
  15. sqlspec/adapters/aiosqlite/adk/store.py +527 -0
  16. sqlspec/adapters/aiosqlite/config.py +88 -15
  17. sqlspec/adapters/aiosqlite/data_dictionary.py +149 -0
  18. sqlspec/adapters/aiosqlite/driver.py +143 -40
  19. sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
  20. sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
  21. sqlspec/adapters/aiosqlite/pool.py +7 -7
  22. sqlspec/adapters/asyncmy/__init__.py +7 -1
  23. sqlspec/adapters/asyncmy/_types.py +2 -2
  24. sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
  25. sqlspec/adapters/asyncmy/adk/store.py +493 -0
  26. sqlspec/adapters/asyncmy/config.py +68 -23
  27. sqlspec/adapters/asyncmy/data_dictionary.py +161 -0
  28. sqlspec/adapters/asyncmy/driver.py +313 -58
  29. sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
  30. sqlspec/adapters/asyncmy/litestar/store.py +296 -0
  31. sqlspec/adapters/asyncpg/__init__.py +2 -1
  32. sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
  33. sqlspec/adapters/asyncpg/_types.py +11 -7
  34. sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
  35. sqlspec/adapters/asyncpg/adk/store.py +450 -0
  36. sqlspec/adapters/asyncpg/config.py +59 -35
  37. sqlspec/adapters/asyncpg/data_dictionary.py +173 -0
  38. sqlspec/adapters/asyncpg/driver.py +170 -25
  39. sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
  40. sqlspec/adapters/asyncpg/litestar/store.py +253 -0
  41. sqlspec/adapters/bigquery/_types.py +1 -1
  42. sqlspec/adapters/bigquery/adk/__init__.py +5 -0
  43. sqlspec/adapters/bigquery/adk/store.py +576 -0
  44. sqlspec/adapters/bigquery/config.py +27 -10
  45. sqlspec/adapters/bigquery/data_dictionary.py +149 -0
  46. sqlspec/adapters/bigquery/driver.py +368 -142
  47. sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
  48. sqlspec/adapters/bigquery/litestar/store.py +327 -0
  49. sqlspec/adapters/bigquery/type_converter.py +125 -0
  50. sqlspec/adapters/duckdb/_types.py +1 -1
  51. sqlspec/adapters/duckdb/adk/__init__.py +14 -0
  52. sqlspec/adapters/duckdb/adk/store.py +553 -0
  53. sqlspec/adapters/duckdb/config.py +80 -20
  54. sqlspec/adapters/duckdb/data_dictionary.py +163 -0
  55. sqlspec/adapters/duckdb/driver.py +167 -45
  56. sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
  57. sqlspec/adapters/duckdb/litestar/store.py +332 -0
  58. sqlspec/adapters/duckdb/pool.py +4 -4
  59. sqlspec/adapters/duckdb/type_converter.py +133 -0
  60. sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
  61. sqlspec/adapters/oracledb/_types.py +20 -2
  62. sqlspec/adapters/oracledb/adk/__init__.py +5 -0
  63. sqlspec/adapters/oracledb/adk/store.py +1745 -0
  64. sqlspec/adapters/oracledb/config.py +122 -32
  65. sqlspec/adapters/oracledb/data_dictionary.py +509 -0
  66. sqlspec/adapters/oracledb/driver.py +353 -91
  67. sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
  68. sqlspec/adapters/oracledb/litestar/store.py +767 -0
  69. sqlspec/adapters/oracledb/migrations.py +348 -73
  70. sqlspec/adapters/oracledb/type_converter.py +207 -0
  71. sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
  72. sqlspec/adapters/psqlpy/_types.py +2 -1
  73. sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
  74. sqlspec/adapters/psqlpy/adk/store.py +482 -0
  75. sqlspec/adapters/psqlpy/config.py +46 -17
  76. sqlspec/adapters/psqlpy/data_dictionary.py +172 -0
  77. sqlspec/adapters/psqlpy/driver.py +123 -209
  78. sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
  79. sqlspec/adapters/psqlpy/litestar/store.py +272 -0
  80. sqlspec/adapters/psqlpy/type_converter.py +102 -0
  81. sqlspec/adapters/psycopg/_type_handlers.py +80 -0
  82. sqlspec/adapters/psycopg/_types.py +2 -1
  83. sqlspec/adapters/psycopg/adk/__init__.py +5 -0
  84. sqlspec/adapters/psycopg/adk/store.py +944 -0
  85. sqlspec/adapters/psycopg/config.py +69 -35
  86. sqlspec/adapters/psycopg/data_dictionary.py +331 -0
  87. sqlspec/adapters/psycopg/driver.py +238 -81
  88. sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
  89. sqlspec/adapters/psycopg/litestar/store.py +554 -0
  90. sqlspec/adapters/sqlite/__init__.py +2 -1
  91. sqlspec/adapters/sqlite/_type_handlers.py +86 -0
  92. sqlspec/adapters/sqlite/_types.py +1 -1
  93. sqlspec/adapters/sqlite/adk/__init__.py +5 -0
  94. sqlspec/adapters/sqlite/adk/store.py +572 -0
  95. sqlspec/adapters/sqlite/config.py +87 -15
  96. sqlspec/adapters/sqlite/data_dictionary.py +149 -0
  97. sqlspec/adapters/sqlite/driver.py +137 -54
  98. sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
  99. sqlspec/adapters/sqlite/litestar/store.py +318 -0
  100. sqlspec/adapters/sqlite/pool.py +18 -9
  101. sqlspec/base.py +45 -26
  102. sqlspec/builder/__init__.py +73 -4
  103. sqlspec/builder/_base.py +162 -89
  104. sqlspec/builder/_column.py +62 -29
  105. sqlspec/builder/_ddl.py +180 -121
  106. sqlspec/builder/_delete.py +5 -4
  107. sqlspec/builder/_dml.py +388 -0
  108. sqlspec/{_sql.py → builder/_factory.py} +53 -94
  109. sqlspec/builder/_insert.py +32 -131
  110. sqlspec/builder/_join.py +375 -0
  111. sqlspec/builder/_merge.py +446 -11
  112. sqlspec/builder/_parsing_utils.py +111 -17
  113. sqlspec/builder/_select.py +1457 -24
  114. sqlspec/builder/_update.py +11 -42
  115. sqlspec/cli.py +307 -194
  116. sqlspec/config.py +252 -67
  117. sqlspec/core/__init__.py +5 -4
  118. sqlspec/core/cache.py +17 -17
  119. sqlspec/core/compiler.py +62 -9
  120. sqlspec/core/filters.py +37 -37
  121. sqlspec/core/hashing.py +9 -9
  122. sqlspec/core/parameters.py +83 -48
  123. sqlspec/core/result.py +102 -46
  124. sqlspec/core/splitter.py +16 -17
  125. sqlspec/core/statement.py +36 -30
  126. sqlspec/core/type_conversion.py +235 -0
  127. sqlspec/driver/__init__.py +7 -6
  128. sqlspec/driver/_async.py +188 -151
  129. sqlspec/driver/_common.py +285 -80
  130. sqlspec/driver/_sync.py +188 -152
  131. sqlspec/driver/mixins/_result_tools.py +20 -236
  132. sqlspec/driver/mixins/_sql_translator.py +4 -4
  133. sqlspec/exceptions.py +75 -7
  134. sqlspec/extensions/adk/__init__.py +53 -0
  135. sqlspec/extensions/adk/_types.py +51 -0
  136. sqlspec/extensions/adk/converters.py +172 -0
  137. sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
  138. sqlspec/extensions/adk/migrations/__init__.py +0 -0
  139. sqlspec/extensions/adk/service.py +181 -0
  140. sqlspec/extensions/adk/store.py +536 -0
  141. sqlspec/extensions/aiosql/adapter.py +73 -53
  142. sqlspec/extensions/litestar/__init__.py +21 -4
  143. sqlspec/extensions/litestar/cli.py +54 -10
  144. sqlspec/extensions/litestar/config.py +59 -266
  145. sqlspec/extensions/litestar/handlers.py +46 -17
  146. sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
  147. sqlspec/extensions/litestar/migrations/__init__.py +3 -0
  148. sqlspec/extensions/litestar/plugin.py +324 -223
  149. sqlspec/extensions/litestar/providers.py +25 -25
  150. sqlspec/extensions/litestar/store.py +265 -0
  151. sqlspec/loader.py +30 -49
  152. sqlspec/migrations/__init__.py +4 -3
  153. sqlspec/migrations/base.py +302 -39
  154. sqlspec/migrations/commands.py +611 -144
  155. sqlspec/migrations/context.py +142 -0
  156. sqlspec/migrations/fix.py +199 -0
  157. sqlspec/migrations/loaders.py +68 -23
  158. sqlspec/migrations/runner.py +543 -107
  159. sqlspec/migrations/tracker.py +237 -21
  160. sqlspec/migrations/utils.py +51 -3
  161. sqlspec/migrations/validation.py +177 -0
  162. sqlspec/protocols.py +66 -36
  163. sqlspec/storage/_utils.py +98 -0
  164. sqlspec/storage/backends/fsspec.py +134 -106
  165. sqlspec/storage/backends/local.py +78 -51
  166. sqlspec/storage/backends/obstore.py +278 -162
  167. sqlspec/storage/registry.py +75 -39
  168. sqlspec/typing.py +16 -84
  169. sqlspec/utils/config_resolver.py +153 -0
  170. sqlspec/utils/correlation.py +4 -5
  171. sqlspec/utils/data_transformation.py +3 -2
  172. sqlspec/utils/deprecation.py +9 -8
  173. sqlspec/utils/fixtures.py +4 -4
  174. sqlspec/utils/logging.py +46 -6
  175. sqlspec/utils/module_loader.py +2 -2
  176. sqlspec/utils/schema.py +288 -0
  177. sqlspec/utils/serializers.py +50 -2
  178. sqlspec/utils/sync_tools.py +21 -17
  179. sqlspec/utils/text.py +1 -2
  180. sqlspec/utils/type_guards.py +111 -20
  181. sqlspec/utils/version.py +433 -0
  182. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/METADATA +40 -21
  183. sqlspec-0.27.0.dist-info/RECORD +207 -0
  184. sqlspec/builder/mixins/__init__.py +0 -55
  185. sqlspec/builder/mixins/_cte_and_set_ops.py +0 -254
  186. sqlspec/builder/mixins/_delete_operations.py +0 -50
  187. sqlspec/builder/mixins/_insert_operations.py +0 -282
  188. sqlspec/builder/mixins/_join_operations.py +0 -389
  189. sqlspec/builder/mixins/_merge_operations.py +0 -592
  190. sqlspec/builder/mixins/_order_limit_operations.py +0 -152
  191. sqlspec/builder/mixins/_pivot_operations.py +0 -157
  192. sqlspec/builder/mixins/_select_operations.py +0 -936
  193. sqlspec/builder/mixins/_update_operations.py +0 -218
  194. sqlspec/builder/mixins/_where_clause.py +0 -1304
  195. sqlspec-0.25.0.dist-info/RECORD +0 -139
  196. sqlspec-0.25.0.dist-info/licenses/NOTICE +0 -29
  197. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/WHEEL +0 -0
  198. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/entry_points.txt +0 -0
  199. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,13 @@
 # pyright: reportPrivateUsage=false
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Optional, Union
+from typing import TYPE_CHECKING, Any
+
+from mypy_extensions import mypyc_attr
 
 from sqlspec.exceptions import MissingDependencyError
-from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
+from sqlspec.storage._utils import ensure_pyarrow, resolve_storage_path
+from sqlspec.typing import FSSPEC_INSTALLED
 from sqlspec.utils.sync_tools import async_
 
 if TYPE_CHECKING:
@@ -18,41 +21,80 @@ logger = logging.getLogger(__name__)
 
 
 class _ArrowStreamer:
+    """Async iterator for streaming Arrow batches from FSSpec backend.
+
+    Uses async_() to offload blocking operations to thread pool,
+    preventing event loop blocking during file I/O and iteration.
+
+    CRITICAL: Creates generators on main thread, offloads only next() calls.
+    """
+
+    __slots__ = ("_initialized", "backend", "batch_iterator", "kwargs", "paths_iterator", "pattern")
+
     def __init__(self, backend: "FSSpecBackend", pattern: str, **kwargs: Any) -> None:
         self.backend = backend
         self.pattern = pattern
         self.kwargs = kwargs
-        self.paths_iterator: Optional[Iterator[str]] = None
-        self.batch_iterator: Optional[Iterator[ArrowRecordBatch]] = None
+        self.paths_iterator: Iterator[str] | None = None
+        self.batch_iterator: Iterator[ArrowRecordBatch] | None = None
+        self._initialized = False
 
     def __aiter__(self) -> "_ArrowStreamer":
         return self
 
     async def _initialize(self) -> None:
-        """Initialize paths iterator."""
-        if self.paths_iterator is None:
+        """Initialize paths iterator asynchronously."""
+        if not self._initialized:
            paths = await async_(self.backend.glob)(self.pattern, **self.kwargs)
            self.paths_iterator = iter(paths)
+            self._initialized = True
 
     async def __anext__(self) -> "ArrowRecordBatch":
+        """Get next Arrow batch asynchronously.
+
+        Iterative state machine that avoids recursion and blocking calls.
+
+        Returns:
+            Arrow record batches from matching files.
+
+        Raises:
+            StopAsyncIteration: When no more batches available.
+        """
        await self._initialize()
 
-        if self.batch_iterator:
+        while True:
+            if self.batch_iterator is not None:
+
+                def _safe_next_batch() -> "ArrowRecordBatch":
+                    try:
+                        return next(self.batch_iterator)  # type: ignore[arg-type]
+                    except StopIteration as e:
+                        raise StopAsyncIteration from e
+
+                try:
+                    return await async_(_safe_next_batch)()
+                except StopAsyncIteration:
+                    self.batch_iterator = None
+                    continue
+
             try:
-                return next(self.batch_iterator)
-            except StopIteration:
-                self.batch_iterator = None
+                path = next(self.paths_iterator)  # type: ignore[arg-type]
+            except StopIteration as e:
+                raise StopAsyncIteration from e
+
+            self.batch_iterator = self.backend._stream_file_batches(path)
 
-        if self.paths_iterator:
+    async def aclose(self) -> None:
+        """Close underlying batch iterator."""
+        if self.batch_iterator is not None:
             try:
-                path = next(self.paths_iterator)
-                self.batch_iterator = await async_(self.backend._stream_file_batches)(path)
-                return await self.__anext__()
-            except StopIteration:
-                raise StopAsyncIteration
-        raise StopAsyncIteration
+                close_method = self.batch_iterator.close  # type: ignore[attr-defined]
+                await async_(close_method)()
+            except AttributeError:
+                pass
 
 
+@mypyc_attr(allow_interpreted_subclasses=True)
 class FSSpecBackend:
     """Storage backend using fsspec.
 
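The rewritten _ArrowStreamer above is what stream_arrow_async() hands back (its return statement appears in a later hunk), so callers iterate Parquet record batches with async for and can now release the open batch iterator via the new aclose(). A minimal consumption sketch, assuming stream_arrow_async() returns the iterator directly; the memory:// filesystem and the data/*.parquet layout are invented for the example:

import asyncio

from sqlspec.storage.backends.fsspec import FSSpecBackend


async def count_rows() -> int:
    backend = FSSpecBackend("memory://")
    streamer = backend.stream_arrow_async("data/*.parquet")
    total = 0
    try:
        async for batch in streamer:  # each item is a pyarrow RecordBatch
            total += batch.num_rows
    finally:
        await streamer.aclose()  # added in this release: closes the underlying batch iterator
    return total


asyncio.run(count_rows())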
@@ -60,19 +102,37 @@ class FSSpecBackend:
     including HTTP, HTTPS, FTP, and cloud storage services.
     """
 
+    __slots__ = ("_fs_uri", "backend_type", "base_path", "fs", "protocol")
+
     def __init__(self, uri: str, **kwargs: Any) -> None:
-        self._ensure_fsspec()
+        if not FSSPEC_INSTALLED:
+            raise MissingDependencyError(package="fsspec", install_package="fsspec")
 
         base_path = kwargs.pop("base_path", "")
-        self.base_path = base_path.rstrip("/") if base_path else ""
 
         if "://" in uri:
             self.protocol = uri.split("://", maxsplit=1)[0]
             self._fs_uri = uri
+
+            # For S3/cloud URIs, extract bucket/path from URI as base_path
+            if self.protocol in {"s3", "gs", "az", "gcs"}:
+                from urllib.parse import urlparse
+
+                parsed = urlparse(uri)
+                # Combine netloc (bucket) and path for base_path
+                if parsed.netloc:
+                    uri_base_path = parsed.netloc
+                    if parsed.path and parsed.path != "/":
+                        uri_base_path = f"{uri_base_path}{parsed.path}"
+                    # Only use URI base_path if no explicit base_path provided
+                    if not base_path:
+                        base_path = uri_base_path
         else:
             self.protocol = uri
             self._fs_uri = f"{uri}://"
 
+        self.base_path = base_path.rstrip("/") if base_path else ""
+
 
         import fsspec
         self.fs = fsspec.filesystem(self.protocol, **kwargs)
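With the constructor change above, a cloud URI contributes its bucket and key prefix to base_path unless the caller passes an explicit base_path kwarg. A rough worked example of that derivation, copying the urlparse logic from the hunk; the bucket and prefix names are invented:

from urllib.parse import urlparse

uri = "s3://my-bucket/datasets"
parsed = urlparse(uri)  # netloc="my-bucket", path="/datasets"
uri_base_path = parsed.netloc
if parsed.path and parsed.path != "/":
    uri_base_path = f"{uri_base_path}{parsed.path}"

print(uri_base_path)  # "my-bucket/datasets", stored as self.base_path after rstrip("/")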
@@ -93,48 +153,19 @@
 
         return cls(uri=uri, **kwargs)
 
-    def _ensure_fsspec(self) -> None:
-        """Ensure fsspec is available for operations."""
-        if not FSSPEC_INSTALLED:
-            raise MissingDependencyError(package="fsspec", install_package="fsspec")
-
-    def _ensure_pyarrow(self) -> None:
-        """Ensure PyArrow is available for Arrow operations."""
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
-
-    def _resolve_path(self, path: Union[str, Path]) -> str:
-        """Resolve path relative to base_path."""
-        path_str = str(path)
-        if self.base_path:
-            clean_base = self.base_path.rstrip("/")
-            clean_path = path_str.lstrip("/")
-            return f"{clean_base}/{clean_path}"
-        if self.protocol == "s3" and "://" in self._fs_uri:
-            # For S3, we need to include the bucket from the URI
-            # Extract bucket and path from URI like s3://bucket/path
-            uri_parts = self._fs_uri.split("://", 1)[1]  # Remove s3://
-            if "/" in uri_parts:
-                # URI has bucket and base path
-                return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
-            # URI has only bucket
-            return f"{uri_parts}/{path_str.lstrip('/')}"
-        return path_str
-
     @property
     def base_uri(self) -> str:
         return self._fs_uri
 
-    def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
+    def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes:
         """Read bytes from an object."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return] # pyright: ignore
 
-    def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
+    def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
         """Write bytes to an object."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
 
-        # Only create directories for local file systems, not for cloud storage
         if self.protocol == "file":
             parent_dir = str(Path(resolved_path).parent)
             if parent_dir and not self.fs.exists(parent_dir):
@@ -143,84 +174,85 @@
         with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
             f.write(data)  # pyright: ignore
 
-    def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
+    def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from an object."""
         data = self.read_bytes(path, **kwargs)
         return data.decode(encoding)
 
-    def write_text(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text to an object."""
         self.write_bytes(path, data.encode(encoding), **kwargs)
 
-    def exists(self, path: Union[str, Path], **kwargs: Any) -> bool:
+    def exists(self, path: str | Path, **kwargs: Any) -> bool:
         """Check if an object exists."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.exists(resolved_path, **kwargs)  # type: ignore[no-any-return]
 
-    def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
+    def delete(self, path: str | Path, **kwargs: Any) -> None:
         """Delete an object."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.rm(resolved_path, **kwargs)
 
-    def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+    def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Copy an object."""
-        source_path = self._resolve_path(source)
-        dest_path = self._resolve_path(destination)
+        source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
+        dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.copy(source_path, dest_path, **kwargs)
 
-    def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+    def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Move an object."""
-        source_path = self._resolve_path(source)
-        dest_path = self._resolve_path(destination)
+        source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=False)
+        dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=False)
         self.fs.mv(source_path, dest_path, **kwargs)
 
-    def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
+    def read_arrow(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
         """Read an Arrow table from storage."""
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        ensure_pyarrow()
         import pyarrow.parquet as pq
 
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
             return pq.read_table(f)
 
-    def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
+    def write_arrow(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
         """Write an Arrow table to storage."""
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        ensure_pyarrow()
         import pyarrow.parquet as pq
 
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         with self.fs.open(resolved_path, mode="wb") as f:
             pq.write_table(table, f, **kwargs)  # pyright: ignore
 
     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
         """List objects with optional prefix."""
-        resolved_prefix = self._resolve_path(prefix)
+        resolved_prefix = resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=False)
         if recursive:
             return sorted(self.fs.find(resolved_prefix, **kwargs))
         return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
 
     def glob(self, pattern: str, **kwargs: Any) -> list[str]:
         """Find objects matching a glob pattern."""
-        resolved_pattern = self._resolve_path(pattern)
+        resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=False)
         return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore
 
-    def is_object(self, path: Union[str, Path]) -> bool:
+    def is_object(self, path: str | Path) -> bool:
         """Check if path points to an object."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
 
-    def is_path(self, path: Union[str, Path]) -> bool:
+    def is_path(self, path: str | Path) -> bool:
         """Check if path points to a prefix (directory-like)."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]
 
-    def get_metadata(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
+    def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
         """Get object metadata."""
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         try:
-            resolved_path = self._resolve_path(path)
             info = self.fs.info(resolved_path, **kwargs)
+        except FileNotFoundError:
+            return {"path": resolved_path, "exists": False}
+        else:
             if isinstance(info, dict):
                 return {
                     "path": resolved_path,
@@ -229,23 +261,20 @@
                     "last_modified": info.get("mtime"),
                     "type": info.get("type", "file"),
                 }
-
-        except FileNotFoundError:
-            return {"path": self._resolve_path(path), "exists": False}
-        return {
-            "path": resolved_path,
-            "exists": True,
-            "size": info.size,
-            "last_modified": info.mtime,
-            "type": info.type,
-        }
+            return {
+                "path": resolved_path,
+                "exists": True,
+                "size": info.size,
+                "last_modified": info.mtime,
+                "type": info.type,
+            }
 
     def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
         """Generate a signed URL for the file."""
-        resolved_path = self._resolve_path(path)
+        resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=False)
         return f"{self._fs_uri}{resolved_path}"
 
-    def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
+    def _stream_file_batches(self, obj_path: str | Path) -> "Iterator[ArrowRecordBatch]":
         import pyarrow.parquet as pq
 
         with self.fs.open(obj_path, mode="rb") as f:
@@ -253,17 +282,16 @@
             yield from parquet_file.iter_batches()
 
     def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
-        self._ensure_fsspec()
-        self._ensure_pyarrow()
+        ensure_pyarrow()
 
         for obj_path in self.glob(pattern, **kwargs):
             yield from self._stream_file_batches(obj_path)
 
-    async def read_bytes_async(self, path: Union[str, Path], **kwargs: Any) -> bytes:
+    async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes:
         """Read bytes from storage asynchronously."""
         return await async_(self.read_bytes)(path, **kwargs)
 
-    async def write_bytes_async(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
+    async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None:
         """Write bytes to storage asynchronously."""
         return await async_(self.write_bytes)(path, data, **kwargs)
 
@@ -277,15 +305,15 @@
         Returns:
             AsyncIterator of Arrow record batches
         """
-        self._ensure_pyarrow()
+        ensure_pyarrow()
 
         return _ArrowStreamer(self, pattern, **kwargs)
 
-    async def read_text_async(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
+    async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from storage asynchronously."""
         return await async_(self.read_text)(path, encoding, **kwargs)
 
-    async def write_text_async(self, path: Union[str, Path], data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+    async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
         """Write text to storage asynchronously."""
         await async_(self.write_text)(path, data, encoding, **kwargs)
 
@@ -293,23 +321,23 @@
         """List objects in storage asynchronously."""
         return await async_(self.list_objects)(prefix, recursive, **kwargs)
 
-    async def exists_async(self, path: Union[str, Path], **kwargs: Any) -> bool:
+    async def exists_async(self, path: str | Path, **kwargs: Any) -> bool:
         """Check if object exists in storage asynchronously."""
         return await async_(self.exists)(path, **kwargs)
 
-    async def delete_async(self, path: Union[str, Path], **kwargs: Any) -> None:
+    async def delete_async(self, path: str | Path, **kwargs: Any) -> None:
         """Delete object from storage asynchronously."""
         await async_(self.delete)(path, **kwargs)
 
-    async def copy_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+    async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Copy object in storage asynchronously."""
         await async_(self.copy)(source, destination, **kwargs)
 
-    async def move_async(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
+    async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None:
         """Move object in storage asynchronously."""
         await async_(self.move)(source, destination, **kwargs)
 
-    async def get_metadata_async(self, path: Union[str, Path], **kwargs: Any) -> dict[str, Any]:
+    async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]:
         """Get object metadata from storage asynchronously."""
         return await async_(self.get_metadata)(path, **kwargs)
 
@@ -317,10 +345,10 @@
         """Generate a signed URL asynchronously."""
         return await async_(self.sign)(path, expires_in, for_upload)
 
-    async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
+    async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> "ArrowTable":
         """Read Arrow table from storage asynchronously."""
        return await async_(self.read_arrow)(path, **kwargs)
 
-    async def write_arrow_async(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
+    async def write_arrow_async(self, path: str | Path, table: "ArrowTable", **kwargs: Any) -> None:
         """Write Arrow table to storage asynchronously."""
         await async_(self.write_arrow)(path, table, **kwargs)
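Throughout the file, each *_async method continues to wrap its synchronous counterpart with async_() from sqlspec.utils.sync_tools, which is also how the new _ArrowStreamer offloads its next() calls. That helper's implementation is not part of this diff; the following is only a simplified stand-in built on asyncio.to_thread to illustrate the offload pattern the new code relies on:

import asyncio
import functools
from collections.abc import Awaitable, Callable
from pathlib import Path
from typing import Any, TypeVar

T = TypeVar("T")


def async_(func: Callable[..., T]) -> Callable[..., Awaitable[T]]:
    """Illustrative stand-in: run a blocking callable in a worker thread when awaited."""

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> T:
        return await asyncio.to_thread(func, *args, **kwargs)

    return wrapper


async def main() -> None:
    # Same call shape as read_bytes_async in the diff: await async_(blocking_callable)(...)
    data = await async_(Path(__file__).read_bytes)()
    print(len(data))


asyncio.run(main())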