sqlspec-0.25.0-py3-none-any.whl → sqlspec-0.27.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

Files changed (199)
  1. sqlspec/__init__.py +7 -15
  2. sqlspec/_serialization.py +256 -24
  3. sqlspec/_typing.py +71 -52
  4. sqlspec/adapters/adbc/_types.py +1 -1
  5. sqlspec/adapters/adbc/adk/__init__.py +5 -0
  6. sqlspec/adapters/adbc/adk/store.py +870 -0
  7. sqlspec/adapters/adbc/config.py +69 -12
  8. sqlspec/adapters/adbc/data_dictionary.py +340 -0
  9. sqlspec/adapters/adbc/driver.py +266 -58
  10. sqlspec/adapters/adbc/litestar/__init__.py +5 -0
  11. sqlspec/adapters/adbc/litestar/store.py +504 -0
  12. sqlspec/adapters/adbc/type_converter.py +153 -0
  13. sqlspec/adapters/aiosqlite/_types.py +1 -1
  14. sqlspec/adapters/aiosqlite/adk/__init__.py +5 -0
  15. sqlspec/adapters/aiosqlite/adk/store.py +527 -0
  16. sqlspec/adapters/aiosqlite/config.py +88 -15
  17. sqlspec/adapters/aiosqlite/data_dictionary.py +149 -0
  18. sqlspec/adapters/aiosqlite/driver.py +143 -40
  19. sqlspec/adapters/aiosqlite/litestar/__init__.py +5 -0
  20. sqlspec/adapters/aiosqlite/litestar/store.py +281 -0
  21. sqlspec/adapters/aiosqlite/pool.py +7 -7
  22. sqlspec/adapters/asyncmy/__init__.py +7 -1
  23. sqlspec/adapters/asyncmy/_types.py +2 -2
  24. sqlspec/adapters/asyncmy/adk/__init__.py +5 -0
  25. sqlspec/adapters/asyncmy/adk/store.py +493 -0
  26. sqlspec/adapters/asyncmy/config.py +68 -23
  27. sqlspec/adapters/asyncmy/data_dictionary.py +161 -0
  28. sqlspec/adapters/asyncmy/driver.py +313 -58
  29. sqlspec/adapters/asyncmy/litestar/__init__.py +5 -0
  30. sqlspec/adapters/asyncmy/litestar/store.py +296 -0
  31. sqlspec/adapters/asyncpg/__init__.py +2 -1
  32. sqlspec/adapters/asyncpg/_type_handlers.py +71 -0
  33. sqlspec/adapters/asyncpg/_types.py +11 -7
  34. sqlspec/adapters/asyncpg/adk/__init__.py +5 -0
  35. sqlspec/adapters/asyncpg/adk/store.py +450 -0
  36. sqlspec/adapters/asyncpg/config.py +59 -35
  37. sqlspec/adapters/asyncpg/data_dictionary.py +173 -0
  38. sqlspec/adapters/asyncpg/driver.py +170 -25
  39. sqlspec/adapters/asyncpg/litestar/__init__.py +5 -0
  40. sqlspec/adapters/asyncpg/litestar/store.py +253 -0
  41. sqlspec/adapters/bigquery/_types.py +1 -1
  42. sqlspec/adapters/bigquery/adk/__init__.py +5 -0
  43. sqlspec/adapters/bigquery/adk/store.py +576 -0
  44. sqlspec/adapters/bigquery/config.py +27 -10
  45. sqlspec/adapters/bigquery/data_dictionary.py +149 -0
  46. sqlspec/adapters/bigquery/driver.py +368 -142
  47. sqlspec/adapters/bigquery/litestar/__init__.py +5 -0
  48. sqlspec/adapters/bigquery/litestar/store.py +327 -0
  49. sqlspec/adapters/bigquery/type_converter.py +125 -0
  50. sqlspec/adapters/duckdb/_types.py +1 -1
  51. sqlspec/adapters/duckdb/adk/__init__.py +14 -0
  52. sqlspec/adapters/duckdb/adk/store.py +553 -0
  53. sqlspec/adapters/duckdb/config.py +80 -20
  54. sqlspec/adapters/duckdb/data_dictionary.py +163 -0
  55. sqlspec/adapters/duckdb/driver.py +167 -45
  56. sqlspec/adapters/duckdb/litestar/__init__.py +5 -0
  57. sqlspec/adapters/duckdb/litestar/store.py +332 -0
  58. sqlspec/adapters/duckdb/pool.py +4 -4
  59. sqlspec/adapters/duckdb/type_converter.py +133 -0
  60. sqlspec/adapters/oracledb/_numpy_handlers.py +133 -0
  61. sqlspec/adapters/oracledb/_types.py +20 -2
  62. sqlspec/adapters/oracledb/adk/__init__.py +5 -0
  63. sqlspec/adapters/oracledb/adk/store.py +1745 -0
  64. sqlspec/adapters/oracledb/config.py +122 -32
  65. sqlspec/adapters/oracledb/data_dictionary.py +509 -0
  66. sqlspec/adapters/oracledb/driver.py +353 -91
  67. sqlspec/adapters/oracledb/litestar/__init__.py +5 -0
  68. sqlspec/adapters/oracledb/litestar/store.py +767 -0
  69. sqlspec/adapters/oracledb/migrations.py +348 -73
  70. sqlspec/adapters/oracledb/type_converter.py +207 -0
  71. sqlspec/adapters/psqlpy/_type_handlers.py +44 -0
  72. sqlspec/adapters/psqlpy/_types.py +2 -1
  73. sqlspec/adapters/psqlpy/adk/__init__.py +5 -0
  74. sqlspec/adapters/psqlpy/adk/store.py +482 -0
  75. sqlspec/adapters/psqlpy/config.py +46 -17
  76. sqlspec/adapters/psqlpy/data_dictionary.py +172 -0
  77. sqlspec/adapters/psqlpy/driver.py +123 -209
  78. sqlspec/adapters/psqlpy/litestar/__init__.py +5 -0
  79. sqlspec/adapters/psqlpy/litestar/store.py +272 -0
  80. sqlspec/adapters/psqlpy/type_converter.py +102 -0
  81. sqlspec/adapters/psycopg/_type_handlers.py +80 -0
  82. sqlspec/adapters/psycopg/_types.py +2 -1
  83. sqlspec/adapters/psycopg/adk/__init__.py +5 -0
  84. sqlspec/adapters/psycopg/adk/store.py +944 -0
  85. sqlspec/adapters/psycopg/config.py +69 -35
  86. sqlspec/adapters/psycopg/data_dictionary.py +331 -0
  87. sqlspec/adapters/psycopg/driver.py +238 -81
  88. sqlspec/adapters/psycopg/litestar/__init__.py +5 -0
  89. sqlspec/adapters/psycopg/litestar/store.py +554 -0
  90. sqlspec/adapters/sqlite/__init__.py +2 -1
  91. sqlspec/adapters/sqlite/_type_handlers.py +86 -0
  92. sqlspec/adapters/sqlite/_types.py +1 -1
  93. sqlspec/adapters/sqlite/adk/__init__.py +5 -0
  94. sqlspec/adapters/sqlite/adk/store.py +572 -0
  95. sqlspec/adapters/sqlite/config.py +87 -15
  96. sqlspec/adapters/sqlite/data_dictionary.py +149 -0
  97. sqlspec/adapters/sqlite/driver.py +137 -54
  98. sqlspec/adapters/sqlite/litestar/__init__.py +5 -0
  99. sqlspec/adapters/sqlite/litestar/store.py +318 -0
  100. sqlspec/adapters/sqlite/pool.py +18 -9
  101. sqlspec/base.py +45 -26
  102. sqlspec/builder/__init__.py +73 -4
  103. sqlspec/builder/_base.py +162 -89
  104. sqlspec/builder/_column.py +62 -29
  105. sqlspec/builder/_ddl.py +180 -121
  106. sqlspec/builder/_delete.py +5 -4
  107. sqlspec/builder/_dml.py +388 -0
  108. sqlspec/{_sql.py → builder/_factory.py} +53 -94
  109. sqlspec/builder/_insert.py +32 -131
  110. sqlspec/builder/_join.py +375 -0
  111. sqlspec/builder/_merge.py +446 -11
  112. sqlspec/builder/_parsing_utils.py +111 -17
  113. sqlspec/builder/_select.py +1457 -24
  114. sqlspec/builder/_update.py +11 -42
  115. sqlspec/cli.py +307 -194
  116. sqlspec/config.py +252 -67
  117. sqlspec/core/__init__.py +5 -4
  118. sqlspec/core/cache.py +17 -17
  119. sqlspec/core/compiler.py +62 -9
  120. sqlspec/core/filters.py +37 -37
  121. sqlspec/core/hashing.py +9 -9
  122. sqlspec/core/parameters.py +83 -48
  123. sqlspec/core/result.py +102 -46
  124. sqlspec/core/splitter.py +16 -17
  125. sqlspec/core/statement.py +36 -30
  126. sqlspec/core/type_conversion.py +235 -0
  127. sqlspec/driver/__init__.py +7 -6
  128. sqlspec/driver/_async.py +188 -151
  129. sqlspec/driver/_common.py +285 -80
  130. sqlspec/driver/_sync.py +188 -152
  131. sqlspec/driver/mixins/_result_tools.py +20 -236
  132. sqlspec/driver/mixins/_sql_translator.py +4 -4
  133. sqlspec/exceptions.py +75 -7
  134. sqlspec/extensions/adk/__init__.py +53 -0
  135. sqlspec/extensions/adk/_types.py +51 -0
  136. sqlspec/extensions/adk/converters.py +172 -0
  137. sqlspec/extensions/adk/migrations/0001_create_adk_tables.py +144 -0
  138. sqlspec/extensions/adk/migrations/__init__.py +0 -0
  139. sqlspec/extensions/adk/service.py +181 -0
  140. sqlspec/extensions/adk/store.py +536 -0
  141. sqlspec/extensions/aiosql/adapter.py +73 -53
  142. sqlspec/extensions/litestar/__init__.py +21 -4
  143. sqlspec/extensions/litestar/cli.py +54 -10
  144. sqlspec/extensions/litestar/config.py +59 -266
  145. sqlspec/extensions/litestar/handlers.py +46 -17
  146. sqlspec/extensions/litestar/migrations/0001_create_session_table.py +137 -0
  147. sqlspec/extensions/litestar/migrations/__init__.py +3 -0
  148. sqlspec/extensions/litestar/plugin.py +324 -223
  149. sqlspec/extensions/litestar/providers.py +25 -25
  150. sqlspec/extensions/litestar/store.py +265 -0
  151. sqlspec/loader.py +30 -49
  152. sqlspec/migrations/__init__.py +4 -3
  153. sqlspec/migrations/base.py +302 -39
  154. sqlspec/migrations/commands.py +611 -144
  155. sqlspec/migrations/context.py +142 -0
  156. sqlspec/migrations/fix.py +199 -0
  157. sqlspec/migrations/loaders.py +68 -23
  158. sqlspec/migrations/runner.py +543 -107
  159. sqlspec/migrations/tracker.py +237 -21
  160. sqlspec/migrations/utils.py +51 -3
  161. sqlspec/migrations/validation.py +177 -0
  162. sqlspec/protocols.py +66 -36
  163. sqlspec/storage/_utils.py +98 -0
  164. sqlspec/storage/backends/fsspec.py +134 -106
  165. sqlspec/storage/backends/local.py +78 -51
  166. sqlspec/storage/backends/obstore.py +278 -162
  167. sqlspec/storage/registry.py +75 -39
  168. sqlspec/typing.py +16 -84
  169. sqlspec/utils/config_resolver.py +153 -0
  170. sqlspec/utils/correlation.py +4 -5
  171. sqlspec/utils/data_transformation.py +3 -2
  172. sqlspec/utils/deprecation.py +9 -8
  173. sqlspec/utils/fixtures.py +4 -4
  174. sqlspec/utils/logging.py +46 -6
  175. sqlspec/utils/module_loader.py +2 -2
  176. sqlspec/utils/schema.py +288 -0
  177. sqlspec/utils/serializers.py +50 -2
  178. sqlspec/utils/sync_tools.py +21 -17
  179. sqlspec/utils/text.py +1 -2
  180. sqlspec/utils/type_guards.py +111 -20
  181. sqlspec/utils/version.py +433 -0
  182. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/METADATA +40 -21
  183. sqlspec-0.27.0.dist-info/RECORD +207 -0
  184. sqlspec/builder/mixins/__init__.py +0 -55
  185. sqlspec/builder/mixins/_cte_and_set_ops.py +0 -254
  186. sqlspec/builder/mixins/_delete_operations.py +0 -50
  187. sqlspec/builder/mixins/_insert_operations.py +0 -282
  188. sqlspec/builder/mixins/_join_operations.py +0 -389
  189. sqlspec/builder/mixins/_merge_operations.py +0 -592
  190. sqlspec/builder/mixins/_order_limit_operations.py +0 -152
  191. sqlspec/builder/mixins/_pivot_operations.py +0 -157
  192. sqlspec/builder/mixins/_select_operations.py +0 -936
  193. sqlspec/builder/mixins/_update_operations.py +0 -218
  194. sqlspec/builder/mixins/_where_clause.py +0 -1304
  195. sqlspec-0.25.0.dist-info/RECORD +0 -139
  196. sqlspec-0.25.0.dist-info/licenses/NOTICE +0 -29
  197. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/WHEEL +0 -0
  198. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/entry_points.txt +0 -0
  199. {sqlspec-0.25.0.dist-info → sqlspec-0.27.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,18 +5,22 @@ and local file storage.
5
5
  """
6
6
 
7
7
  import fnmatch
8
+ import io
8
9
  import logging
9
10
  from collections.abc import AsyncIterator, Iterator
10
- from typing import TYPE_CHECKING, Any, Final, Optional, Union, cast
11
+ from typing import TYPE_CHECKING, Any, Final, cast
11
12
  from urllib.parse import urlparse
12
13
 
14
+ from sqlspec.utils.sync_tools import async_
15
+
13
16
  if TYPE_CHECKING:
14
17
  from pathlib import Path
15
18
 
16
19
  from mypy_extensions import mypyc_attr
17
20
 
18
21
  from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
19
- from sqlspec.typing import OBSTORE_INSTALLED, PYARROW_INSTALLED, ArrowRecordBatch, ArrowTable
22
+ from sqlspec.storage._utils import ensure_pyarrow, resolve_storage_path
23
+ from sqlspec.typing import OBSTORE_INSTALLED, ArrowRecordBatch, ArrowTable
20
24
 
21
25
  __all__ = ("ObStoreBackend",)
22
26
 
@@ -24,37 +28,64 @@ logger = logging.getLogger(__name__)
24
28
 
25
29
 
26
30
  class _AsyncArrowIterator:
27
- """Helper class to work around mypyc's lack of async generator support."""
31
+ """Helper class to work around mypyc's lack of async generator support.
32
+
33
+ Uses hybrid async/sync pattern:
34
+ - Native async I/O for network operations (S3, GCS, Azure)
35
+ - Thread pool for CPU-bound PyArrow parsing
36
+ """
37
+
38
+ __slots__ = ("_current_file_iterator", "_files_iterator", "backend", "kwargs", "pattern")
28
39
 
29
40
  def __init__(self, backend: "ObStoreBackend", pattern: str, **kwargs: Any) -> None:
30
41
  self.backend = backend
31
42
  self.pattern = pattern
32
43
  self.kwargs = kwargs
33
- self._files_iterator: Optional[Iterator[str]] = None
34
- self._current_file_iterator: Optional[Iterator[ArrowRecordBatch]] = None
44
+ self._files_iterator: Iterator[str] | None = None
45
+ self._current_file_iterator: Iterator[ArrowRecordBatch] | None = None
35
46
 
36
47
  def __aiter__(self) -> "_AsyncArrowIterator":
37
48
  return self
38
49
 
39
50
  async def __anext__(self) -> ArrowRecordBatch:
51
+ import pyarrow.parquet as pq
52
+
40
53
  if self._files_iterator is None:
41
54
  files = self.backend.glob(self.pattern, **self.kwargs)
42
55
  self._files_iterator = iter(files)
43
56
 
44
57
  while True:
45
58
  if self._current_file_iterator is not None:
59
+
60
+ def _safe_next_batch() -> ArrowRecordBatch:
61
+ try:
62
+ return next(self._current_file_iterator) # type: ignore[arg-type]
63
+ except StopIteration as e:
64
+ raise StopAsyncIteration from e
65
+
46
66
  try:
47
- return next(self._current_file_iterator)
48
- except StopIteration:
67
+ return await async_(_safe_next_batch)()
68
+ except StopAsyncIteration:
49
69
  self._current_file_iterator = None
70
+ continue
50
71
 
51
72
  try:
52
73
  next_file = next(self._files_iterator)
53
- # Stream from this file
54
- file_batches = self.backend.stream_arrow(next_file)
55
- self._current_file_iterator = iter(file_batches)
56
- except StopIteration:
57
- raise StopAsyncIteration
74
+ except StopIteration as e:
75
+ raise StopAsyncIteration from e
76
+
77
+ data = await self.backend.read_bytes_async(next_file)
78
+ parquet_file = pq.ParquetFile(io.BytesIO(data))
79
+ self._current_file_iterator = parquet_file.iter_batches()
80
+
81
+ async def aclose(self) -> None:
82
+ """Close underlying file iterator."""
83
+ if self._current_file_iterator is not None:
84
+ try:
85
+ close_method = self._current_file_iterator.close # type: ignore[attr-defined]
86
+ await async_(close_method)() # pyright: ignore
87
+ except AttributeError:
88
+ pass
58
89
 
59
90
 
60
91
  DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
@@ -69,17 +100,17 @@ class ObStoreBackend:
69
100
  local filesystem, and HTTP endpoints.
70
101
  """
71
102
 
72
- __slots__ = ("_path_cache", "backend_type", "base_path", "protocol", "store", "store_options", "store_uri")
73
-
74
- def _ensure_obstore(self) -> None:
75
- """Ensure obstore is available for operations."""
76
- if not OBSTORE_INSTALLED:
77
- raise MissingDependencyError(package="obstore", install_package="obstore")
78
-
79
- def _ensure_pyarrow(self) -> None:
80
- """Ensure PyArrow is available for Arrow operations."""
81
- if not PYARROW_INSTALLED:
82
- raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
103
+ __slots__ = (
104
+ "_is_local_store",
105
+ "_local_store_root",
106
+ "_path_cache",
107
+ "backend_type",
108
+ "base_path",
109
+ "protocol",
110
+ "store",
111
+ "store_options",
112
+ "store_uri",
113
+ )
83
114
 
84
115
  def __init__(self, uri: str, **kwargs: Any) -> None:
85
116
  """Initialize obstore backend.
@@ -87,9 +118,12 @@ class ObStoreBackend:
87
118
  Args:
88
119
  uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
89
120
  **kwargs: Additional options including base_path and obstore configuration
90
- """
91
121
 
92
- self._ensure_obstore()
122
+ Raises:
123
+ MissingDependencyError: If obstore is not installed.
124
+ """
125
+ if not OBSTORE_INSTALLED:
126
+ raise MissingDependencyError(package="obstore", install_package="obstore")
93
127
 
94
128
  try:
95
129
  # Extract base_path from kwargs
@@ -100,6 +134,8 @@ class ObStoreBackend:
100
134
  self.store_options = kwargs
101
135
  self.store: Any
102
136
  self._path_cache: dict[str, str] = {}
137
+ self._is_local_store = False
138
+ self._local_store_root = ""
103
139
  self.protocol = uri.split("://", 1)[0] if "://" in uri else "file"
104
140
  self.backend_type = "obstore"
105
141
 
@@ -112,11 +148,26 @@ class ObStoreBackend:
112
148
 
113
149
  from obstore.store import LocalStore
114
150
 
151
+ # Parse URI to extract path
152
+ # Note: urlparse splits on '#', so we need to reconstruct the full path
115
153
  parsed = urlparse(uri)
116
- path = parsed.path or "/"
117
- # Create directory if it doesn't exist (ObStore LocalStore requires it)
118
- PathlibPath(path).mkdir(parents=True, exist_ok=True)
119
- self.store = LocalStore(path)
154
+ path_str = parsed.path or "/"
155
+ # Append fragment if present (handles paths with '#' character)
156
+ if parsed.fragment:
157
+ path_str = f"{path_str}#{parsed.fragment}"
158
+ path_obj = PathlibPath(path_str)
159
+
160
+ # If path points to a file, use its parent as the base directory
161
+ if path_obj.is_file():
162
+ path_str = str(path_obj.parent)
163
+
164
+ # If base_path provided via kwargs, use it as LocalStore root
165
+ # Otherwise use the URI path
166
+ local_store_root = self.base_path or path_str
167
+
168
+ self._is_local_store = True
169
+ self._local_store_root = local_store_root
170
+ self.store = LocalStore(local_store_root, mkdir=True)
120
171
  else:
121
172
  from obstore.store import from_url
122
173
 
@@ -141,64 +192,91 @@ class ObStoreBackend:
141
192
 
142
193
  return cls(uri=store_uri, **kwargs)
143
194
 
144
- def _resolve_path(self, path: "Union[str, Path]") -> str:
145
- """Resolve path relative to base_path."""
146
- path_str = str(path)
147
- if path_str.startswith("file://"):
148
- path_str = path_str.removeprefix("file://")
149
- if self.store_uri.startswith("file://") and path_str.startswith("/"):
150
- return path_str.lstrip("/")
151
- if self.base_path:
152
- clean_base = self.base_path.rstrip("/")
153
- clean_path = path_str.lstrip("/")
154
- return f"{clean_base}/{clean_path}"
155
- return path_str
156
-
157
- def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
195
+ def _resolve_path_for_local_store(self, path: "str | Path") -> str:
196
+ """Resolve path for LocalStore which expects relative paths from its root."""
197
+ from pathlib import Path as PathlibPath
198
+
199
+ path_obj = PathlibPath(str(path))
200
+
201
+ # If absolute path, try to make it relative to LocalStore root
202
+ if path_obj.is_absolute() and self._local_store_root:
203
+ try:
204
+ return str(path_obj.relative_to(self._local_store_root))
205
+ except ValueError:
206
+ # Path is outside LocalStore root - strip leading / as fallback
207
+ return str(path).lstrip("/")
208
+
209
+ # Relative path - return as-is (already relative to LocalStore root)
210
+ return str(path)
211
+
212
+ def read_bytes(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
158
213
  """Read bytes using obstore."""
159
- result = self.store.get(self._resolve_path(path))
214
+ # For LocalStore, use special path resolution (relative to LocalStore root)
215
+ if self._is_local_store:
216
+ resolved_path = self._resolve_path_for_local_store(path)
217
+ else:
218
+ # For cloud storage, use standard resolution
219
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
220
+
221
+ result = self.store.get(resolved_path)
160
222
  return cast("bytes", result.bytes().to_bytes())
161
223
 
162
- def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
224
+ def write_bytes(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
163
225
  """Write bytes using obstore."""
164
- self.store.put(self._resolve_path(path), data)
226
+ # For LocalStore, use special path resolution (relative to LocalStore root)
227
+ if self._is_local_store:
228
+ resolved_path = self._resolve_path_for_local_store(path)
229
+ else:
230
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
165
231
 
166
- def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
232
+ self.store.put(resolved_path, data)
233
+
234
+ def read_text(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
167
235
  """Read text using obstore."""
168
236
  return self.read_bytes(path, **kwargs).decode(encoding)
169
237
 
170
- def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
238
+ def write_text(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
171
239
  """Write text using obstore."""
172
240
  self.write_bytes(path, data.encode(encoding), **kwargs)
173
241
 
174
242
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
175
243
  """List objects using obstore."""
176
- resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
244
+ resolved_prefix = (
245
+ resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
246
+ if prefix
247
+ else self.base_path or ""
248
+ )
177
249
  items = self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
178
250
  paths: list[str] = []
179
251
  for batch in items:
180
252
  paths.extend(item["path"] for item in batch)
181
253
  return sorted(paths)
182
254
 
183
- def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
255
+ def exists(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
184
256
  """Check if object exists using obstore."""
185
257
  try:
186
- self.store.head(self._resolve_path(path))
258
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
259
+ self.store.head(resolved_path)
187
260
  except Exception:
188
261
  return False
189
262
  return True
190
263
 
191
- def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
264
+ def delete(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
192
265
  """Delete object using obstore."""
193
- self.store.delete(self._resolve_path(path))
266
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
267
+ self.store.delete(resolved_path)
194
268
 
195
- def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
269
+ def copy(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
196
270
  """Copy object using obstore."""
197
- self.store.copy(self._resolve_path(source), self._resolve_path(destination))
271
+ source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
272
+ dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
273
+ self.store.copy(source_path, dest_path)
198
274
 
199
- def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
275
+ def move(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
200
276
  """Move object using obstore."""
201
- self.store.rename(self._resolve_path(source), self._resolve_path(destination))
277
+ source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
278
+ dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
279
+ self.store.rename(source_path, dest_path)
202
280
 
203
281
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
204
282
  """Find objects matching pattern.
@@ -207,7 +285,7 @@ class ObStoreBackend:
207
285
  """
208
286
  from pathlib import PurePosixPath
209
287
 
210
- resolved_pattern = self._resolve_path(pattern)
288
+ resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
211
289
  all_objects = self.list_objects(recursive=True, **kwargs)
212
290
 
213
291
  if "**" in pattern:
@@ -229,38 +307,50 @@ class ObStoreBackend:
229
307
  return matching_objects
230
308
  return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
231
309
 
232
- def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
310
+ def get_metadata(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
233
311
  """Get object metadata using obstore."""
234
- resolved_path = self._resolve_path(path)
235
- result: dict[str, Any] = {}
312
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
313
+
236
314
  try:
237
315
  metadata = self.store.head(resolved_path)
238
- result.update(
239
- {
316
+ except Exception:
317
+ return {"path": resolved_path, "exists": False}
318
+ else:
319
+ if isinstance(metadata, dict):
320
+ result = {
240
321
  "path": resolved_path,
241
322
  "exists": True,
242
- "size": getattr(metadata, "size", None),
243
- "last_modified": getattr(metadata, "last_modified", None),
244
- "e_tag": getattr(metadata, "e_tag", None),
245
- "version": getattr(metadata, "version", None),
323
+ "size": metadata.get("size"),
324
+ "last_modified": metadata.get("last_modified"),
325
+ "e_tag": metadata.get("e_tag"),
326
+ "version": metadata.get("version"),
246
327
  }
247
- )
248
- if hasattr(metadata, "metadata") and metadata.metadata:
328
+ if metadata.get("metadata"):
329
+ result["custom_metadata"] = metadata["metadata"]
330
+ return result
331
+
332
+ result = {
333
+ "path": resolved_path,
334
+ "exists": True,
335
+ "size": metadata.size,
336
+ "last_modified": metadata.last_modified,
337
+ "e_tag": metadata.e_tag,
338
+ "version": metadata.version,
339
+ }
340
+
341
+ if metadata.metadata:
249
342
  result["custom_metadata"] = metadata.metadata
250
343
 
251
- except Exception:
252
- return {"path": resolved_path, "exists": False}
253
- else:
254
344
  return result
255
345
 
256
- def is_object(self, path: "Union[str, Path]") -> bool:
346
+ def is_object(self, path: "str | Path") -> bool:
257
347
  """Check if path is an object using obstore."""
258
- resolved_path = self._resolve_path(path)
348
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
259
349
  return self.exists(path) and not resolved_path.endswith("/")
260
350
 
261
- def is_path(self, path: "Union[str, Path]") -> bool:
351
+ def is_path(self, path: "str | Path") -> bool:
262
352
  """Check if path is a prefix/directory using obstore."""
263
- resolved_path = self._resolve_path(path)
353
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
264
354
 
265
355
  if resolved_path.endswith("/"):
266
356
  return True
@@ -271,53 +361,48 @@ class ObStoreBackend:
271
361
  except Exception:
272
362
  return False
273
363
 
274
- def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> ArrowTable:
364
+ def read_arrow(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
275
365
  """Read Arrow table using obstore."""
276
- resolved_path = self._resolve_path(path)
277
- if hasattr(self.store, "read_arrow"):
278
- return self.store.read_arrow(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
279
-
280
- self._ensure_pyarrow()
366
+ ensure_pyarrow()
281
367
  import io
282
368
 
283
369
  import pyarrow.parquet as pq
284
370
 
285
- return pq.read_table(io.BytesIO(self.read_bytes(resolved_path)), **kwargs)
371
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
372
+ data = self.read_bytes(resolved_path)
373
+ return pq.read_table(io.BytesIO(data), **kwargs)
286
374
 
287
- def write_arrow(self, path: "Union[str, Path]", table: ArrowTable, **kwargs: Any) -> None:
375
+ def write_arrow(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
288
376
  """Write Arrow table using obstore."""
289
- resolved_path = self._resolve_path(path)
290
- if hasattr(self.store, "write_arrow"):
291
- self.store.write_arrow(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
292
- else:
293
- self._ensure_pyarrow()
294
- import io
295
-
296
- import pyarrow as pa
297
- import pyarrow.parquet as pq
298
-
299
- buffer = io.BytesIO()
300
-
301
- schema = table.schema
302
- if any(str(f.type).startswith("decimal64") for f in schema):
303
- new_fields = []
304
- for field in schema:
305
- if str(field.type).startswith("decimal64"):
306
- import re
307
-
308
- match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
309
- if match:
310
- precision, scale = int(match.group(1)), int(match.group(2))
311
- new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
312
- else:
313
- new_fields.append(field) # pragma: no cover
377
+ ensure_pyarrow()
378
+ import io
379
+
380
+ import pyarrow as pa
381
+ import pyarrow.parquet as pq
382
+
383
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
384
+
385
+ schema = table.schema
386
+ if any(str(f.type).startswith("decimal64") for f in schema):
387
+ new_fields = []
388
+ for field in schema:
389
+ if str(field.type).startswith("decimal64"):
390
+ import re
391
+
392
+ match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
393
+ if match:
394
+ precision, scale = int(match.group(1)), int(match.group(2))
395
+ new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
314
396
  else:
315
397
  new_fields.append(field)
316
- table = table.cast(pa.schema(new_fields))
398
+ else:
399
+ new_fields.append(field)
400
+ table = table.cast(pa.schema(new_fields))
317
401
 
318
- pq.write_table(table, buffer, **kwargs)
319
- buffer.seek(0)
320
- self.write_bytes(resolved_path, buffer.read())
402
+ buffer = io.BytesIO()
403
+ pq.write_table(table, buffer, **kwargs)
404
+ buffer.seek(0)
405
+ self.write_bytes(resolved_path, buffer.read())
321
406
 
322
407
  def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
323
408
  """Stream Arrow record batches.
@@ -325,13 +410,14 @@ class ObStoreBackend:
325
410
  Yields:
326
411
  Iterator of Arrow record batches from matching objects.
327
412
  """
328
- self._ensure_pyarrow()
413
+ ensure_pyarrow()
329
414
  from io import BytesIO
330
415
 
331
416
  import pyarrow.parquet as pq
332
417
 
333
418
  for obj_path in self.glob(pattern, **kwargs):
334
- result = self.store.get(self._resolve_path(obj_path))
419
+ resolved_path = resolve_storage_path(obj_path, self.base_path, self.protocol, strip_file_scheme=True)
420
+ result = self.store.get(resolved_path)
335
421
  bytes_obj = result.bytes()
336
422
  data = bytes_obj.to_bytes()
337
423
  buffer = BytesIO(data)
@@ -340,26 +426,38 @@ class ObStoreBackend:
340
426
 
341
427
  def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
342
428
  """Generate a signed URL for the object."""
343
- resolved_path = self._resolve_path(path)
344
- if hasattr(self.store, "sign_url") and callable(self.store.sign_url):
345
- return self.store.sign_url(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
429
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
346
430
  return f"{self.store_uri}/{resolved_path}"
347
431
 
348
- async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
432
+ async def read_bytes_async(self, path: "str | Path", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
349
433
  """Read bytes from storage asynchronously."""
350
- resolved_path = self._resolve_path(path)
434
+ # For LocalStore (file protocol with base_path), use special resolution
435
+ if self._is_local_store:
436
+ resolved_path = self._resolve_path_for_local_store(path)
437
+ else:
438
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
439
+
351
440
  result = await self.store.get_async(resolved_path)
352
441
  bytes_obj = await result.bytes_async()
353
442
  return bytes_obj.to_bytes() # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
354
443
 
355
- async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
444
+ async def write_bytes_async(self, path: "str | Path", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
356
445
  """Write bytes to storage asynchronously."""
357
- resolved_path = self._resolve_path(path)
446
+ # For LocalStore (file protocol with base_path), use special resolution
447
+ if self._is_local_store:
448
+ resolved_path = self._resolve_path_for_local_store(path)
449
+ else:
450
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
451
+
358
452
  await self.store.put_async(resolved_path, data)
359
453
 
360
454
  async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
361
455
  """List objects in storage asynchronously."""
362
- resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
456
+ resolved_prefix = (
457
+ resolve_storage_path(prefix, self.base_path, self.protocol, strip_file_scheme=True)
458
+ if prefix
459
+ else self.base_path or ""
460
+ )
363
461
 
364
462
  objects: list[str] = []
365
463
  async for batch in self.store.list_async(resolved_prefix): # pyright: ignore[reportAttributeAccessIssue]
@@ -371,47 +469,72 @@ class ObStoreBackend:
371
469
 
372
470
  return sorted(objects)
373
471
 
374
- async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
472
+ async def read_text_async(self, path: "str | Path", encoding: str = "utf-8", **kwargs: Any) -> str:
375
473
  """Read text from storage asynchronously."""
376
474
  data = await self.read_bytes_async(path, **kwargs)
377
475
  return data.decode(encoding)
378
476
 
379
- async def write_text_async(
380
- self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
381
- ) -> None: # pyright: ignore[reportUnusedParameter]
477
+ async def write_text_async(self, path: "str | Path", data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
382
478
  """Write text to storage asynchronously."""
383
479
  encoded_data = data.encode(encoding)
384
480
  await self.write_bytes_async(path, encoded_data, **kwargs)
385
481
 
386
- async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
482
+ async def exists_async(self, path: "str | Path", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
387
483
  """Check if object exists in storage asynchronously."""
388
- resolved_path = self._resolve_path(path)
484
+ # For LocalStore (file protocol with base_path), use special resolution
485
+ if self._is_local_store:
486
+ resolved_path = self._resolve_path_for_local_store(path)
487
+ else:
488
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
489
+
389
490
  try:
390
491
  await self.store.head_async(resolved_path)
391
492
  except Exception:
392
493
  return False
393
494
  return True
394
495
 
395
- async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
496
+ async def delete_async(self, path: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
396
497
  """Delete object from storage asynchronously."""
397
- resolved_path = self._resolve_path(path)
498
+ # For LocalStore (file protocol with base_path), use special resolution
499
+ if self._is_local_store:
500
+ resolved_path = self._resolve_path_for_local_store(path)
501
+ else:
502
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
503
+
398
504
  await self.store.delete_async(resolved_path)
399
505
 
400
- async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
506
+ async def copy_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
401
507
  """Copy object in storage asynchronously."""
402
- source_path = self._resolve_path(source)
403
- dest_path = self._resolve_path(destination)
508
+ # For LocalStore (file protocol with base_path), use special resolution
509
+ if self._is_local_store:
510
+ source_path = self._resolve_path_for_local_store(source)
511
+ dest_path = self._resolve_path_for_local_store(destination)
512
+ else:
513
+ source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
514
+ dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
515
+
404
516
  await self.store.copy_async(source_path, dest_path)
405
517
 
406
- async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
518
+ async def move_async(self, source: "str | Path", destination: "str | Path", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
407
519
  """Move object in storage asynchronously."""
408
- source_path = self._resolve_path(source)
409
- dest_path = self._resolve_path(destination)
520
+ # For LocalStore (file protocol with base_path), use special resolution
521
+ if self._is_local_store:
522
+ source_path = self._resolve_path_for_local_store(source)
523
+ dest_path = self._resolve_path_for_local_store(destination)
524
+ else:
525
+ source_path = resolve_storage_path(source, self.base_path, self.protocol, strip_file_scheme=True)
526
+ dest_path = resolve_storage_path(destination, self.base_path, self.protocol, strip_file_scheme=True)
527
+
410
528
  await self.store.rename_async(source_path, dest_path)
411
529
 
412
- async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
530
+ async def get_metadata_async(self, path: "str | Path", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
413
531
  """Get object metadata from storage asynchronously."""
414
- resolved_path = self._resolve_path(path)
532
+ # For LocalStore (file protocol with base_path), use special resolution
533
+ if self._is_local_store:
534
+ resolved_path = self._resolve_path_for_local_store(path)
535
+ else:
536
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
537
+
415
538
  result: dict[str, Any] = {}
416
539
  try:
417
540
  metadata = await self.store.head_async(resolved_path)
@@ -433,42 +556,35 @@ class ObStoreBackend:
433
556
  else:
434
557
  return result
435
558
 
436
- async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> ArrowTable:
559
+ async def read_arrow_async(self, path: "str | Path", **kwargs: Any) -> ArrowTable:
437
560
  """Read Arrow table from storage asynchronously."""
438
- resolved_path = self._resolve_path(path)
439
- if hasattr(self.store, "read_arrow_async"):
440
- return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
441
-
442
- self._ensure_pyarrow()
561
+ ensure_pyarrow()
443
562
  import io
444
563
 
445
564
  import pyarrow.parquet as pq
446
565
 
447
- return pq.read_table(io.BytesIO(await self.read_bytes_async(resolved_path)), **kwargs)
566
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
567
+ data = await self.read_bytes_async(resolved_path)
568
+ return pq.read_table(io.BytesIO(data), **kwargs)
448
569
 
449
- async def write_arrow_async(self, path: "Union[str, Path]", table: ArrowTable, **kwargs: Any) -> None:
570
+ async def write_arrow_async(self, path: "str | Path", table: ArrowTable, **kwargs: Any) -> None:
450
571
  """Write Arrow table to storage asynchronously."""
451
- resolved_path = self._resolve_path(path)
452
- if hasattr(self.store, "write_arrow_async"):
453
- await self.store.write_arrow_async(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
454
- else:
455
- self._ensure_pyarrow()
456
- import io
572
+ ensure_pyarrow()
573
+ import io
457
574
 
458
- import pyarrow.parquet as pq
575
+ import pyarrow.parquet as pq
459
576
 
460
- buffer = io.BytesIO()
461
- pq.write_table(table, buffer, **kwargs)
462
- buffer.seek(0)
463
- await self.write_bytes_async(resolved_path, buffer.read())
577
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
578
+ buffer = io.BytesIO()
579
+ pq.write_table(table, buffer, **kwargs)
580
+ buffer.seek(0)
581
+ await self.write_bytes_async(resolved_path, buffer.read())
464
582
 
465
583
  def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
466
- resolved_pattern = self._resolve_path(pattern)
584
+ resolved_pattern = resolve_storage_path(pattern, self.base_path, self.protocol, strip_file_scheme=True)
467
585
  return _AsyncArrowIterator(self, resolved_pattern, **kwargs)
468
586
 
469
587
  async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
470
588
  """Generate a signed URL asynchronously."""
471
- resolved_path = self._resolve_path(path)
472
- if hasattr(self.store, "sign_url_async") and callable(self.store.sign_url_async):
473
- return await self.store.sign_url_async(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
589
+ resolved_path = resolve_storage_path(path, self.base_path, self.protocol, strip_file_scheme=True)
474
590
  return f"{self.store_uri}/{resolved_path}"