sqlspec-0.21.1-py3-none-any.whl → sqlspec-0.22.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. More details are available on the original diff page (the link was not preserved in this text extract).

@@ -4,24 +4,19 @@ Implements the ObjectStoreProtocol using obstore for S3, GCS, Azure,
4
4
  and local file storage.
5
5
  """
6
6
 
7
- from __future__ import annotations
8
-
9
7
  import fnmatch
10
8
  import logging
11
- from typing import TYPE_CHECKING, Any, ClassVar, Final, cast
12
-
13
- from mypy_extensions import mypyc_attr
14
-
15
- from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
16
- from sqlspec.storage.backends.base import ObjectStoreBase
17
- from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
18
- from sqlspec.typing import OBSTORE_INSTALLED
9
+ from collections.abc import AsyncIterator, Iterator
10
+ from typing import TYPE_CHECKING, Any, Final, Optional, Union, cast
11
+ from urllib.parse import urlparse
19
12
 
20
13
  if TYPE_CHECKING:
21
- from collections.abc import AsyncIterator, Iterator
22
14
  from pathlib import Path
23
15
 
24
- from sqlspec.typing import ArrowRecordBatch, ArrowTable
16
+ from mypy_extensions import mypyc_attr
17
+
18
+ from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
19
+ from sqlspec.typing import OBSTORE_INSTALLED, PYARROW_INSTALLED, ArrowRecordBatch, ArrowTable
25
20
 
26
21
  __all__ = ("ObStoreBackend",)
27
22
 
@@ -31,88 +26,122 @@ logger = logging.getLogger(__name__)
31
26
  class _AsyncArrowIterator:
32
27
  """Helper class to work around mypyc's lack of async generator support."""
33
28
 
34
- def __init__(self, store: Any, pattern: str, **kwargs: Any) -> None:
35
- self.store = store
29
+ def __init__(self, backend: "ObStoreBackend", pattern: str, **kwargs: Any) -> None:
30
+ self.backend = backend
36
31
  self.pattern = pattern
37
32
  self.kwargs = kwargs
38
- self._iterator: Any | None = None
33
+ self._files_iterator: Optional[Iterator[str]] = None
34
+ self._current_file_iterator: Optional[Iterator[ArrowRecordBatch]] = None
39
35
 
40
- def __aiter__(self) -> _AsyncArrowIterator:
36
+ def __aiter__(self) -> "_AsyncArrowIterator":
41
37
  return self
42
38
 
43
39
  async def __anext__(self) -> ArrowRecordBatch:
44
- if self._iterator is None:
45
- self._iterator = self.store.stream_arrow_async(self.pattern, **self.kwargs)
46
- if self._iterator is not None:
47
- return cast("ArrowRecordBatch", await self._iterator.__anext__())
48
- raise StopAsyncIteration
40
+ if self._files_iterator is None:
41
+ files = self.backend.glob(self.pattern, **self.kwargs)
42
+ self._files_iterator = iter(files)
43
+
44
+ while True:
45
+ if self._current_file_iterator is not None:
46
+ try:
47
+ return next(self._current_file_iterator)
48
+ except StopIteration:
49
+ self._current_file_iterator = None
50
+
51
+ try:
52
+ next_file = next(self._files_iterator)
53
+ # Stream from this file
54
+ file_batches = self.backend.stream_arrow(next_file)
55
+ self._current_file_iterator = iter(file_batches)
56
+ except StopIteration:
57
+ raise StopAsyncIteration
49
58
 
50
59
 
51
60
  DEFAULT_OPTIONS: Final[dict[str, Any]] = {"connect_timeout": "30s", "request_timeout": "60s"}
52
61
 
53
62
 
54
63
  @mypyc_attr(allow_interpreted_subclasses=True)
55
- class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
64
+ class ObStoreBackend:
56
65
  """Object storage backend using obstore.
57
66
 
58
- Uses obstore's Rust-based implementation for storage operations.
59
- Supports AWS S3, Google Cloud Storage, Azure Blob Storage,
67
+ Implements ObjectStoreProtocol using obstore's Rust-based implementation
68
+ for storage operations. Supports AWS S3, Google Cloud Storage, Azure Blob Storage,
60
69
  local filesystem, and HTTP endpoints.
61
70
  """
62
71
 
63
- capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
64
- supports_arrow=True,
65
- supports_streaming=True,
66
- supports_async=True,
67
- supports_batch_operations=True,
68
- supports_multipart_upload=True,
69
- supports_compression=True,
70
- is_cloud_native=True,
71
- has_low_latency=True,
72
- )
72
+ __slots__ = ("_path_cache", "backend_type", "base_path", "protocol", "store", "store_options", "store_uri")
73
+
74
+ def _ensure_obstore(self) -> None:
75
+ """Ensure obstore is available for operations."""
76
+ if not OBSTORE_INSTALLED:
77
+ raise MissingDependencyError(package="obstore", install_package="obstore")
73
78
 
74
- __slots__ = ("_path_cache", "base_path", "protocol", "store", "store_options", "store_uri")
79
+ def _ensure_pyarrow(self) -> None:
80
+ """Ensure PyArrow is available for Arrow operations."""
81
+ if not PYARROW_INSTALLED:
82
+ raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
75
83
 
76
- def __init__(self, store_uri: str, base_path: str = "", **store_options: Any) -> None:
84
+ def __init__(self, uri: str, **kwargs: Any) -> None:
77
85
  """Initialize obstore backend.
78
86
 
79
87
  Args:
80
- store_uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
81
- base_path: Base path prefix for all operations
82
- **store_options: Additional options for obstore configuration
88
+ uri: Storage URI (e.g., 's3://bucket', 'file:///path', 'gs://bucket')
89
+ **kwargs: Additional options including base_path and obstore configuration
83
90
  """
84
91
 
85
- if not OBSTORE_INSTALLED:
86
- raise MissingDependencyError(package="obstore", install_package="obstore")
92
+ self._ensure_obstore()
87
93
 
88
94
  try:
89
- self.store_uri = store_uri
95
+ # Extract base_path from kwargs
96
+ base_path = kwargs.pop("base_path", "")
97
+
98
+ self.store_uri = uri
90
99
  self.base_path = base_path.rstrip("/") if base_path else ""
91
- self.store_options = store_options
100
+ self.store_options = kwargs
92
101
  self.store: Any
93
102
  self._path_cache: dict[str, str] = {}
94
- self.protocol = store_uri.split("://", 1)[0] if "://" in store_uri else "file"
103
+ self.protocol = uri.split("://", 1)[0] if "://" in uri else "file"
104
+ self.backend_type = "obstore"
95
105
 
96
- if store_uri.startswith("memory://"):
106
+ if uri.startswith("memory://"):
97
107
  from obstore.store import MemoryStore
98
108
 
99
109
  self.store = MemoryStore()
100
- elif store_uri.startswith("file://"):
110
+ elif uri.startswith("file://"):
111
+ from pathlib import Path as PathlibPath
112
+
101
113
  from obstore.store import LocalStore
102
114
 
103
- self.store = LocalStore("/")
115
+ parsed = urlparse(uri)
116
+ path = parsed.path or "/"
117
+ # Create directory if it doesn't exist (ObStore LocalStore requires it)
118
+ PathlibPath(path).mkdir(parents=True, exist_ok=True)
119
+ self.store = LocalStore(path)
104
120
  else:
105
121
  from obstore.store import from_url
106
122
 
107
- self.store = from_url(store_uri, **store_options) # pyright: ignore[reportAttributeAccessIssue]
123
+ self.store = from_url(uri, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
108
124
 
109
- logger.debug("ObStore backend initialized for %s", store_uri)
125
+ logger.debug("ObStore backend initialized for %s", uri)
110
126
 
111
127
  except Exception as exc:
112
- msg = f"Failed to initialize obstore backend for {store_uri}"
128
+ msg = f"Failed to initialize obstore backend for {uri}"
113
129
  raise StorageOperationFailedError(msg) from exc
114
130
 
115
- def _resolve_path(self, path: str | Path) -> str:
131
+ @classmethod
132
+ def from_config(cls, config: dict[str, Any]) -> "ObStoreBackend":
133
+ """Create backend from configuration dictionary."""
134
+ store_uri = config["store_uri"]
135
+ base_path = config.get("base_path", "")
136
+ store_options = config.get("store_options", {})
137
+
138
+ kwargs = dict(store_options)
139
+ if base_path:
140
+ kwargs["base_path"] = base_path
141
+
142
+ return cls(uri=store_uri, **kwargs)
143
+
144
+ def _resolve_path(self, path: "Union[str, Path]") -> str:
116
145
  """Resolve path relative to base_path."""
117
146
  path_str = str(path)
118
147
  if path_str.startswith("file://"):
@@ -125,49 +154,33 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
125
154
  return f"{clean_base}/{clean_path}"
126
155
  return path_str
127
156
 
128
- @property
129
- def backend_type(self) -> str:
130
- """Return backend type identifier."""
131
- return "obstore"
132
-
133
- def read_bytes(self, path: str | Path, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
157
+ def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
134
158
  """Read bytes using obstore."""
135
- try:
136
- result = self.store.get(self._resolve_path(path))
137
- return cast("bytes", result.bytes().to_bytes())
138
- except Exception as exc:
139
- msg = f"Failed to read bytes from {path}"
140
- raise StorageOperationFailedError(msg) from exc
159
+ result = self.store.get(self._resolve_path(path))
160
+ return cast("bytes", result.bytes().to_bytes())
141
161
 
142
- def write_bytes(self, path: str | Path, data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
162
+ def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
143
163
  """Write bytes using obstore."""
144
- try:
145
- self.store.put(self._resolve_path(path), data)
146
- except Exception as exc:
147
- msg = f"Failed to write bytes to {path}"
148
- raise StorageOperationFailedError(msg) from exc
164
+ self.store.put(self._resolve_path(path), data)
149
165
 
150
- def read_text(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
166
+ def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
151
167
  """Read text using obstore."""
152
168
  return self.read_bytes(path, **kwargs).decode(encoding)
153
169
 
154
- def write_text(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
170
+ def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
155
171
  """Write text using obstore."""
156
172
  self.write_bytes(path, data.encode(encoding), **kwargs)
157
173
 
158
174
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
159
175
  """List objects using obstore."""
160
- try:
161
- resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
162
- items = (
163
- self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
164
- )
165
- return sorted(str(getattr(item, "path", getattr(item, "key", str(item)))) for item in items)
166
- except Exception as exc:
167
- msg = f"Failed to list objects with prefix '{prefix}'"
168
- raise StorageOperationFailedError(msg) from exc
169
-
170
- def exists(self, path: str | Path, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
176
+ resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
177
+ items = self.store.list_with_delimiter(resolved_prefix) if not recursive else self.store.list(resolved_prefix)
178
+ paths: list[str] = []
179
+ for batch in items:
180
+ paths.extend(item["path"] for item in batch)
181
+ return sorted(paths)
182
+
183
+ def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
171
184
  """Check if object exists using obstore."""
172
185
  try:
173
186
  self.store.head(self._resolve_path(path))
@@ -175,29 +188,17 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
175
188
  return False
176
189
  return True
177
190
 
178
- def delete(self, path: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
191
+ def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
179
192
  """Delete object using obstore."""
180
- try:
181
- self.store.delete(self._resolve_path(path))
182
- except Exception as exc:
183
- msg = f"Failed to delete {path}"
184
- raise StorageOperationFailedError(msg) from exc
193
+ self.store.delete(self._resolve_path(path))
185
194
 
186
- def copy(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
195
+ def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
187
196
  """Copy object using obstore."""
188
- try:
189
- self.store.copy(self._resolve_path(source), self._resolve_path(destination))
190
- except Exception as exc:
191
- msg = f"Failed to copy {source} to {destination}"
192
- raise StorageOperationFailedError(msg) from exc
197
+ self.store.copy(self._resolve_path(source), self._resolve_path(destination))
193
198
 
194
- def move(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
199
+ def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
195
200
  """Move object using obstore."""
196
- try:
197
- self.store.rename(self._resolve_path(source), self._resolve_path(destination))
198
- except Exception as exc:
199
- msg = f"Failed to move {source} to {destination}"
200
- raise StorageOperationFailedError(msg) from exc
201
+ self.store.rename(self._resolve_path(source), self._resolve_path(destination))
201
202
 
202
203
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
203
204
  """Find objects matching pattern.
@@ -228,7 +229,7 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
228
229
  return matching_objects
229
230
  return [obj for obj in all_objects if fnmatch.fnmatch(obj, resolved_pattern)]
230
231
 
231
- def get_metadata(self, path: str | Path, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
232
+ def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
232
233
  """Get object metadata using obstore."""
233
234
  resolved_path = self._resolve_path(path)
234
235
  result: dict[str, Any] = {}
@@ -252,12 +253,12 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
252
253
  else:
253
254
  return result
254
255
 
255
- def is_object(self, path: str | Path) -> bool:
256
+ def is_object(self, path: "Union[str, Path]") -> bool:
256
257
  """Check if path is an object using obstore."""
257
258
  resolved_path = self._resolve_path(path)
258
259
  return self.exists(path) and not resolved_path.endswith("/")
259
260
 
260
- def is_path(self, path: str | Path) -> bool:
261
+ def is_path(self, path: "Union[str, Path]") -> bool:
261
262
  """Check if path is a prefix/directory using obstore."""
262
263
  resolved_path = self._resolve_path(path)
263
264
 
@@ -270,61 +271,53 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
270
271
  except Exception:
271
272
  return False
272
273
 
273
- def read_arrow(self, path: str | Path, **kwargs: Any) -> ArrowTable:
274
+ def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> ArrowTable:
274
275
  """Read Arrow table using obstore."""
275
- try:
276
- resolved_path = self._resolve_path(path)
277
- if hasattr(self.store, "read_arrow"):
278
- return self.store.read_arrow(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
276
+ resolved_path = self._resolve_path(path)
277
+ if hasattr(self.store, "read_arrow"):
278
+ return self.store.read_arrow(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
279
+
280
+ self._ensure_pyarrow()
281
+ import io
282
+
283
+ import pyarrow.parquet as pq
284
+
285
+ return pq.read_table(io.BytesIO(self.read_bytes(resolved_path)), **kwargs)
279
286
 
287
+ def write_arrow(self, path: "Union[str, Path]", table: ArrowTable, **kwargs: Any) -> None:
288
+ """Write Arrow table using obstore."""
289
+ resolved_path = self._resolve_path(path)
290
+ if hasattr(self.store, "write_arrow"):
291
+ self.store.write_arrow(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
292
+ else:
293
+ self._ensure_pyarrow()
280
294
  import io
281
295
 
296
+ import pyarrow as pa
282
297
  import pyarrow.parquet as pq
283
298
 
284
- data = self.read_bytes(resolved_path)
285
- buffer = io.BytesIO(data)
286
- return pq.read_table(buffer, **kwargs)
287
- except Exception as exc:
288
- msg = f"Failed to read Arrow table from {path}"
289
- raise StorageOperationFailedError(msg) from exc
299
+ buffer = io.BytesIO()
290
300
 
291
- def write_arrow(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
292
- """Write Arrow table using obstore."""
293
- try:
294
- resolved_path = self._resolve_path(path)
295
- if hasattr(self.store, "write_arrow"):
296
- self.store.write_arrow(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
297
- else:
298
- import io
299
-
300
- import pyarrow as pa
301
- import pyarrow.parquet as pq
302
-
303
- buffer = io.BytesIO()
304
-
305
- schema = table.schema
306
- if any(str(f.type).startswith("decimal64") for f in schema):
307
- new_fields = []
308
- for field in schema:
309
- if str(field.type).startswith("decimal64"):
310
- import re
311
-
312
- match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
313
- if match:
314
- precision, scale = int(match.group(1)), int(match.group(2))
315
- new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
316
- else:
317
- new_fields.append(field) # pragma: no cover
301
+ schema = table.schema
302
+ if any(str(f.type).startswith("decimal64") for f in schema):
303
+ new_fields = []
304
+ for field in schema:
305
+ if str(field.type).startswith("decimal64"):
306
+ import re
307
+
308
+ match = re.match(r"decimal64\((\d+),\s*(\d+)\)", str(field.type))
309
+ if match:
310
+ precision, scale = int(match.group(1)), int(match.group(2))
311
+ new_fields.append(pa.field(field.name, pa.decimal128(precision, scale)))
318
312
  else:
319
- new_fields.append(field)
320
- table = table.cast(pa.schema(new_fields))
313
+ new_fields.append(field) # pragma: no cover
314
+ else:
315
+ new_fields.append(field)
316
+ table = table.cast(pa.schema(new_fields))
321
317
 
322
- pq.write_table(table, buffer, **kwargs)
323
- buffer.seek(0)
324
- self.write_bytes(resolved_path, buffer.read())
325
- except Exception as exc:
326
- msg = f"Failed to write Arrow table to {path}"
327
- raise StorageOperationFailedError(msg) from exc
318
+ pq.write_table(table, buffer, **kwargs)
319
+ buffer.seek(0)
320
+ self.write_bytes(resolved_path, buffer.read())
328
321
 
329
322
  def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator[ArrowRecordBatch]:
330
323
  """Stream Arrow record batches.
@@ -332,56 +325,65 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
332
325
  Yields:
333
326
  Iterator of Arrow record batches from matching objects.
334
327
  """
335
- try:
336
- resolved_pattern = self._resolve_path(pattern)
337
- yield from self.store.stream_arrow(resolved_pattern, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
338
- except Exception as exc:
339
- msg = f"Failed to stream Arrow data for pattern {pattern}"
340
- raise StorageOperationFailedError(msg) from exc
328
+ self._ensure_pyarrow()
329
+ from io import BytesIO
330
+
331
+ import pyarrow.parquet as pq
332
+
333
+ for obj_path in self.glob(pattern, **kwargs):
334
+ result = self.store.get(self._resolve_path(obj_path))
335
+ bytes_obj = result.bytes()
336
+ data = bytes_obj.to_bytes()
337
+ buffer = BytesIO(data)
338
+ parquet_file = pq.ParquetFile(buffer)
339
+ yield from parquet_file.iter_batches()
340
+
341
+ def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
342
+ """Generate a signed URL for the object."""
343
+ resolved_path = self._resolve_path(path)
344
+ if hasattr(self.store, "sign_url") and callable(self.store.sign_url):
345
+ return self.store.sign_url(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
346
+ return f"{self.store_uri}/{resolved_path}"
341
347
 
342
- async def read_bytes_async(self, path: str | Path, **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
348
+ async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes: # pyright: ignore[reportUnusedParameter]
343
349
  """Read bytes from storage asynchronously."""
344
- try:
345
- resolved_path = self._resolve_path(path)
346
- result = await self.store.get_async(resolved_path)
347
- bytes_obj = await result.bytes_async()
348
- return bytes_obj.to_bytes() # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
349
- except Exception as exc:
350
- msg = f"Failed to read bytes from {path}"
351
- raise StorageOperationFailedError(msg) from exc
350
+ resolved_path = self._resolve_path(path)
351
+ result = await self.store.get_async(resolved_path)
352
+ bytes_obj = await result.bytes_async()
353
+ return bytes_obj.to_bytes() # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
352
354
 
353
- async def write_bytes_async(self, path: str | Path, data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
355
+ async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
354
356
  """Write bytes to storage asynchronously."""
355
357
  resolved_path = self._resolve_path(path)
356
358
  await self.store.put_async(resolved_path, data)
357
359
 
358
360
  async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]: # pyright: ignore[reportUnusedParameter]
359
361
  """List objects in storage asynchronously."""
360
- try:
361
- resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
362
+ resolved_prefix = self._resolve_path(prefix) if prefix else self.base_path or ""
362
363
 
363
- objects = [str(item.path) async for item in self.store.list_async(resolved_prefix)] # pyright: ignore[reportAttributeAccessIssue]
364
+ objects: list[str] = []
365
+ async for batch in self.store.list_async(resolved_prefix): # pyright: ignore[reportAttributeAccessIssue]
366
+ objects.extend(item["path"] for item in batch)
364
367
 
365
- if not recursive and resolved_prefix:
366
- base_depth = resolved_prefix.count("/")
367
- objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
368
+ if not recursive and resolved_prefix:
369
+ base_depth = resolved_prefix.count("/")
370
+ objects = [obj for obj in objects if obj.count("/") <= base_depth + 1]
368
371
 
369
- return sorted(objects)
370
- except Exception as exc:
371
- msg = f"Failed to list objects with prefix '{prefix}'"
372
- raise StorageOperationFailedError(msg) from exc
372
+ return sorted(objects)
373
373
 
374
- async def read_text_async(self, path: str | Path, encoding: str = "utf-8", **kwargs: Any) -> str:
374
+ async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
375
375
  """Read text from storage asynchronously."""
376
376
  data = await self.read_bytes_async(path, **kwargs)
377
377
  return data.decode(encoding)
378
378
 
379
- async def write_text_async(self, path: str | Path, data: str, encoding: str = "utf-8", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
379
+ async def write_text_async(
380
+ self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
381
+ ) -> None: # pyright: ignore[reportUnusedParameter]
380
382
  """Write text to storage asynchronously."""
381
383
  encoded_data = data.encode(encoding)
382
384
  await self.write_bytes_async(path, encoded_data, **kwargs)
383
385
 
384
- async def exists_async(self, path: str | Path, **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
386
+ async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool: # pyright: ignore[reportUnusedParameter]
385
387
  """Check if object exists in storage asynchronously."""
386
388
  resolved_path = self._resolve_path(path)
387
389
  try:
@@ -390,24 +392,24 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
390
392
  return False
391
393
  return True
392
394
 
393
- async def delete_async(self, path: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
395
+ async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
394
396
  """Delete object from storage asynchronously."""
395
397
  resolved_path = self._resolve_path(path)
396
398
  await self.store.delete_async(resolved_path)
397
399
 
398
- async def copy_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
400
+ async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
399
401
  """Copy object in storage asynchronously."""
400
402
  source_path = self._resolve_path(source)
401
403
  dest_path = self._resolve_path(destination)
402
404
  await self.store.copy_async(source_path, dest_path)
403
405
 
404
- async def move_async(self, source: str | Path, destination: str | Path, **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
406
+ async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None: # pyright: ignore[reportUnusedParameter]
405
407
  """Move object in storage asynchronously."""
406
408
  source_path = self._resolve_path(source)
407
409
  dest_path = self._resolve_path(destination)
408
410
  await self.store.rename_async(source_path, dest_path)
409
411
 
410
- async def get_metadata_async(self, path: str | Path, **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
412
+ async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]: # pyright: ignore[reportUnusedParameter]
411
413
  """Get object metadata from storage asynchronously."""
412
414
  resolved_path = self._resolve_path(path)
413
415
  result: dict[str, Any] = {}
@@ -417,31 +419,40 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
417
419
  {
418
420
  "path": resolved_path,
419
421
  "exists": True,
420
- "size": metadata.size,
421
- "last_modified": metadata.last_modified,
422
- "e_tag": metadata.e_tag,
423
- "version": metadata.version,
422
+ "size": metadata.get("size"),
423
+ "last_modified": metadata.get("last_modified"),
424
+ "e_tag": metadata.get("e_tag"),
425
+ "version": metadata.get("version"),
424
426
  }
425
427
  )
426
- if hasattr(metadata, "metadata") and metadata.metadata:
427
- result["custom_metadata"] = metadata.metadata
428
+ if metadata.get("metadata"):
429
+ result["custom_metadata"] = metadata["metadata"]
428
430
 
429
431
  except Exception:
430
432
  return {"path": resolved_path, "exists": False}
431
433
  else:
432
434
  return result
433
435
 
434
- async def read_arrow_async(self, path: str | Path, **kwargs: Any) -> ArrowTable:
436
+ async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> ArrowTable:
435
437
  """Read Arrow table from storage asynchronously."""
436
438
  resolved_path = self._resolve_path(path)
437
- return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
439
+ if hasattr(self.store, "read_arrow_async"):
440
+ return await self.store.read_arrow_async(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore[reportAttributeAccessIssue]
441
+
442
+ self._ensure_pyarrow()
443
+ import io
438
444
 
439
- async def write_arrow_async(self, path: str | Path, table: ArrowTable, **kwargs: Any) -> None:
445
+ import pyarrow.parquet as pq
446
+
447
+ return pq.read_table(io.BytesIO(await self.read_bytes_async(resolved_path)), **kwargs)
448
+
449
+ async def write_arrow_async(self, path: "Union[str, Path]", table: ArrowTable, **kwargs: Any) -> None:
440
450
  """Write Arrow table to storage asynchronously."""
441
451
  resolved_path = self._resolve_path(path)
442
452
  if hasattr(self.store, "write_arrow_async"):
443
453
  await self.store.write_arrow_async(resolved_path, table, **kwargs) # pyright: ignore[reportAttributeAccessIssue]
444
454
  else:
455
+ self._ensure_pyarrow()
445
456
  import io
446
457
 
447
458
  import pyarrow.parquet as pq
@@ -453,4 +464,11 @@ class ObStoreBackend(ObjectStoreBase, HasStorageCapabilities):
453
464
 
454
465
  def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator[ArrowRecordBatch]:
455
466
  resolved_pattern = self._resolve_path(pattern)
456
- return _AsyncArrowIterator(self.store, resolved_pattern, **kwargs)
467
+ return _AsyncArrowIterator(self, resolved_pattern, **kwargs)
468
+
469
+ async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
470
+ """Generate a signed URL asynchronously."""
471
+ resolved_path = self._resolve_path(path)
472
+ if hasattr(self.store, "sign_url_async") and callable(self.store.sign_url_async):
473
+ return await self.store.sign_url_async(resolved_path, expires_in=expires_in) # type: ignore[no-any-return]
474
+ return f"{self.store_uri}/{resolved_path}"