sqlspec-0.21.0-py3-none-any.whl → sqlspec-0.22.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

@@ -1,18 +1,14 @@
1
1
  import logging
2
2
  from pathlib import Path
3
- from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
3
+ from typing import TYPE_CHECKING, Any, Optional, Union
4
4
 
5
- from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
6
- from sqlspec.storage.backends.base import ObjectStoreBase
7
- from sqlspec.storage.capabilities import StorageCapabilities
5
+ from sqlspec.exceptions import MissingDependencyError
8
6
  from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
9
7
  from sqlspec.utils.sync_tools import async_
10
8
 
11
9
  if TYPE_CHECKING:
12
10
  from collections.abc import AsyncIterator, Iterator
13
11
 
14
- from fsspec import AbstractFileSystem
15
-
16
12
  from sqlspec.typing import ArrowRecordBatch, ArrowTable
17
13
 
18
14
  __all__ = ("FSSpecBackend",)
@@ -56,40 +52,30 @@ class _ArrowStreamer:
56
52
  raise StopAsyncIteration
57
53
 
58
54
 
59
- class FSSpecBackend(ObjectStoreBase):
55
+ class FSSpecBackend:
60
56
  """Storage backend using fsspec.
61
57
 
62
- Implements the ObjectStoreProtocol using fsspec for various protocols
58
+ Implements ObjectStoreProtocol using fsspec for various protocols
63
59
  including HTTP, HTTPS, FTP, and cloud storage services.
64
60
  """
65
61
 
66
- _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
67
- supports_arrow=PYARROW_INSTALLED,
68
- supports_streaming=PYARROW_INSTALLED,
69
- supports_async=True,
70
- supports_compression=True,
71
- is_remote=True,
72
- is_cloud_native=False,
73
- )
74
-
75
- def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
76
- if not FSSPEC_INSTALLED:
77
- raise MissingDependencyError(package="fsspec", install_package="fsspec")
62
+ def __init__(self, uri: str, **kwargs: Any) -> None:
63
+ self._ensure_fsspec()
78
64
 
65
+ base_path = kwargs.pop("base_path", "")
79
66
  self.base_path = base_path.rstrip("/") if base_path else ""
80
67
 
81
- if isinstance(fs, str):
82
- import fsspec
83
-
84
- self.fs = fsspec.filesystem(fs.split("://")[0])
85
- self.protocol = fs.split("://")[0]
86
- self._fs_uri = fs
68
+ if "://" in uri:
69
+ self.protocol = uri.split("://", maxsplit=1)[0]
70
+ self._fs_uri = uri
87
71
  else:
88
- self.fs = fs
89
- self.protocol = getattr(fs, "protocol", "unknown")
90
- self._fs_uri = f"{self.protocol}://"
72
+ self.protocol = uri
73
+ self._fs_uri = f"{uri}://"
74
+
75
+ import fsspec
91
76
 
92
- self._instance_capabilities = self._detect_capabilities()
77
+ self.fs = fsspec.filesystem(self.protocol, **kwargs)
78
+ self.backend_type = "fsspec"
93
79
 
94
80
  super().__init__()
95
81
 
@@ -99,11 +85,22 @@ class FSSpecBackend(ObjectStoreBase):
99
85
  fs_config = config.get("fs_config", {})
100
86
  base_path = config.get("base_path", "")
101
87
 
102
- import fsspec
88
+ uri = f"{protocol}://"
89
+ kwargs = dict(fs_config)
90
+ if base_path:
91
+ kwargs["base_path"] = base_path
103
92
 
104
- fs_instance = fsspec.filesystem(protocol, **fs_config)
93
+ return cls(uri=uri, **kwargs)
105
94
 
106
- return cls(fs=fs_instance, base_path=base_path)
95
+ def _ensure_fsspec(self) -> None:
96
+ """Ensure fsspec is available for operations."""
97
+ if not FSSPEC_INSTALLED:
98
+ raise MissingDependencyError(package="fsspec", install_package="fsspec")
99
+
100
+ def _ensure_pyarrow(self) -> None:
101
+ """Ensure PyArrow is available for Arrow operations."""
102
+ if not PYARROW_INSTALLED:
103
+ raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
107
104
 
108
105
  def _resolve_path(self, path: Union[str, Path]) -> str:
109
106
  """Resolve path relative to base_path."""
@@ -112,70 +109,38 @@ class FSSpecBackend(ObjectStoreBase):
112
109
  clean_base = self.base_path.rstrip("/")
113
110
  clean_path = path_str.lstrip("/")
114
111
  return f"{clean_base}/{clean_path}"
112
+ if self.protocol == "s3" and "://" in self._fs_uri:
113
+ # For S3, we need to include the bucket from the URI
114
+ # Extract bucket and path from URI like s3://bucket/path
115
+ uri_parts = self._fs_uri.split("://", 1)[1] # Remove s3://
116
+ if "/" in uri_parts:
117
+ # URI has bucket and base path
118
+ return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
119
+ # URI has only bucket
120
+ return f"{uri_parts}/{path_str.lstrip('/')}"
115
121
  return path_str
116
122
 
117
- def _detect_capabilities(self) -> StorageCapabilities:
118
- """Detect capabilities based on filesystem protocol."""
119
- protocol = self.protocol.lower()
120
-
121
- if protocol in {"s3", "s3a", "s3n"}:
122
- return StorageCapabilities.s3_compatible()
123
- if protocol in {"gcs", "gs"}:
124
- return StorageCapabilities.gcs()
125
- if protocol in {"abfs", "az", "azure"}:
126
- return StorageCapabilities.azure_blob()
127
- if protocol in {"file", "local"}:
128
- return StorageCapabilities.local_filesystem()
129
- return StorageCapabilities(
130
- supports_arrow=PYARROW_INSTALLED,
131
- supports_streaming=PYARROW_INSTALLED,
132
- supports_async=True,
133
- supports_compression=True,
134
- is_remote=True,
135
- is_cloud_native=False,
136
- )
137
-
138
- @property
139
- def capabilities(self) -> StorageCapabilities:
140
- """Return capabilities based on detected protocol."""
141
- return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
142
-
143
- @classmethod
144
- def has_capability(cls, capability: str) -> bool:
145
- """Check if backend has a specific capability."""
146
- return getattr(cls._default_capabilities, capability, False)
147
-
148
- @classmethod
149
- def get_capabilities(cls) -> StorageCapabilities:
150
- """Get all capabilities for this backend."""
151
- return cls._default_capabilities
152
-
153
- @property
154
- def backend_type(self) -> str:
155
- return "fsspec"
156
-
157
123
  @property
158
124
  def base_uri(self) -> str:
159
125
  return self._fs_uri
160
126
 
161
127
  def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
162
128
  """Read bytes from an object."""
163
- try:
164
- resolved_path = self._resolve_path(path)
165
- return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
166
- except Exception as exc:
167
- msg = f"Failed to read bytes from {path}"
168
- raise StorageOperationFailedError(msg) from exc
129
+ resolved_path = self._resolve_path(path)
130
+ return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
169
131
 
170
132
  def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
171
133
  """Write bytes to an object."""
172
- try:
173
- resolved_path = self._resolve_path(path)
174
- with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
175
- f.write(data) # pyright: ignore
176
- except Exception as exc:
177
- msg = f"Failed to write bytes to {path}"
178
- raise StorageOperationFailedError(msg) from exc
134
+ resolved_path = self._resolve_path(path)
135
+
136
+ # Only create directories for local file systems, not for cloud storage
137
+ if self.protocol == "file":
138
+ parent_dir = str(Path(resolved_path).parent)
139
+ if parent_dir and not self.fs.exists(parent_dir):
140
+ self.fs.makedirs(parent_dir, exist_ok=True)
141
+
142
+ with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
143
+ f.write(data) # pyright: ignore
179
144
 
180
145
  def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
181
146
  """Read text from an object."""
@@ -193,87 +158,59 @@ class FSSpecBackend(ObjectStoreBase):
193
158
 
194
159
  def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
195
160
  """Delete an object."""
196
- try:
197
- resolved_path = self._resolve_path(path)
198
- self.fs.rm(resolved_path, **kwargs)
199
- except Exception as exc:
200
- msg = f"Failed to delete {path}"
201
- raise StorageOperationFailedError(msg) from exc
161
+ resolved_path = self._resolve_path(path)
162
+ self.fs.rm(resolved_path, **kwargs)
202
163
 
203
164
  def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
204
165
  """Copy an object."""
205
- try:
206
- source_path = self._resolve_path(source)
207
- dest_path = self._resolve_path(destination)
208
- self.fs.copy(source_path, dest_path, **kwargs)
209
- except Exception as exc:
210
- msg = f"Failed to copy {source} to {destination}"
211
- raise StorageOperationFailedError(msg) from exc
166
+ source_path = self._resolve_path(source)
167
+ dest_path = self._resolve_path(destination)
168
+ self.fs.copy(source_path, dest_path, **kwargs)
212
169
 
213
170
  def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
214
171
  """Move an object."""
215
- try:
216
- source_path = self._resolve_path(source)
217
- dest_path = self._resolve_path(destination)
218
- self.fs.mv(source_path, dest_path, **kwargs)
219
- except Exception as exc:
220
- msg = f"Failed to move {source} to {destination}"
221
- raise StorageOperationFailedError(msg) from exc
172
+ source_path = self._resolve_path(source)
173
+ dest_path = self._resolve_path(destination)
174
+ self.fs.mv(source_path, dest_path, **kwargs)
222
175
 
223
176
  def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
224
177
  """Read an Arrow table from storage."""
225
178
  if not PYARROW_INSTALLED:
226
179
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
227
- try:
228
- import pyarrow.parquet as pq
180
+ import pyarrow.parquet as pq
229
181
 
230
- resolved_path = self._resolve_path(path)
231
- with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
232
- return pq.read_table(f)
233
- except Exception as exc:
234
- msg = f"Failed to read Arrow table from {path}"
235
- raise StorageOperationFailedError(msg) from exc
182
+ resolved_path = self._resolve_path(path)
183
+ with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
184
+ return pq.read_table(f)
236
185
 
237
186
  def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
238
187
  """Write an Arrow table to storage."""
239
188
  if not PYARROW_INSTALLED:
240
189
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
241
- try:
242
- import pyarrow.parquet as pq
190
+ import pyarrow.parquet as pq
243
191
 
244
- resolved_path = self._resolve_path(path)
245
- with self.fs.open(resolved_path, mode="wb") as f:
246
- pq.write_table(table, f, **kwargs) # pyright: ignore
247
- except Exception as exc:
248
- msg = f"Failed to write Arrow table to {path}"
249
- raise StorageOperationFailedError(msg) from exc
192
+ resolved_path = self._resolve_path(path)
193
+ with self.fs.open(resolved_path, mode="wb") as f:
194
+ pq.write_table(table, f, **kwargs) # pyright: ignore
250
195
 
251
196
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
252
197
  """List objects with optional prefix."""
253
- try:
254
- resolved_prefix = self._resolve_path(prefix)
255
- if recursive:
256
- return sorted(self.fs.find(resolved_prefix, **kwargs))
257
- return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
258
- except Exception as exc:
259
- msg = f"Failed to list objects with prefix '{prefix}'"
260
- raise StorageOperationFailedError(msg) from exc
198
+ resolved_prefix = self._resolve_path(prefix)
199
+ if recursive:
200
+ return sorted(self.fs.find(resolved_prefix, **kwargs))
201
+ return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
261
202
 
262
203
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
263
204
  """Find objects matching a glob pattern."""
264
- try:
265
- resolved_pattern = self._resolve_path(pattern)
266
- return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
267
- except Exception as exc:
268
- msg = f"Failed to glob with pattern '{pattern}'"
269
- raise StorageOperationFailedError(msg) from exc
205
+ resolved_pattern = self._resolve_path(pattern)
206
+ return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
270
207
 
271
- def is_object(self, path: str) -> bool:
208
+ def is_object(self, path: Union[str, Path]) -> bool:
272
209
  """Check if path points to an object."""
273
210
  resolved_path = self._resolve_path(path)
274
211
  return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
275
212
 
276
- def is_path(self, path: str) -> bool:
213
+ def is_path(self, path: Union[str, Path]) -> bool:
277
214
  """Check if path points to a prefix (directory-like)."""
278
215
  resolved_path = self._resolve_path(path)
279
216
  return self.fs.isdir(resolved_path) # type: ignore[no-any-return]
@@ -294,9 +231,6 @@ class FSSpecBackend(ObjectStoreBase):
294
231
 
295
232
  except FileNotFoundError:
296
233
  return {"path": self._resolve_path(path), "exists": False}
297
- except Exception as exc:
298
- msg = f"Failed to get metadata for {path}"
299
- raise StorageOperationFailedError(msg) from exc
300
234
  return {
301
235
  "path": resolved_path,
302
236
  "exists": True,
@@ -305,6 +239,11 @@ class FSSpecBackend(ObjectStoreBase):
305
239
  "type": info.type,
306
240
  }
307
241
 
242
+ def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
243
+ """Generate a signed URL for the file."""
244
+ resolved_path = self._resolve_path(path)
245
+ return f"{self._fs_uri}{resolved_path}"
246
+
308
247
  def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
309
248
  import pyarrow.parquet as pq
310
249
 
@@ -313,10 +252,8 @@ class FSSpecBackend(ObjectStoreBase):
313
252
  yield from parquet_file.iter_batches()
314
253
 
315
254
  def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
316
- if not FSSPEC_INSTALLED:
317
- raise MissingDependencyError(package="fsspec", install_package="fsspec")
318
- if not PYARROW_INSTALLED:
319
- raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
255
+ self._ensure_fsspec()
256
+ self._ensure_pyarrow()
320
257
 
321
258
  for obj_path in self.glob(pattern, **kwargs):
322
259
  yield from self._stream_file_batches(obj_path)
@@ -339,8 +276,7 @@ class FSSpecBackend(ObjectStoreBase):
339
276
  Returns:
340
277
  AsyncIterator of Arrow record batches
341
278
  """
342
- if not PYARROW_INSTALLED:
343
- raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
279
+ self._ensure_pyarrow()
344
280
 
345
281
  return _ArrowStreamer(self, pattern, **kwargs)
346
282
 
@@ -376,6 +312,10 @@ class FSSpecBackend(ObjectStoreBase):
376
312
  """Get object metadata from storage asynchronously."""
377
313
  return await async_(self.get_metadata)(path, **kwargs)
378
314
 
315
+ async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
316
+ """Generate a signed URL asynchronously."""
317
+ return await async_(self.sign)(path, expires_in, for_upload)
318
+
379
319
  async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
380
320
  """Read Arrow table from storage asynchronously."""
381
321
  return await async_(self.read_arrow)(path, **kwargs)
@@ -0,0 +1,310 @@
1
+ """Local file system storage backend.
2
+
3
+ A simple, zero-dependency implementation for local file operations.
4
+ No external dependencies like fsspec or obstore required.
5
+ """
6
+
7
+ import shutil
8
+ from collections.abc import AsyncIterator, Iterator
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, Optional, Union
11
+ from urllib.parse import unquote, urlparse
12
+
13
+ from sqlspec.exceptions import MissingDependencyError
14
+ from sqlspec.typing import PYARROW_INSTALLED
15
+ from sqlspec.utils.sync_tools import async_
16
+
17
+ if TYPE_CHECKING:
18
+ import asyncio
19
+
20
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
21
+
22
+ __all__ = ("LocalStore",)
23
+
24
+
25
+ class LocalStore:
26
+ """Simple local file system storage backend.
27
+
28
+ Provides file system operations without requiring fsspec or obstore.
29
+ Supports file:// URIs and regular file paths.
30
+
31
+ Implements ObjectStoreProtocol for type safety.
32
+ """
33
+
34
+ __slots__ = ("_loop", "backend_type", "base_path", "protocol")
35
+
36
+ def __init__(self, uri: str = "", **kwargs: Any) -> None:
37
+ """Initialize local storage backend.
38
+
39
+ Args:
40
+ uri: File URI or path (e.g., "file:///path" or "/path")
41
+ **kwargs: Additional options (base_path for relative operations)
42
+ """
43
+ if uri.startswith("file://"):
44
+ parsed = urlparse(uri)
45
+ path = unquote(parsed.path)
46
+ # Handle Windows paths (file:///C:/path)
47
+ if path and len(path) > 2 and path[2] == ":": # noqa: PLR2004
48
+ path = path[1:] # Remove leading slash for Windows
49
+ self.base_path = Path(path).resolve()
50
+ elif uri:
51
+ self.base_path = Path(uri).resolve()
52
+ else:
53
+ self.base_path = Path.cwd()
54
+
55
+ # Allow override with explicit base_path
56
+ if "base_path" in kwargs:
57
+ self.base_path = Path(kwargs["base_path"]).resolve()
58
+
59
+ # Create base directory if it doesn't exist and it's actually a directory
60
+ if not self.base_path.exists():
61
+ self.base_path.mkdir(parents=True, exist_ok=True)
62
+ elif self.base_path.is_file():
63
+ # If base_path points to a file, use its parent as the base directory
64
+ self.base_path = self.base_path.parent
65
+ self._loop: Optional[asyncio.AbstractEventLoop] = None
66
+
67
+ self.protocol = "file"
68
+ self.backend_type = "local"
69
+
70
+ def _ensure_pyarrow(self) -> None:
71
+ """Ensure PyArrow is available for Arrow operations."""
72
+ if not PYARROW_INSTALLED:
73
+ raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
74
+
75
+ def _resolve_path(self, path: "Union[str, Path]") -> Path:
76
+ """Resolve path relative to base_path."""
77
+ p = Path(path)
78
+ if p.is_absolute():
79
+ return p
80
+ return self.base_path / p
81
+
82
+ def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
83
+ """Read bytes from file."""
84
+ resolved = self._resolve_path(path)
85
+ return resolved.read_bytes()
86
+
87
+ def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
88
+ """Write bytes to file."""
89
+ resolved = self._resolve_path(path)
90
+ resolved.parent.mkdir(parents=True, exist_ok=True)
91
+ resolved.write_bytes(data)
92
+
93
+ def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
94
+ """Read text from file."""
95
+ return self._resolve_path(path).read_text(encoding=encoding)
96
+
97
+ def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
98
+ """Write text to file."""
99
+ resolved = self._resolve_path(path)
100
+ resolved.parent.mkdir(parents=True, exist_ok=True)
101
+ resolved.write_text(data, encoding=encoding)
102
+
103
+ def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
104
+ """List objects in directory."""
105
+ # If prefix looks like a directory path, treat as directory
106
+ if prefix and (prefix.endswith("/") or "/" in prefix):
107
+ search_path = self._resolve_path(prefix)
108
+ if not search_path.exists():
109
+ return []
110
+ if search_path.is_file():
111
+ return [str(search_path.relative_to(self.base_path))]
112
+ else:
113
+ # Treat as filename prefix filter
114
+ search_path = self.base_path
115
+
116
+ pattern = "**/*" if recursive else "*"
117
+ files = []
118
+ for path in search_path.glob(pattern):
119
+ if path.is_file():
120
+ try:
121
+ relative = path.relative_to(self.base_path)
122
+ relative_str = str(relative)
123
+ # Apply prefix filter if provided
124
+ if not prefix or relative_str.startswith(prefix):
125
+ files.append(relative_str)
126
+ except ValueError:
127
+ # Path is outside base_path, use absolute
128
+ path_str = str(path)
129
+ if not prefix or path_str.startswith(prefix):
130
+ files.append(path_str)
131
+
132
+ return sorted(files)
133
+
134
+ def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
135
+ """Check if file exists."""
136
+ return self._resolve_path(path).exists()
137
+
138
+ def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None:
139
+ """Delete file or directory."""
140
+ resolved = self._resolve_path(path)
141
+ if resolved.is_dir():
142
+ shutil.rmtree(resolved)
143
+ elif resolved.exists():
144
+ resolved.unlink()
145
+
146
+ def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
147
+ """Copy file or directory."""
148
+ src = self._resolve_path(source)
149
+ dst = self._resolve_path(destination)
150
+ dst.parent.mkdir(parents=True, exist_ok=True)
151
+
152
+ if src.is_dir():
153
+ shutil.copytree(src, dst, dirs_exist_ok=True)
154
+ else:
155
+ shutil.copy2(src, dst)
156
+
157
+ def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
158
+ """Move file or directory."""
159
+ src = self._resolve_path(source)
160
+ dst = self._resolve_path(destination)
161
+ dst.parent.mkdir(parents=True, exist_ok=True)
162
+ shutil.move(str(src), str(dst))
163
+
164
+ def glob(self, pattern: str, **kwargs: Any) -> list[str]:
165
+ """Find files matching pattern."""
166
+ # Handle both relative and absolute patterns
167
+ if Path(pattern).is_absolute():
168
+ base_path = Path(pattern).parent
169
+ pattern_name = Path(pattern).name
170
+ matches = base_path.rglob(pattern_name) if "**" in pattern else base_path.glob(pattern_name)
171
+ else:
172
+ matches = self.base_path.rglob(pattern) if "**" in pattern else self.base_path.glob(pattern)
173
+
174
+ results = []
175
+ for match in matches:
176
+ if match.is_file():
177
+ try:
178
+ relative = match.relative_to(self.base_path)
179
+ results.append(str(relative))
180
+ except ValueError:
181
+ results.append(str(match))
182
+
183
+ return sorted(results)
184
+
185
+ def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
186
+ """Get file metadata."""
187
+ resolved = self._resolve_path(path)
188
+ if not resolved.exists():
189
+ return {}
190
+
191
+ stat = resolved.stat()
192
+ return {
193
+ "size": stat.st_size,
194
+ "modified": stat.st_mtime,
195
+ "created": stat.st_ctime,
196
+ "is_file": resolved.is_file(),
197
+ "is_dir": resolved.is_dir(),
198
+ "path": str(resolved),
199
+ }
200
+
201
+ def is_object(self, path: "Union[str, Path]") -> bool:
202
+ """Check if path points to a file."""
203
+ return self._resolve_path(path).is_file()
204
+
205
+ def is_path(self, path: "Union[str, Path]") -> bool:
206
+ """Check if path points to a directory."""
207
+ return self._resolve_path(path).is_dir()
208
+
209
+ def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
210
+ """Read Arrow table from file."""
211
+ self._ensure_pyarrow()
212
+ import pyarrow.parquet as pq
213
+
214
+ return pq.read_table(str(self._resolve_path(path)))
215
+
216
+ def write_arrow(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
217
+ """Write Arrow table to file."""
218
+ self._ensure_pyarrow()
219
+ import pyarrow.parquet as pq
220
+
221
+ resolved = self._resolve_path(path)
222
+ resolved.parent.mkdir(parents=True, exist_ok=True)
223
+ pq.write_table(table, str(resolved))
224
+
225
+ def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator["ArrowRecordBatch"]:
226
+ """Stream Arrow record batches from files matching pattern.
227
+
228
+ Yields:
229
+ Arrow record batches from matching files.
230
+ """
231
+ if not PYARROW_INSTALLED:
232
+ raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
233
+ import pyarrow.parquet as pq
234
+
235
+ files = self.glob(pattern)
236
+ for file_path in files:
237
+ resolved = self._resolve_path(file_path)
238
+ parquet_file = pq.ParquetFile(str(resolved))
239
+ yield from parquet_file.iter_batches()
240
+
241
+ def sign(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
242
+ """Generate a signed URL (returns file:// URI for local files)."""
243
+ # For local files, just return a file:// URI
244
+ # No actual signing needed for local files
245
+ return self._resolve_path(path).as_uri()
246
+
247
+ # Async methods using sync_tools.async_
248
+ async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
249
+ """Read bytes from file asynchronously."""
250
+ return await async_(self.read_bytes)(path, **kwargs)
251
+
252
+ async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
253
+ """Write bytes to file asynchronously."""
254
+ await async_(self.write_bytes)(path, data, **kwargs)
255
+
256
+ async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
257
+ """Read text from file asynchronously."""
258
+ return await async_(self.read_text)(path, encoding, **kwargs)
259
+
260
+ async def write_text_async(
261
+ self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
262
+ ) -> None:
263
+ """Write text to file asynchronously."""
264
+ await async_(self.write_text)(path, data, encoding, **kwargs)
265
+
266
+ async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
267
+ """List objects asynchronously."""
268
+ return await async_(self.list_objects)(prefix, recursive, **kwargs)
269
+
270
+ async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
271
+ """Check if file exists asynchronously."""
272
+ return await async_(self.exists)(path, **kwargs)
273
+
274
+ async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None:
275
+ """Delete file asynchronously."""
276
+ await async_(self.delete)(path, **kwargs)
277
+
278
+ async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
279
+ """Copy file asynchronously."""
280
+ await async_(self.copy)(source, destination, **kwargs)
281
+
282
+ async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
283
+ """Move file asynchronously."""
284
+ await async_(self.move)(source, destination, **kwargs)
285
+
286
+ async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
287
+ """Get file metadata asynchronously."""
288
+ return await async_(self.get_metadata)(path, **kwargs)
289
+
290
+ async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
291
+ """Read Arrow table asynchronously."""
292
+ return await async_(self.read_arrow)(path, **kwargs)
293
+
294
+ async def write_arrow_async(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
295
+ """Write Arrow table asynchronously."""
296
+ await async_(self.write_arrow)(path, table, **kwargs)
297
+
298
+ def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator["ArrowRecordBatch"]:
299
+ """Stream Arrow record batches asynchronously."""
300
+
301
+ # Convert sync iterator to async
302
+ async def _stream() -> AsyncIterator["ArrowRecordBatch"]:
303
+ for batch in self.stream_arrow(pattern, **kwargs):
304
+ yield batch
305
+
306
+ return _stream()
307
+
308
+ async def sign_async(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
309
+ """Generate a signed URL asynchronously (returns file:// URI for local files)."""
310
+ return await async_(self.sign)(path, expires_in, for_upload)