sqlspec 0.21.1__py3-none-any.whl → 0.23.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

@@ -0,0 +1,310 @@
1
+ """Local file system storage backend.
2
+
3
+ A simple, zero-dependency implementation for local file operations.
4
+ No external dependencies like fsspec or obstore required.
5
+ """
6
+
7
+ import shutil
8
+ from collections.abc import AsyncIterator, Iterator
9
+ from pathlib import Path
10
+ from typing import TYPE_CHECKING, Any, Optional, Union
11
+ from urllib.parse import unquote, urlparse
12
+
13
+ from sqlspec.exceptions import MissingDependencyError
14
+ from sqlspec.typing import PYARROW_INSTALLED
15
+ from sqlspec.utils.sync_tools import async_
16
+
17
+ if TYPE_CHECKING:
18
+ import asyncio
19
+
20
+ from sqlspec.typing import ArrowRecordBatch, ArrowTable
21
+
22
+ __all__ = ("LocalStore",)
23
+
24
+
25
class LocalStore:
    """Simple local file system storage backend.

    Provides file system operations without requiring fsspec or obstore.
    Supports file:// URIs and regular file paths.

    Implements ObjectStoreProtocol for type safety.

    Relative paths given to any method are joined onto ``base_path``;
    absolute paths are used unchanged (they are *not* confined to
    ``base_path``).
    """

    __slots__ = ("_loop", "backend_type", "base_path", "protocol")

    def __init__(self, uri: str = "", **kwargs: Any) -> None:
        """Initialize local storage backend.

        Args:
            uri: File URI or path (e.g., "file:///path" or "/path"). An empty
                string selects the current working directory.
            **kwargs: Additional options. ``base_path`` overrides whatever
                location was derived from ``uri``.
        """
        if uri.startswith("file://"):
            path = unquote(urlparse(uri).path)
            # "file:///C:/dir" parses to "/C:/dir"; drop the leading slash so
            # the drive letter is recognised. Require "<letter>:" followed by
            # a separator (or nothing) so that a POSIX path such as "/a:b/c"
            # is not mistaken for a Windows drive.
            if (
                len(path) > 2  # noqa: PLR2004
                and path[0] == "/"
                and path[1].isalpha()
                and path[2] == ":"
                and path[3:4] in ("", "/", "\\")
            ):
                path = path[1:]
            self.base_path = Path(path).resolve()
        elif uri:
            self.base_path = Path(uri).resolve()
        else:
            self.base_path = Path.cwd()

        # Allow override with explicit base_path
        if "base_path" in kwargs:
            self.base_path = Path(kwargs["base_path"]).resolve()

        # A missing location is created as a directory; an existing regular
        # file is replaced by its parent so base_path is always a directory.
        if not self.base_path.exists():
            self.base_path.mkdir(parents=True, exist_ok=True)
        elif self.base_path.is_file():
            self.base_path = self.base_path.parent
        self._loop: Optional[asyncio.AbstractEventLoop] = None

        self.protocol = "file"
        self.backend_type = "local"

    def _ensure_pyarrow(self) -> None:
        """Ensure PyArrow is available for Arrow operations.

        Raises:
            MissingDependencyError: If ``PYARROW_INSTALLED`` is false.
        """
        if not PYARROW_INSTALLED:
            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

    def _resolve_path(self, path: "Union[str, Path]") -> Path:
        """Resolve path relative to base_path.

        Absolute paths are returned as-is; anything else is joined onto
        ``base_path``.
        """
        p = Path(path)
        if p.is_absolute():
            return p
        return self.base_path / p

    def _relative_str(self, path: Path) -> str:
        """Return ``path`` relative to ``base_path``, or absolute if outside it."""
        try:
            return str(path.relative_to(self.base_path))
        except ValueError:
            # Path is outside base_path, use absolute
            return str(path)

    def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
        """Read bytes from file. ``kwargs`` are accepted but unused."""
        resolved = self._resolve_path(path)
        return resolved.read_bytes()

    def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
        """Write bytes to file, creating parent directories as needed."""
        resolved = self._resolve_path(path)
        resolved.parent.mkdir(parents=True, exist_ok=True)
        resolved.write_bytes(data)

    def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read text from file using ``encoding``."""
        return self._resolve_path(path).read_text(encoding=encoding)

    def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
        """Write text to file, creating parent directories as needed."""
        resolved = self._resolve_path(path)
        resolved.parent.mkdir(parents=True, exist_ok=True)
        resolved.write_text(data, encoding=encoding)

    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List files whose path starts with ``prefix``.

        Args:
            prefix: Empty for everything under ``base_path``. A prefix
                containing "/" narrows the search to that directory, or, when
                it names no existing path and has no trailing "/", to its
                parent directory with the final component used as a filename
                prefix. A prefix without "/" filters entries under
                ``base_path``.
            recursive: Descend into sub-directories when true; otherwise only
                the immediate children of the searched directory are listed.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            Sorted paths relative to ``base_path`` (absolute for files that
            live outside it). Directories are never included.
        """
        search_path = self.base_path
        if "/" in prefix:
            candidate = self._resolve_path(prefix)
            if candidate.is_file():
                return [self._relative_str(candidate)]
            if candidate.is_dir():
                search_path = candidate
            elif prefix.endswith("/"):
                # Explicitly a directory, and it does not exist.
                return []
            else:
                # Partial name such as "data/2024-": scan the parent and let
                # the startswith() filter below pick the matching entries.
                search_path = candidate.parent
                if not search_path.is_dir():
                    return []

        pattern = "**/*" if recursive else "*"
        names = (self._relative_str(entry) for entry in search_path.glob(pattern) if entry.is_file())
        # str.startswith("") is always true, so an empty prefix keeps everything.
        return sorted(name for name in names if name.startswith(prefix))

    def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
        """Check if a file or directory exists at ``path``."""
        return self._resolve_path(path).exists()

    def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None:
        """Delete file or directory; a missing path is a no-op.

        Directories are removed recursively. Symbolic links are unlinked
        without touching their target: ``shutil.rmtree`` refuses symlinks, and
        a dangling link reports ``exists() == False`` yet should still go.
        """
        resolved = self._resolve_path(path)
        if resolved.is_symlink():
            resolved.unlink()
        elif resolved.is_dir():
            shutil.rmtree(resolved)
        elif resolved.exists():
            resolved.unlink()

    def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
        """Copy file or directory, creating the destination's parent first.

        Directories are copied recursively and merged into an existing
        destination; files are copied with ``shutil.copy2``.
        """
        src = self._resolve_path(source)
        dst = self._resolve_path(destination)
        dst.parent.mkdir(parents=True, exist_ok=True)

        if src.is_dir():
            shutil.copytree(src, dst, dirs_exist_ok=True)
        else:
            shutil.copy2(src, dst)

    def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
        """Move file or directory, creating the destination's parent first."""
        src = self._resolve_path(source)
        dst = self._resolve_path(destination)
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src), str(dst))

    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
        """Find files matching pattern.

        Args:
            pattern: Glob pattern in ``pathlib`` syntax (``*``, ``?``,
                ``[...]``, and ``**`` for any number of directories). Relative
                patterns are evaluated from ``base_path``; absolute patterns
                are evaluated from their longest wildcard-free leading
                directory.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            Sorted matching files, relative to ``base_path`` where possible
            and absolute otherwise. Directories are excluded.
        """
        pattern_path = Path(pattern)
        if pattern_path.is_absolute():
            parts = pattern_path.parts
            # Index of the first component holding a wildcard; everything
            # before it is a literal directory we can start globbing from.
            split_at = next(
                (index for index, part in enumerate(parts) if any(ch in part for ch in "*?[")),
                len(parts),
            )
            root = Path(*parts[:split_at])
            tail = parts[split_at:]
            # No wildcard at all: the pattern names one concrete path.
            matches = root.glob(str(Path(*tail))) if tail else iter((root,))
        else:
            # Path.glob understands "**" natively; rglob would prepend another
            # "**/" and match the pattern at every depth instead of anchoring
            # it at base_path.
            matches = self.base_path.glob(pattern)

        return sorted(self._relative_str(match) for match in matches if match.is_file())

    def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
        """Get file metadata.

        Returns:
            An empty dict when the path does not exist; otherwise a dict with
            ``size``, ``modified``, ``created``, ``is_file``, ``is_dir`` and
            ``path`` (the resolved path as a string).
        """
        resolved = self._resolve_path(path)
        if not resolved.exists():
            return {}

        stat = resolved.stat()
        return {
            "size": stat.st_size,
            "modified": stat.st_mtime,
            # NOTE: st_ctime is platform-dependent (metadata-change time on
            # most Unix systems rather than true creation time).
            "created": stat.st_ctime,
            "is_file": resolved.is_file(),
            "is_dir": resolved.is_dir(),
            "path": str(resolved),
        }

    def is_object(self, path: "Union[str, Path]") -> bool:
        """Check if path points to a file."""
        return self._resolve_path(path).is_file()

    def is_path(self, path: "Union[str, Path]") -> bool:
        """Check if path points to a directory."""
        return self._resolve_path(path).is_dir()

    def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
        """Read a Parquet file into an Arrow table.

        ``kwargs`` are accepted but not forwarded to PyArrow.

        Raises:
            MissingDependencyError: If PyArrow is not installed.
        """
        self._ensure_pyarrow()
        import pyarrow.parquet as pq

        return pq.read_table(str(self._resolve_path(path)))

    def write_arrow(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
        """Write an Arrow table to a Parquet file, creating parent directories.

        ``kwargs`` are accepted but not forwarded to PyArrow.

        Raises:
            MissingDependencyError: If PyArrow is not installed.
        """
        self._ensure_pyarrow()
        import pyarrow.parquet as pq

        resolved = self._resolve_path(path)
        resolved.parent.mkdir(parents=True, exist_ok=True)
        pq.write_table(table, str(resolved))

    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator["ArrowRecordBatch"]:
        """Stream Arrow record batches from files matching pattern.

        Because this is a generator, the PyArrow availability check runs on
        first iteration rather than at call time.

        Yields:
            Arrow record batches from matching files, file by file in the
            sorted order returned by :meth:`glob`.

        Raises:
            MissingDependencyError: If PyArrow is not installed.
        """
        self._ensure_pyarrow()
        import pyarrow.parquet as pq

        for file_path in self.glob(pattern):
            parquet_file = pq.ParquetFile(str(self._resolve_path(file_path)))
            yield from parquet_file.iter_batches()

    def sign(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
        """Generate a signed URL (returns file:// URI for local files).

        No actual signing is needed for local files, so ``expires_in`` and
        ``for_upload`` are ignored.
        """
        return self._resolve_path(path).as_uri()

    # Async methods using sync_tools.async_
    async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
        """Read bytes from file asynchronously."""
        return await async_(self.read_bytes)(path, **kwargs)

    async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
        """Write bytes to file asynchronously."""
        await async_(self.write_bytes)(path, data, **kwargs)

    async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
        """Read text from file asynchronously."""
        return await async_(self.read_text)(path, encoding, **kwargs)

    async def write_text_async(
        self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
    ) -> None:
        """Write text to file asynchronously."""
        await async_(self.write_text)(path, data, encoding, **kwargs)

    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
        """List objects asynchronously."""
        return await async_(self.list_objects)(prefix, recursive, **kwargs)

    async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
        """Check if file exists asynchronously."""
        return await async_(self.exists)(path, **kwargs)

    async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None:
        """Delete file or directory asynchronously."""
        await async_(self.delete)(path, **kwargs)

    async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
        """Copy file or directory asynchronously."""
        await async_(self.copy)(source, destination, **kwargs)

    async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
        """Move file or directory asynchronously."""
        await async_(self.move)(source, destination, **kwargs)

    async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
        """Get file metadata asynchronously."""
        return await async_(self.get_metadata)(path, **kwargs)

    async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
        """Read Arrow table asynchronously."""
        return await async_(self.read_arrow)(path, **kwargs)

    async def write_arrow_async(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
        """Write Arrow table asynchronously."""
        await async_(self.write_arrow)(path, table, **kwargs)

    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator["ArrowRecordBatch"]:
        """Stream Arrow record batches asynchronously.

        Returns an async generator wrapping :meth:`stream_arrow`. Each batch
        is still produced by the synchronous iterator inside the coroutine;
        the file reads themselves are not handed to ``async_``.
        """

        # Convert sync iterator to async
        async def _stream() -> AsyncIterator["ArrowRecordBatch"]:
            for batch in self.stream_arrow(pattern, **kwargs):
                yield batch

        return _stream()

    async def sign_async(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
        """Generate a signed URL asynchronously (returns file:// URI for local files)."""
        return await async_(self.sign)(path, expires_in, for_upload)