sqlspec 0.21.1__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff compares publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of sqlspec might be problematic.
- sqlspec/_sql.py +36 -0
- sqlspec/base.py +4 -4
- sqlspec/builder/mixins/_join_operations.py +205 -85
- sqlspec/loader.py +65 -68
- sqlspec/protocols.py +3 -5
- sqlspec/storage/__init__.py +2 -12
- sqlspec/storage/backends/__init__.py +1 -0
- sqlspec/storage/backends/fsspec.py +87 -147
- sqlspec/storage/backends/local.py +310 -0
- sqlspec/storage/backends/obstore.py +210 -192
- sqlspec/storage/registry.py +101 -70
- sqlspec/utils/sync_tools.py +8 -5
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/METADATA +1 -1
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/RECORD +18 -18
- sqlspec/storage/capabilities.py +0 -102
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/WHEEL +0 -0
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/entry_points.txt +0 -0
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/licenses/LICENSE +0 -0
- {sqlspec-0.21.1.dist-info → sqlspec-0.23.0.dist-info}/licenses/NOTICE +0 -0
sqlspec/storage/backends/local.py (new file)
@@ -0,0 +1,310 @@
+"""Local file system storage backend.
+
+A simple, zero-dependency implementation for local file operations.
+No external dependencies like fsspec or obstore required.
+"""
+
+import shutil
+from collections.abc import AsyncIterator, Iterator
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Optional, Union
+from urllib.parse import unquote, urlparse
+
+from sqlspec.exceptions import MissingDependencyError
+from sqlspec.typing import PYARROW_INSTALLED
+from sqlspec.utils.sync_tools import async_
+
+if TYPE_CHECKING:
+    import asyncio
+
+    from sqlspec.typing import ArrowRecordBatch, ArrowTable
+
+__all__ = ("LocalStore",)
+
+
+class LocalStore:
+    """Simple local file system storage backend.
+
+    Provides file system operations without requiring fsspec or obstore.
+    Supports file:// URIs and regular file paths.
+
+    Implements ObjectStoreProtocol for type safety.
+    """
+
+    __slots__ = ("_loop", "backend_type", "base_path", "protocol")
+
+    def __init__(self, uri: str = "", **kwargs: Any) -> None:
+        """Initialize local storage backend.
+
+        Args:
+            uri: File URI or path (e.g., "file:///path" or "/path")
+            **kwargs: Additional options (base_path for relative operations)
+        """
+        if uri.startswith("file://"):
+            parsed = urlparse(uri)
+            path = unquote(parsed.path)
+            # Handle Windows paths (file:///C:/path)
+            if path and len(path) > 2 and path[2] == ":":  # noqa: PLR2004
+                path = path[1:]  # Remove leading slash for Windows
+            self.base_path = Path(path).resolve()
+        elif uri:
+            self.base_path = Path(uri).resolve()
+        else:
+            self.base_path = Path.cwd()
+
+        # Allow override with explicit base_path
+        if "base_path" in kwargs:
+            self.base_path = Path(kwargs["base_path"]).resolve()
+
+        # Create base directory if it doesn't exist and it's actually a directory
+        if not self.base_path.exists():
+            self.base_path.mkdir(parents=True, exist_ok=True)
+        elif self.base_path.is_file():
+            # If base_path points to a file, use its parent as the base directory
+            self.base_path = self.base_path.parent
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+
+        self.protocol = "file"
+        self.backend_type = "local"
+
+    def _ensure_pyarrow(self) -> None:
+        """Ensure PyArrow is available for Arrow operations."""
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+
+    def _resolve_path(self, path: "Union[str, Path]") -> Path:
+        """Resolve path relative to base_path."""
+        p = Path(path)
+        if p.is_absolute():
+            return p
+        return self.base_path / p
+
+    def read_bytes(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
+        """Read bytes from file."""
+        resolved = self._resolve_path(path)
+        return resolved.read_bytes()
+
+    def write_bytes(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
+        """Write bytes to file."""
+        resolved = self._resolve_path(path)
+        resolved.parent.mkdir(parents=True, exist_ok=True)
+        resolved.write_bytes(data)
+
+    def read_text(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from file."""
+        return self._resolve_path(path).read_text(encoding=encoding)
+
+    def write_text(self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any) -> None:
+        """Write text to file."""
+        resolved = self._resolve_path(path)
+        resolved.parent.mkdir(parents=True, exist_ok=True)
+        resolved.write_text(data, encoding=encoding)
+
+    def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects in directory."""
+        # If prefix looks like a directory path, treat as directory
+        if prefix and (prefix.endswith("/") or "/" in prefix):
+            search_path = self._resolve_path(prefix)
+            if not search_path.exists():
+                return []
+            if search_path.is_file():
+                return [str(search_path.relative_to(self.base_path))]
+        else:
+            # Treat as filename prefix filter
+            search_path = self.base_path
+
+        pattern = "**/*" if recursive else "*"
+        files = []
+        for path in search_path.glob(pattern):
+            if path.is_file():
+                try:
+                    relative = path.relative_to(self.base_path)
+                    relative_str = str(relative)
+                    # Apply prefix filter if provided
+                    if not prefix or relative_str.startswith(prefix):
+                        files.append(relative_str)
+                except ValueError:
+                    # Path is outside base_path, use absolute
+                    path_str = str(path)
+                    if not prefix or path_str.startswith(prefix):
+                        files.append(path_str)
+
+        return sorted(files)
+
+    def exists(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
+        """Check if file exists."""
+        return self._resolve_path(path).exists()
+
+    def delete(self, path: "Union[str, Path]", **kwargs: Any) -> None:
+        """Delete file or directory."""
+        resolved = self._resolve_path(path)
+        if resolved.is_dir():
+            shutil.rmtree(resolved)
+        elif resolved.exists():
+            resolved.unlink()
+
+    def copy(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
+        """Copy file or directory."""
+        src = self._resolve_path(source)
+        dst = self._resolve_path(destination)
+        dst.parent.mkdir(parents=True, exist_ok=True)
+
+        if src.is_dir():
+            shutil.copytree(src, dst, dirs_exist_ok=True)
+        else:
+            shutil.copy2(src, dst)
+
+    def move(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
+        """Move file or directory."""
+        src = self._resolve_path(source)
+        dst = self._resolve_path(destination)
+        dst.parent.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(src), str(dst))
+
+    def glob(self, pattern: str, **kwargs: Any) -> list[str]:
+        """Find files matching pattern."""
+        # Handle both relative and absolute patterns
+        if Path(pattern).is_absolute():
+            base_path = Path(pattern).parent
+            pattern_name = Path(pattern).name
+            matches = base_path.rglob(pattern_name) if "**" in pattern else base_path.glob(pattern_name)
+        else:
+            matches = self.base_path.rglob(pattern) if "**" in pattern else self.base_path.glob(pattern)
+
+        results = []
+        for match in matches:
+            if match.is_file():
+                try:
+                    relative = match.relative_to(self.base_path)
+                    results.append(str(relative))
+                except ValueError:
+                    results.append(str(match))
+
+        return sorted(results)
+
+    def get_metadata(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
+        """Get file metadata."""
+        resolved = self._resolve_path(path)
+        if not resolved.exists():
+            return {}
+
+        stat = resolved.stat()
+        return {
+            "size": stat.st_size,
+            "modified": stat.st_mtime,
+            "created": stat.st_ctime,
+            "is_file": resolved.is_file(),
+            "is_dir": resolved.is_dir(),
+            "path": str(resolved),
+        }
+
+    def is_object(self, path: "Union[str, Path]") -> bool:
+        """Check if path points to a file."""
+        return self._resolve_path(path).is_file()
+
+    def is_path(self, path: "Union[str, Path]") -> bool:
+        """Check if path points to a directory."""
+        return self._resolve_path(path).is_dir()
+
+    def read_arrow(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
+        """Read Arrow table from file."""
+        self._ensure_pyarrow()
+        import pyarrow.parquet as pq
+
+        return pq.read_table(str(self._resolve_path(path)))
+
+    def write_arrow(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
+        """Write Arrow table to file."""
+        self._ensure_pyarrow()
+        import pyarrow.parquet as pq
+
+        resolved = self._resolve_path(path)
+        resolved.parent.mkdir(parents=True, exist_ok=True)
+        pq.write_table(table, str(resolved))
+
+    def stream_arrow(self, pattern: str, **kwargs: Any) -> Iterator["ArrowRecordBatch"]:
+        """Stream Arrow record batches from files matching pattern.
+
+        Yields:
+            Arrow record batches from matching files.
+        """
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        import pyarrow.parquet as pq
+
+        files = self.glob(pattern)
+        for file_path in files:
+            resolved = self._resolve_path(file_path)
+            parquet_file = pq.ParquetFile(str(resolved))
+            yield from parquet_file.iter_batches()
+
+    def sign(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
+        """Generate a signed URL (returns file:// URI for local files)."""
+        # For local files, just return a file:// URI
+        # No actual signing needed for local files
+        return self._resolve_path(path).as_uri()
+
+    # Async methods using sync_tools.async_
+    async def read_bytes_async(self, path: "Union[str, Path]", **kwargs: Any) -> bytes:
+        """Read bytes from file asynchronously."""
+        return await async_(self.read_bytes)(path, **kwargs)
+
+    async def write_bytes_async(self, path: "Union[str, Path]", data: bytes, **kwargs: Any) -> None:
+        """Write bytes to file asynchronously."""
+        await async_(self.write_bytes)(path, data, **kwargs)
+
+    async def read_text_async(self, path: "Union[str, Path]", encoding: str = "utf-8", **kwargs: Any) -> str:
+        """Read text from file asynchronously."""
+        return await async_(self.read_text)(path, encoding, **kwargs)
+
+    async def write_text_async(
+        self, path: "Union[str, Path]", data: str, encoding: str = "utf-8", **kwargs: Any
+    ) -> None:
+        """Write text to file asynchronously."""
+        await async_(self.write_text)(path, data, encoding, **kwargs)
+
+    async def list_objects_async(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
+        """List objects asynchronously."""
+        return await async_(self.list_objects)(prefix, recursive, **kwargs)
+
+    async def exists_async(self, path: "Union[str, Path]", **kwargs: Any) -> bool:
+        """Check if file exists asynchronously."""
+        return await async_(self.exists)(path, **kwargs)
+
+    async def delete_async(self, path: "Union[str, Path]", **kwargs: Any) -> None:
+        """Delete file asynchronously."""
+        await async_(self.delete)(path, **kwargs)
+
+    async def copy_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
+        """Copy file asynchronously."""
+        await async_(self.copy)(source, destination, **kwargs)
+
+    async def move_async(self, source: "Union[str, Path]", destination: "Union[str, Path]", **kwargs: Any) -> None:
+        """Move file asynchronously."""
+        await async_(self.move)(source, destination, **kwargs)
+
+    async def get_metadata_async(self, path: "Union[str, Path]", **kwargs: Any) -> dict[str, Any]:
+        """Get file metadata asynchronously."""
+        return await async_(self.get_metadata)(path, **kwargs)
+
+    async def read_arrow_async(self, path: "Union[str, Path]", **kwargs: Any) -> "ArrowTable":
+        """Read Arrow table asynchronously."""
+        return await async_(self.read_arrow)(path, **kwargs)
+
+    async def write_arrow_async(self, path: "Union[str, Path]", table: "ArrowTable", **kwargs: Any) -> None:
+        """Write Arrow table asynchronously."""
+        await async_(self.write_arrow)(path, table, **kwargs)
+
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> AsyncIterator["ArrowRecordBatch"]:
+        """Stream Arrow record batches asynchronously."""
+
+        # Convert sync iterator to async
+        async def _stream() -> AsyncIterator["ArrowRecordBatch"]:
+            for batch in self.stream_arrow(pattern, **kwargs):
+                yield batch
+
+        return _stream()
+
+    async def sign_async(self, path: "Union[str, Path]", expires_in: int = 3600, for_upload: bool = False) -> str:
+        """Generate a signed URL asynchronously (returns file:// URI for local files)."""
+        return await async_(self.sign)(path, expires_in, for_upload)
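
For reviewers, a minimal usage sketch of the new backend, assuming only the LocalStore API added above; the temporary directory and file names below are illustrative and not part of the package:

import asyncio
import tempfile

from sqlspec.storage.backends.local import LocalStore

with tempfile.TemporaryDirectory() as tmp:
    # Base path comes from the file:// URI; relative paths resolve under it.
    store = LocalStore(f"file://{tmp}")

    # Sync API: write, read, list, and "sign" (signing just returns a file:// URI).
    store.write_text("reports/example.sql", "SELECT 1;")
    assert store.read_text("reports/example.sql") == "SELECT 1;"
    assert store.list_objects(prefix="reports/") == ["reports/example.sql"]
    print(store.sign("reports/example.sql"))  # e.g. file:///.../reports/example.sql

    # Async API: each *_async method delegates to its sync counterpart via async_().
    async def roundtrip() -> bytes:
        await store.write_bytes_async("blobs/data.bin", b"\x00\x01")
        return await store.read_bytes_async("blobs/data.bin")

    assert asyncio.run(roundtrip()) == b"\x00\x01"

The same store can also be constructed from a plain path (LocalStore("/some/dir")) or with an explicit base_path keyword, per the constructor shown in the diff.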