sqlspec-0.21.1-py3-none-any.whl → sqlspec-0.23.0-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of sqlspec might be problematic. Click here for more details.

@@ -8,16 +8,6 @@ Provides a storage system with:
8
8
  - Capability-based backend selection
9
9
  """
10
10
 
11
- from sqlspec.protocols import ObjectStoreProtocol
12
- from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
13
- from sqlspec.storage.registry import StorageRegistry
11
+ from sqlspec.storage.registry import StorageRegistry, storage_registry
14
12
 
15
- storage_registry = StorageRegistry()
16
-
17
- __all__ = (
18
- "HasStorageCapabilities",
19
- "ObjectStoreProtocol",
20
- "StorageCapabilities",
21
- "StorageRegistry",
22
- "storage_registry",
23
- )
13
+ __all__ = ("StorageRegistry", "storage_registry")
@@ -0,0 +1 @@
1
+ """Storage backends."""
@@ -1,18 +1,14 @@
1
1
  import logging
2
2
  from pathlib import Path
3
- from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
3
+ from typing import TYPE_CHECKING, Any, Optional, Union
4
4
 
5
- from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
6
- from sqlspec.storage.backends.base import ObjectStoreBase
7
- from sqlspec.storage.capabilities import StorageCapabilities
5
+ from sqlspec.exceptions import MissingDependencyError
8
6
  from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
9
7
  from sqlspec.utils.sync_tools import async_
10
8
 
11
9
  if TYPE_CHECKING:
12
10
  from collections.abc import AsyncIterator, Iterator
13
11
 
14
- from fsspec import AbstractFileSystem
15
-
16
12
  from sqlspec.typing import ArrowRecordBatch, ArrowTable
17
13
 
18
14
  __all__ = ("FSSpecBackend",)
@@ -56,40 +52,30 @@ class _ArrowStreamer:
56
52
  raise StopAsyncIteration
57
53
 
58
54
 
59
- class FSSpecBackend(ObjectStoreBase):
55
+ class FSSpecBackend:
60
56
  """Storage backend using fsspec.
61
57
 
62
- Implements the ObjectStoreProtocol using fsspec for various protocols
58
+ Implements ObjectStoreProtocol using fsspec for various protocols
63
59
  including HTTP, HTTPS, FTP, and cloud storage services.
64
60
  """
65
61
 
66
- _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
67
- supports_arrow=PYARROW_INSTALLED,
68
- supports_streaming=PYARROW_INSTALLED,
69
- supports_async=True,
70
- supports_compression=True,
71
- is_remote=True,
72
- is_cloud_native=False,
73
- )
74
-
75
- def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
76
- if not FSSPEC_INSTALLED:
77
- raise MissingDependencyError(package="fsspec", install_package="fsspec")
62
+ def __init__(self, uri: str, **kwargs: Any) -> None:
63
+ self._ensure_fsspec()
78
64
 
65
+ base_path = kwargs.pop("base_path", "")
79
66
  self.base_path = base_path.rstrip("/") if base_path else ""
80
67
 
81
- if isinstance(fs, str):
82
- import fsspec
83
-
84
- self.fs = fsspec.filesystem(fs.split("://")[0])
85
- self.protocol = fs.split("://")[0]
86
- self._fs_uri = fs
68
+ if "://" in uri:
69
+ self.protocol = uri.split("://", maxsplit=1)[0]
70
+ self._fs_uri = uri
87
71
  else:
88
- self.fs = fs
89
- self.protocol = getattr(fs, "protocol", "unknown")
90
- self._fs_uri = f"{self.protocol}://"
72
+ self.protocol = uri
73
+ self._fs_uri = f"{uri}://"
74
+
75
+ import fsspec
91
76
 
92
- self._instance_capabilities = self._detect_capabilities()
77
+ self.fs = fsspec.filesystem(self.protocol, **kwargs)
78
+ self.backend_type = "fsspec"
93
79
 
94
80
  super().__init__()
95
81
 
@@ -99,11 +85,22 @@ class FSSpecBackend(ObjectStoreBase):
99
85
  fs_config = config.get("fs_config", {})
100
86
  base_path = config.get("base_path", "")
101
87
 
102
- import fsspec
88
+ uri = f"{protocol}://"
89
+ kwargs = dict(fs_config)
90
+ if base_path:
91
+ kwargs["base_path"] = base_path
103
92
 
104
- fs_instance = fsspec.filesystem(protocol, **fs_config)
93
+ return cls(uri=uri, **kwargs)
105
94
 
106
- return cls(fs=fs_instance, base_path=base_path)
95
+ def _ensure_fsspec(self) -> None:
96
+ """Ensure fsspec is available for operations."""
97
+ if not FSSPEC_INSTALLED:
98
+ raise MissingDependencyError(package="fsspec", install_package="fsspec")
99
+
100
+ def _ensure_pyarrow(self) -> None:
101
+ """Ensure PyArrow is available for Arrow operations."""
102
+ if not PYARROW_INSTALLED:
103
+ raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
107
104
 
108
105
  def _resolve_path(self, path: Union[str, Path]) -> str:
109
106
  """Resolve path relative to base_path."""
@@ -112,70 +109,38 @@ class FSSpecBackend(ObjectStoreBase):
112
109
  clean_base = self.base_path.rstrip("/")
113
110
  clean_path = path_str.lstrip("/")
114
111
  return f"{clean_base}/{clean_path}"
112
+ if self.protocol == "s3" and "://" in self._fs_uri:
113
+ # For S3, we need to include the bucket from the URI
114
+ # Extract bucket and path from URI like s3://bucket/path
115
+ uri_parts = self._fs_uri.split("://", 1)[1] # Remove s3://
116
+ if "/" in uri_parts:
117
+ # URI has bucket and base path
118
+ return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
119
+ # URI has only bucket
120
+ return f"{uri_parts}/{path_str.lstrip('/')}"
115
121
  return path_str
116
122
 
117
- def _detect_capabilities(self) -> StorageCapabilities:
118
- """Detect capabilities based on filesystem protocol."""
119
- protocol = self.protocol.lower()
120
-
121
- if protocol in {"s3", "s3a", "s3n"}:
122
- return StorageCapabilities.s3_compatible()
123
- if protocol in {"gcs", "gs"}:
124
- return StorageCapabilities.gcs()
125
- if protocol in {"abfs", "az", "azure"}:
126
- return StorageCapabilities.azure_blob()
127
- if protocol in {"file", "local"}:
128
- return StorageCapabilities.local_filesystem()
129
- return StorageCapabilities(
130
- supports_arrow=PYARROW_INSTALLED,
131
- supports_streaming=PYARROW_INSTALLED,
132
- supports_async=True,
133
- supports_compression=True,
134
- is_remote=True,
135
- is_cloud_native=False,
136
- )
137
-
138
- @property
139
- def capabilities(self) -> StorageCapabilities:
140
- """Return capabilities based on detected protocol."""
141
- return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
142
-
143
- @classmethod
144
- def has_capability(cls, capability: str) -> bool:
145
- """Check if backend has a specific capability."""
146
- return getattr(cls._default_capabilities, capability, False)
147
-
148
- @classmethod
149
- def get_capabilities(cls) -> StorageCapabilities:
150
- """Get all capabilities for this backend."""
151
- return cls._default_capabilities
152
-
153
- @property
154
- def backend_type(self) -> str:
155
- return "fsspec"
156
-
157
123
  @property
158
124
  def base_uri(self) -> str:
159
125
  return self._fs_uri
160
126
 
161
127
  def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
162
128
  """Read bytes from an object."""
163
- try:
164
- resolved_path = self._resolve_path(path)
165
- return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
166
- except Exception as exc:
167
- msg = f"Failed to read bytes from {path}"
168
- raise StorageOperationFailedError(msg) from exc
129
+ resolved_path = self._resolve_path(path)
130
+ return self.fs.cat(resolved_path, **kwargs) # type: ignore[no-any-return] # pyright: ignore
169
131
 
170
132
  def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
171
133
  """Write bytes to an object."""
172
- try:
173
- resolved_path = self._resolve_path(path)
174
- with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
175
- f.write(data) # pyright: ignore
176
- except Exception as exc:
177
- msg = f"Failed to write bytes to {path}"
178
- raise StorageOperationFailedError(msg) from exc
134
+ resolved_path = self._resolve_path(path)
135
+
136
+ # Only create directories for local file systems, not for cloud storage
137
+ if self.protocol == "file":
138
+ parent_dir = str(Path(resolved_path).parent)
139
+ if parent_dir and not self.fs.exists(parent_dir):
140
+ self.fs.makedirs(parent_dir, exist_ok=True)
141
+
142
+ with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
143
+ f.write(data) # pyright: ignore
179
144
 
180
145
  def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
181
146
  """Read text from an object."""
@@ -193,87 +158,59 @@ class FSSpecBackend(ObjectStoreBase):
193
158
 
194
159
  def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
195
160
  """Delete an object."""
196
- try:
197
- resolved_path = self._resolve_path(path)
198
- self.fs.rm(resolved_path, **kwargs)
199
- except Exception as exc:
200
- msg = f"Failed to delete {path}"
201
- raise StorageOperationFailedError(msg) from exc
161
+ resolved_path = self._resolve_path(path)
162
+ self.fs.rm(resolved_path, **kwargs)
202
163
 
203
164
  def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
204
165
  """Copy an object."""
205
- try:
206
- source_path = self._resolve_path(source)
207
- dest_path = self._resolve_path(destination)
208
- self.fs.copy(source_path, dest_path, **kwargs)
209
- except Exception as exc:
210
- msg = f"Failed to copy {source} to {destination}"
211
- raise StorageOperationFailedError(msg) from exc
166
+ source_path = self._resolve_path(source)
167
+ dest_path = self._resolve_path(destination)
168
+ self.fs.copy(source_path, dest_path, **kwargs)
212
169
 
213
170
  def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
214
171
  """Move an object."""
215
- try:
216
- source_path = self._resolve_path(source)
217
- dest_path = self._resolve_path(destination)
218
- self.fs.mv(source_path, dest_path, **kwargs)
219
- except Exception as exc:
220
- msg = f"Failed to move {source} to {destination}"
221
- raise StorageOperationFailedError(msg) from exc
172
+ source_path = self._resolve_path(source)
173
+ dest_path = self._resolve_path(destination)
174
+ self.fs.mv(source_path, dest_path, **kwargs)
222
175
 
223
176
  def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
224
177
  """Read an Arrow table from storage."""
225
178
  if not PYARROW_INSTALLED:
226
179
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
227
- try:
228
- import pyarrow.parquet as pq
180
+ import pyarrow.parquet as pq
229
181
 
230
- resolved_path = self._resolve_path(path)
231
- with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
232
- return pq.read_table(f)
233
- except Exception as exc:
234
- msg = f"Failed to read Arrow table from {path}"
235
- raise StorageOperationFailedError(msg) from exc
182
+ resolved_path = self._resolve_path(path)
183
+ with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
184
+ return pq.read_table(f)
236
185
 
237
186
  def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
238
187
  """Write an Arrow table to storage."""
239
188
  if not PYARROW_INSTALLED:
240
189
  raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
241
- try:
242
- import pyarrow.parquet as pq
190
+ import pyarrow.parquet as pq
243
191
 
244
- resolved_path = self._resolve_path(path)
245
- with self.fs.open(resolved_path, mode="wb") as f:
246
- pq.write_table(table, f, **kwargs) # pyright: ignore
247
- except Exception as exc:
248
- msg = f"Failed to write Arrow table to {path}"
249
- raise StorageOperationFailedError(msg) from exc
192
+ resolved_path = self._resolve_path(path)
193
+ with self.fs.open(resolved_path, mode="wb") as f:
194
+ pq.write_table(table, f, **kwargs) # pyright: ignore
250
195
 
251
196
  def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
252
197
  """List objects with optional prefix."""
253
- try:
254
- resolved_prefix = self._resolve_path(prefix)
255
- if recursive:
256
- return sorted(self.fs.find(resolved_prefix, **kwargs))
257
- return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
258
- except Exception as exc:
259
- msg = f"Failed to list objects with prefix '{prefix}'"
260
- raise StorageOperationFailedError(msg) from exc
198
+ resolved_prefix = self._resolve_path(prefix)
199
+ if recursive:
200
+ return sorted(self.fs.find(resolved_prefix, **kwargs))
201
+ return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
261
202
 
262
203
  def glob(self, pattern: str, **kwargs: Any) -> list[str]:
263
204
  """Find objects matching a glob pattern."""
264
- try:
265
- resolved_pattern = self._resolve_path(pattern)
266
- return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
267
- except Exception as exc:
268
- msg = f"Failed to glob with pattern '{pattern}'"
269
- raise StorageOperationFailedError(msg) from exc
205
+ resolved_pattern = self._resolve_path(pattern)
206
+ return sorted(self.fs.glob(resolved_pattern, **kwargs)) # pyright: ignore
270
207
 
271
- def is_object(self, path: str) -> bool:
208
+ def is_object(self, path: Union[str, Path]) -> bool:
272
209
  """Check if path points to an object."""
273
210
  resolved_path = self._resolve_path(path)
274
211
  return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)
275
212
 
276
- def is_path(self, path: str) -> bool:
213
+ def is_path(self, path: Union[str, Path]) -> bool:
277
214
  """Check if path points to a prefix (directory-like)."""
278
215
  resolved_path = self._resolve_path(path)
279
216
  return self.fs.isdir(resolved_path) # type: ignore[no-any-return]
@@ -294,9 +231,6 @@ class FSSpecBackend(ObjectStoreBase):
294
231
 
295
232
  except FileNotFoundError:
296
233
  return {"path": self._resolve_path(path), "exists": False}
297
- except Exception as exc:
298
- msg = f"Failed to get metadata for {path}"
299
- raise StorageOperationFailedError(msg) from exc
300
234
  return {
301
235
  "path": resolved_path,
302
236
  "exists": True,
@@ -305,6 +239,11 @@ class FSSpecBackend(ObjectStoreBase):
305
239
  "type": info.type,
306
240
  }
307
241
 
242
+ def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
243
+ """Generate a signed URL for the file."""
244
+ resolved_path = self._resolve_path(path)
245
+ return f"{self._fs_uri}{resolved_path}"
246
+
308
247
  def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
309
248
  import pyarrow.parquet as pq
310
249
 
@@ -313,10 +252,8 @@ class FSSpecBackend(ObjectStoreBase):
313
252
  yield from parquet_file.iter_batches()
314
253
 
315
254
  def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
316
- if not FSSPEC_INSTALLED:
317
- raise MissingDependencyError(package="fsspec", install_package="fsspec")
318
- if not PYARROW_INSTALLED:
319
- raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
255
+ self._ensure_fsspec()
256
+ self._ensure_pyarrow()
320
257
 
321
258
  for obj_path in self.glob(pattern, **kwargs):
322
259
  yield from self._stream_file_batches(obj_path)
@@ -339,8 +276,7 @@ class FSSpecBackend(ObjectStoreBase):
339
276
  Returns:
340
277
  AsyncIterator of Arrow record batches
341
278
  """
342
- if not PYARROW_INSTALLED:
343
- raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
279
+ self._ensure_pyarrow()
344
280
 
345
281
  return _ArrowStreamer(self, pattern, **kwargs)
346
282
 
@@ -376,6 +312,10 @@ class FSSpecBackend(ObjectStoreBase):
376
312
  """Get object metadata from storage asynchronously."""
377
313
  return await async_(self.get_metadata)(path, **kwargs)
378
314
 
315
+ async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
316
+ """Generate a signed URL asynchronously."""
317
+ return await async_(self.sign)(path, expires_in, for_upload)
318
+
379
319
  async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
380
320
  """Read Arrow table from storage asynchronously."""
381
321
  return await async_(self.read_arrow)(path, **kwargs)