sqlspec 0.21.1__py3-none-any.whl → 0.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

sqlspec/base.py CHANGED
@@ -64,7 +64,7 @@ class SQLSpec:
                 config.close_pool()
                 cleaned_count += 1
             except Exception as e:
-                logger.warning("Failed to clean up sync pool for config %s: %s", config_type.__name__, e)
+                logger.debug("Failed to clean up sync pool for config %s: %s", config_type.__name__, e)

         if cleaned_count > 0:
             logger.debug("Sync pool cleanup completed. Cleaned %d pools.", cleaned_count)
@@ -87,14 +87,14 @@ class SQLSpec:
                 else:
                     sync_configs.append((config_type, config))
             except Exception as e:
-                logger.warning("Failed to prepare cleanup for config %s: %s", config_type.__name__, e)
+                logger.debug("Failed to prepare cleanup for config %s: %s", config_type.__name__, e)

         if cleanup_tasks:
             try:
                 await asyncio.gather(*cleanup_tasks, return_exceptions=True)
                 logger.debug("Async pool cleanup completed. Cleaned %d pools.", len(cleanup_tasks))
             except Exception as e:
-                logger.warning("Failed to complete async pool cleanup: %s", e)
+                logger.debug("Failed to complete async pool cleanup: %s", e)

         for _config_type, config in sync_configs:
             config.close_pool()
@@ -129,7 +129,7 @@ class SQLSpec:
         """
         config_type = type(config)
         if config_type in self._configs:
-            logger.warning("Configuration for %s already exists. Overwriting.", config_type.__name__)
+            logger.debug("Configuration for %s already exists. Overwriting.", config_type.__name__)
         self._configs[config_type] = config
         return config_type

sqlspec/loader.py CHANGED
@@ -10,18 +10,15 @@ import time
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Final, Optional, Union
+from urllib.parse import unquote, urlparse

 from sqlspec.core.cache import CacheKey, get_cache_config, get_default_cache
 from sqlspec.core.statement import SQL
-from sqlspec.exceptions import (
-    MissingDependencyError,
-    SQLFileNotFoundError,
-    SQLFileParseError,
-    StorageOperationFailedError,
-)
+from sqlspec.exceptions import SQLFileNotFoundError, SQLFileParseError, StorageOperationFailedError
 from sqlspec.storage.registry import storage_registry as default_storage_registry
 from sqlspec.utils.correlation import CorrelationContext
 from sqlspec.utils.logging import get_logger
+from sqlspec.utils.text import slugify

 if TYPE_CHECKING:
     from sqlspec.storage.registry import StorageRegistry
@@ -54,13 +51,25 @@ MIN_QUERY_PARTS: Final = 3
 def _normalize_query_name(name: str) -> str:
     """Normalize query name to be a valid Python identifier.

+    Convert hyphens to underscores, preserve dots for namespacing,
+    and remove invalid characters.
+
     Args:
         name: Raw query name from SQL file.

     Returns:
         Normalized query name suitable as Python identifier.
     """
-    return TRIM_SPECIAL_CHARS.sub("", name).replace("-", "_")
+    # Handle namespace parts separately to preserve dots
+    parts = name.split(".")
+    normalized_parts = []
+
+    for part in parts:
+        # Use slugify with underscore separator and remove any remaining invalid chars
+        normalized_part = slugify(part, separator="_")
+        normalized_parts.append(normalized_part)
+
+    return ".".join(normalized_parts)


 def _normalize_dialect(dialect: str) -> str:
@@ -76,19 +85,6 @@ def _normalize_dialect(dialect: str) -> str:
     return DIALECT_ALIASES.get(normalized, normalized)


-def _normalize_dialect_for_sqlglot(dialect: str) -> str:
-    """Normalize dialect name for SQLGlot compatibility.
-
-    Args:
-        dialect: Dialect name from SQL file or parameter.
-
-    Returns:
-        SQLGlot-compatible dialect name.
-    """
-    normalized = dialect.lower().strip()
-    return DIALECT_ALIASES.get(normalized, normalized)
-
-
 class NamedStatement:
     """Represents a parsed SQL statement with metadata.

@@ -218,8 +214,7 @@ class SQLFileLoader:
             SQLFileParseError: If file cannot be read.
         """
         try:
-            content = self._read_file_content(path)
-            return hashlib.md5(content.encode(), usedforsecurity=False).hexdigest()
+            return hashlib.md5(self._read_file_content(path).encode(), usedforsecurity=False).hexdigest()
         except Exception as e:
             raise SQLFileParseError(str(path), str(path), e) from e

@@ -253,19 +248,22 @@ class SQLFileLoader:
             SQLFileNotFoundError: If file does not exist.
             SQLFileParseError: If file cannot be read or parsed.
         """
-
         path_str = str(path)

         try:
             backend = self.storage_registry.get(path)
+            # For file:// URIs, extract just the filename for the backend call
+            if path_str.startswith("file://"):
+                parsed = urlparse(path_str)
+                file_path = unquote(parsed.path)
+                # Handle Windows paths (file:///C:/path)
+                if file_path and len(file_path) > 2 and file_path[2] == ":":  # noqa: PLR2004
+                    file_path = file_path[1:]  # Remove leading slash for Windows
+                filename = Path(file_path).name
+                return backend.read_text(filename, encoding=self.encoding)
             return backend.read_text(path_str, encoding=self.encoding)
         except KeyError as e:
             raise SQLFileNotFoundError(path_str) from e
-        except MissingDependencyError:
-            try:
-                return path.read_text(encoding=self.encoding)  # type: ignore[union-attr]
-            except FileNotFoundError as e:
-                raise SQLFileNotFoundError(path_str) from e
         except StorageOperationFailedError as e:
             if "not found" in str(e).lower() or "no such file" in str(e).lower():
                 raise SQLFileNotFoundError(path_str) from e
@@ -419,8 +417,7 @@ class SQLFileLoader:
         for file_path in sql_files:
             relative_path = file_path.relative_to(dir_path)
             namespace_parts = relative_path.parent.parts
-            namespace = ".".join(namespace_parts) if namespace_parts else None
-            self._load_single_file(file_path, namespace)
+            self._load_single_file(file_path, ".".join(namespace_parts) if namespace_parts else None)
         return len(sql_files)

     def _load_single_file(self, file_path: Union[str, Path], namespace: Optional[str]) -> None:
@@ -533,44 +530,6 @@ class SQLFileLoader:
         self._queries[normalized_name] = statement
         self._query_to_file[normalized_name] = "<directly added>"

-    def get_sql(self, name: str) -> "SQL":
-        """Get a SQL object by statement name.
-
-        Args:
-            name: Name of the statement (from -- name: in SQL file).
-                Hyphens in names are converted to underscores.
-
-        Returns:
-            SQL object ready for execution.
-
-        Raises:
-            SQLFileNotFoundError: If statement name not found.
-        """
-        correlation_id = CorrelationContext.get()
-
-        safe_name = _normalize_query_name(name)
-
-        if safe_name not in self._queries:
-            available = ", ".join(sorted(self._queries.keys())) if self._queries else "none"
-            logger.error(
-                "Statement not found: %s",
-                name,
-                extra={
-                    "statement_name": name,
-                    "safe_name": safe_name,
-                    "available_statements": len(self._queries),
-                    "correlation_id": correlation_id,
-                },
-            )
-            raise SQLFileNotFoundError(name, path=f"Statement '{name}' not found. Available statements: {available}")
-
-        parsed_statement = self._queries[safe_name]
-        sqlglot_dialect = None
-        if parsed_statement.dialect:
-            sqlglot_dialect = _normalize_dialect_for_sqlglot(parsed_statement.dialect)
-
-        return SQL(parsed_statement.sql, dialect=sqlglot_dialect)
-
     def get_file(self, path: Union[str, Path]) -> "Optional[SQLFile]":
         """Get a loaded SQLFile object by path.

@@ -659,3 +618,41 @@ class SQLFileLoader:
         if safe_name not in self._queries:
             raise SQLFileNotFoundError(name)
         return self._queries[safe_name].sql
+
+    def get_sql(self, name: str) -> "SQL":
+        """Get a SQL object by statement name.
+
+        Args:
+            name: Name of the statement (from -- name: in SQL file).
+                Hyphens in names are converted to underscores.
+
+        Returns:
+            SQL object ready for execution.
+
+        Raises:
+            SQLFileNotFoundError: If statement name not found.
+        """
+        correlation_id = CorrelationContext.get()
+
+        safe_name = _normalize_query_name(name)
+
+        if safe_name not in self._queries:
+            available = ", ".join(sorted(self._queries.keys())) if self._queries else "none"
+            logger.error(
+                "Statement not found: %s",
+                name,
+                extra={
+                    "statement_name": name,
+                    "safe_name": safe_name,
+                    "available_statements": len(self._queries),
+                    "correlation_id": correlation_id,
+                },
+            )
+            raise SQLFileNotFoundError(name, path=f"Statement '{name}' not found. Available statements: {available}")
+
+        parsed_statement = self._queries[safe_name]
+        sqlglot_dialect = None
+        if parsed_statement.dialect:
+            sqlglot_dialect = _normalize_dialect(parsed_statement.dialect)
+
+        return SQL(parsed_statement.sql, dialect=sqlglot_dialect)
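
`get_sql` moves to the end of the file and now calls the unified `_normalize_dialect`; callers are unaffected. A hedged usage sketch, assuming the loader exposes a `load_sql` entry point and a file declaring `-- name: get-user`:

```python
from sqlspec.loader import SQLFileLoader

loader = SQLFileLoader()
loader.load_sql("queries/users.sql")  # assumed entry point for loading files

# Hyphenated names pass through _normalize_query_name, so both forms resolve:
stmt = loader.get_sql("get-user")
stmt = loader.get_sql("get_user")
```
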
sqlspec/protocols.py CHANGED
@@ -4,7 +4,7 @@ This module provides protocols that can be used for static type checking
 and runtime isinstance() checks.
 """

-from typing import TYPE_CHECKING, Any, ClassVar, Optional, Protocol, Union, runtime_checkable
+from typing import TYPE_CHECKING, Any, Optional, Protocol, Union, runtime_checkable

 from typing_extensions import Self

@@ -14,7 +14,6 @@ if TYPE_CHECKING:

     from sqlglot import exp

-    from sqlspec.storage.capabilities import StorageCapabilities
     from sqlspec.typing import ArrowRecordBatch, ArrowTable

 __all__ = (
@@ -194,9 +193,8 @@ class ObjectStoreItemProtocol(Protocol):
 class ObjectStoreProtocol(Protocol):
     """Protocol for object storage operations."""

-    capabilities: ClassVar["StorageCapabilities"]
-
     protocol: str
+    backend_type: str

     def __init__(self, uri: str, **kwargs: Any) -> None:
         return
@@ -330,7 +328,7 @@ class ObjectStoreProtocol(Protocol):
         msg = "Async arrow writing not implemented"
         raise NotImplementedError(msg)

-    async def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
+    def stream_arrow_async(self, pattern: str, **kwargs: Any) -> "AsyncIterator[ArrowRecordBatch]":
         """Async stream Arrow record batches from matching objects."""
         msg = "Async arrow streaming not implemented"
         raise NotImplementedError(msg)
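
Dropping `async` from `stream_arrow_async` matches how callers consume it: a method that returns an `AsyncIterator` works best as a plain `def`, otherwise callers need an extra `await` before `async for`. A generic, runnable illustration of the pattern:

```python
import asyncio
from collections.abc import AsyncIterator

class Streamer:
    def stream(self) -> AsyncIterator[int]:
        # Plain `def` returning an async iterator: consumed directly
        # with `async for`, no `await obj.stream()` step required.
        async def gen() -> AsyncIterator[int]:
            for i in range(3):
                yield i
        return gen()

async def main() -> None:
    async for item in Streamer().stream():
        print(item)

asyncio.run(main())
```
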
@@ -8,16 +8,6 @@ Provides a storage system with:
 - Capability-based backend selection
 """

-from sqlspec.protocols import ObjectStoreProtocol
-from sqlspec.storage.capabilities import HasStorageCapabilities, StorageCapabilities
-from sqlspec.storage.registry import StorageRegistry
+from sqlspec.storage.registry import StorageRegistry, storage_registry

-storage_registry = StorageRegistry()
-
-__all__ = (
-    "HasStorageCapabilities",
-    "ObjectStoreProtocol",
-    "StorageCapabilities",
-    "StorageRegistry",
-    "storage_registry",
-)
+__all__ = ("StorageRegistry", "storage_registry")
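
After this change only the registry names are re-exported from `sqlspec.storage` (with the shared `storage_registry` instance now created in the registry module); the protocol still lives in `sqlspec.protocols`:

```python
from sqlspec.storage import StorageRegistry, storage_registry
from sqlspec.protocols import ObjectStoreProtocol  # no longer re-exported from sqlspec.storage
```
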
@@ -0,0 +1 @@
+"""Storage backends."""
@@ -1,18 +1,14 @@
 import logging
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union
+from typing import TYPE_CHECKING, Any, Optional, Union

-from sqlspec.exceptions import MissingDependencyError, StorageOperationFailedError
-from sqlspec.storage.backends.base import ObjectStoreBase
-from sqlspec.storage.capabilities import StorageCapabilities
+from sqlspec.exceptions import MissingDependencyError
 from sqlspec.typing import FSSPEC_INSTALLED, PYARROW_INSTALLED
 from sqlspec.utils.sync_tools import async_

 if TYPE_CHECKING:
     from collections.abc import AsyncIterator, Iterator

-    from fsspec import AbstractFileSystem
-
     from sqlspec.typing import ArrowRecordBatch, ArrowTable

 __all__ = ("FSSpecBackend",)
@@ -56,40 +52,30 @@ class _ArrowStreamer:
         raise StopAsyncIteration


-class FSSpecBackend(ObjectStoreBase):
+class FSSpecBackend:
     """Storage backend using fsspec.

-    Implements the ObjectStoreProtocol using fsspec for various protocols
+    Implements ObjectStoreProtocol using fsspec for various protocols
     including HTTP, HTTPS, FTP, and cloud storage services.
     """

-    _default_capabilities: ClassVar[StorageCapabilities] = StorageCapabilities(
-        supports_arrow=PYARROW_INSTALLED,
-        supports_streaming=PYARROW_INSTALLED,
-        supports_async=True,
-        supports_compression=True,
-        is_remote=True,
-        is_cloud_native=False,
-    )
-
-    def __init__(self, fs: "Union[str, AbstractFileSystem]", base_path: str = "") -> None:
-        if not FSSPEC_INSTALLED:
-            raise MissingDependencyError(package="fsspec", install_package="fsspec")
+    def __init__(self, uri: str, **kwargs: Any) -> None:
+        self._ensure_fsspec()

+        base_path = kwargs.pop("base_path", "")
         self.base_path = base_path.rstrip("/") if base_path else ""

-        if isinstance(fs, str):
-            import fsspec
-
-            self.fs = fsspec.filesystem(fs.split("://")[0])
-            self.protocol = fs.split("://")[0]
-            self._fs_uri = fs
+        if "://" in uri:
+            self.protocol = uri.split("://", maxsplit=1)[0]
+            self._fs_uri = uri
         else:
-            self.fs = fs
-            self.protocol = getattr(fs, "protocol", "unknown")
-            self._fs_uri = f"{self.protocol}://"
+            self.protocol = uri
+            self._fs_uri = f"{uri}://"
+
+        import fsspec

-        self._instance_capabilities = self._detect_capabilities()
+        self.fs = fsspec.filesystem(self.protocol, **kwargs)
+        self.backend_type = "fsspec"

         super().__init__()
@@ -99,11 +85,22 @@ class FSSpecBackend(ObjectStoreBase):
         fs_config = config.get("fs_config", {})
         base_path = config.get("base_path", "")

-        import fsspec
+        uri = f"{protocol}://"
+        kwargs = dict(fs_config)
+        if base_path:
+            kwargs["base_path"] = base_path

-        fs_instance = fsspec.filesystem(protocol, **fs_config)
+        return cls(uri=uri, **kwargs)

-        return cls(fs=fs_instance, base_path=base_path)
+    def _ensure_fsspec(self) -> None:
+        """Ensure fsspec is available for operations."""
+        if not FSSPEC_INSTALLED:
+            raise MissingDependencyError(package="fsspec", install_package="fsspec")
+
+    def _ensure_pyarrow(self) -> None:
+        """Ensure PyArrow is available for Arrow operations."""
+        if not PYARROW_INSTALLED:
+            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")

     def _resolve_path(self, path: Union[str, Path]) -> str:
         """Resolve path relative to base_path."""
@@ -112,70 +109,38 @@ class FSSpecBackend(ObjectStoreBase):
             clean_base = self.base_path.rstrip("/")
             clean_path = path_str.lstrip("/")
             return f"{clean_base}/{clean_path}"
+        if self.protocol == "s3" and "://" in self._fs_uri:
+            # For S3, we need to include the bucket from the URI
+            # Extract bucket and path from URI like s3://bucket/path
+            uri_parts = self._fs_uri.split("://", 1)[1]  # Remove s3://
+            if "/" in uri_parts:
+                # URI has bucket and base path
+                return f"{uri_parts.rstrip('/')}/{path_str.lstrip('/')}"
+            # URI has only bucket
+            return f"{uri_parts}/{path_str.lstrip('/')}"
         return path_str

-    def _detect_capabilities(self) -> StorageCapabilities:
-        """Detect capabilities based on filesystem protocol."""
-        protocol = self.protocol.lower()
-
-        if protocol in {"s3", "s3a", "s3n"}:
-            return StorageCapabilities.s3_compatible()
-        if protocol in {"gcs", "gs"}:
-            return StorageCapabilities.gcs()
-        if protocol in {"abfs", "az", "azure"}:
-            return StorageCapabilities.azure_blob()
-        if protocol in {"file", "local"}:
-            return StorageCapabilities.local_filesystem()
-        return StorageCapabilities(
-            supports_arrow=PYARROW_INSTALLED,
-            supports_streaming=PYARROW_INSTALLED,
-            supports_async=True,
-            supports_compression=True,
-            is_remote=True,
-            is_cloud_native=False,
-        )
-
-    @property
-    def capabilities(self) -> StorageCapabilities:
-        """Return capabilities based on detected protocol."""
-        return getattr(self, "_instance_capabilities", self.__class__._default_capabilities)
-
-    @classmethod
-    def has_capability(cls, capability: str) -> bool:
-        """Check if backend has a specific capability."""
-        return getattr(cls._default_capabilities, capability, False)
-
-    @classmethod
-    def get_capabilities(cls) -> StorageCapabilities:
-        """Get all capabilities for this backend."""
-        return cls._default_capabilities
-
-    @property
-    def backend_type(self) -> str:
-        return "fsspec"
-
     @property
     def base_uri(self) -> str:
         return self._fs_uri

     def read_bytes(self, path: Union[str, Path], **kwargs: Any) -> bytes:
         """Read bytes from an object."""
-        try:
-            resolved_path = self._resolve_path(path)
-            return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore
-        except Exception as exc:
-            msg = f"Failed to read bytes from {path}"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_path = self._resolve_path(path)
+        return self.fs.cat(resolved_path, **kwargs)  # type: ignore[no-any-return]  # pyright: ignore

     def write_bytes(self, path: Union[str, Path], data: bytes, **kwargs: Any) -> None:
         """Write bytes to an object."""
-        try:
-            resolved_path = self._resolve_path(path)
-            with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
-                f.write(data)  # pyright: ignore
-        except Exception as exc:
-            msg = f"Failed to write bytes to {path}"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_path = self._resolve_path(path)
+
+        # Only create directories for local file systems, not for cloud storage
+        if self.protocol == "file":
+            parent_dir = str(Path(resolved_path).parent)
+            if parent_dir and not self.fs.exists(parent_dir):
+                self.fs.makedirs(parent_dir, exist_ok=True)
+
+        with self.fs.open(resolved_path, mode="wb", **kwargs) as f:
+            f.write(data)  # pyright: ignore

     def read_text(self, path: Union[str, Path], encoding: str = "utf-8", **kwargs: Any) -> str:
         """Read text from an object."""
@@ -193,87 +158,59 @@ class FSSpecBackend(ObjectStoreBase):

     def delete(self, path: Union[str, Path], **kwargs: Any) -> None:
         """Delete an object."""
-        try:
-            resolved_path = self._resolve_path(path)
-            self.fs.rm(resolved_path, **kwargs)
-        except Exception as exc:
-            msg = f"Failed to delete {path}"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_path = self._resolve_path(path)
+        self.fs.rm(resolved_path, **kwargs)

     def copy(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Copy an object."""
-        try:
-            source_path = self._resolve_path(source)
-            dest_path = self._resolve_path(destination)
-            self.fs.copy(source_path, dest_path, **kwargs)
-        except Exception as exc:
-            msg = f"Failed to copy {source} to {destination}"
-            raise StorageOperationFailedError(msg) from exc
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        self.fs.copy(source_path, dest_path, **kwargs)

     def move(self, source: Union[str, Path], destination: Union[str, Path], **kwargs: Any) -> None:
         """Move an object."""
-        try:
-            source_path = self._resolve_path(source)
-            dest_path = self._resolve_path(destination)
-            self.fs.mv(source_path, dest_path, **kwargs)
-        except Exception as exc:
-            msg = f"Failed to move {source} to {destination}"
-            raise StorageOperationFailedError(msg) from exc
+        source_path = self._resolve_path(source)
+        dest_path = self._resolve_path(destination)
+        self.fs.mv(source_path, dest_path, **kwargs)

     def read_arrow(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
         """Read an Arrow table from storage."""
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
-        try:
-            import pyarrow.parquet as pq
+        import pyarrow.parquet as pq

-            resolved_path = self._resolve_path(path)
-            with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
-                return pq.read_table(f)
-        except Exception as exc:
-            msg = f"Failed to read Arrow table from {path}"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_path = self._resolve_path(path)
+        with self.fs.open(resolved_path, mode="rb", **kwargs) as f:
+            return pq.read_table(f)

     def write_arrow(self, path: Union[str, Path], table: "ArrowTable", **kwargs: Any) -> None:
         """Write an Arrow table to storage."""
         if not PYARROW_INSTALLED:
             raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
-        try:
-            import pyarrow.parquet as pq
+        import pyarrow.parquet as pq

-            resolved_path = self._resolve_path(path)
-            with self.fs.open(resolved_path, mode="wb") as f:
-                pq.write_table(table, f, **kwargs)  # pyright: ignore
-        except Exception as exc:
-            msg = f"Failed to write Arrow table to {path}"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_path = self._resolve_path(path)
+        with self.fs.open(resolved_path, mode="wb") as f:
+            pq.write_table(table, f, **kwargs)  # pyright: ignore

     def list_objects(self, prefix: str = "", recursive: bool = True, **kwargs: Any) -> list[str]:
         """List objects with optional prefix."""
-        try:
-            resolved_prefix = self._resolve_path(prefix)
-            if recursive:
-                return sorted(self.fs.find(resolved_prefix, **kwargs))
-            return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))
-        except Exception as exc:
-            msg = f"Failed to list objects with prefix '{prefix}'"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_prefix = self._resolve_path(prefix)
+        if recursive:
+            return sorted(self.fs.find(resolved_prefix, **kwargs))
+        return sorted(self.fs.ls(resolved_prefix, detail=False, **kwargs))

     def glob(self, pattern: str, **kwargs: Any) -> list[str]:
         """Find objects matching a glob pattern."""
-        try:
-            resolved_pattern = self._resolve_path(pattern)
-            return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore
-        except Exception as exc:
-            msg = f"Failed to glob with pattern '{pattern}'"
-            raise StorageOperationFailedError(msg) from exc
+        resolved_pattern = self._resolve_path(pattern)
+        return sorted(self.fs.glob(resolved_pattern, **kwargs))  # pyright: ignore

-    def is_object(self, path: str) -> bool:
+    def is_object(self, path: Union[str, Path]) -> bool:
         """Check if path points to an object."""
         resolved_path = self._resolve_path(path)
         return self.fs.exists(resolved_path) and not self.fs.isdir(resolved_path)

-    def is_path(self, path: str) -> bool:
+    def is_path(self, path: Union[str, Path]) -> bool:
         """Check if path points to a prefix (directory-like)."""
         resolved_path = self._resolve_path(path)
         return self.fs.isdir(resolved_path)  # type: ignore[no-any-return]
@@ -294,9 +231,6 @@ class FSSpecBackend(ObjectStoreBase):

         except FileNotFoundError:
             return {"path": self._resolve_path(path), "exists": False}
-        except Exception as exc:
-            msg = f"Failed to get metadata for {path}"
-            raise StorageOperationFailedError(msg) from exc
         return {
             "path": resolved_path,
             "exists": True,
@@ -305,6 +239,11 @@ class FSSpecBackend(ObjectStoreBase):
             "type": info.type,
         }

+    def sign(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
+        """Generate a signed URL for the file."""
+        resolved_path = self._resolve_path(path)
+        return f"{self._fs_uri}{resolved_path}"
+
     def _stream_file_batches(self, obj_path: Union[str, Path]) -> "Iterator[ArrowRecordBatch]":
         import pyarrow.parquet as pq

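
fsspec has no uniform signing API, so the new `sign` simply returns the resolved URI; `expires_in` and `for_upload` are accepted for protocol compatibility but ignored. For example (same assumed import as above):

```python
backend = FSSpecBackend("memory", base_path="exports")
print(backend.sign("report.parquet"))
# memory://exports/report.parquet
```
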
@@ -313,10 +252,8 @@ class FSSpecBackend(ObjectStoreBase):
             yield from parquet_file.iter_batches()

     def stream_arrow(self, pattern: str, **kwargs: Any) -> "Iterator[ArrowRecordBatch]":
-        if not FSSPEC_INSTALLED:
-            raise MissingDependencyError(package="fsspec", install_package="fsspec")
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        self._ensure_fsspec()
+        self._ensure_pyarrow()

         for obj_path in self.glob(pattern, **kwargs):
             yield from self._stream_file_batches(obj_path)
@@ -339,8 +276,7 @@ class FSSpecBackend(ObjectStoreBase):
         Returns:
             AsyncIterator of Arrow record batches
         """
-        if not PYARROW_INSTALLED:
-            raise MissingDependencyError(package="pyarrow", install_package="pyarrow")
+        self._ensure_pyarrow()

         return _ArrowStreamer(self, pattern, **kwargs)

@@ -376,6 +312,10 @@ class FSSpecBackend(ObjectStoreBase):
         """Get object metadata from storage asynchronously."""
         return await async_(self.get_metadata)(path, **kwargs)

+    async def sign_async(self, path: str, expires_in: int = 3600, for_upload: bool = False) -> str:
+        """Generate a signed URL asynchronously."""
+        return await async_(self.sign)(path, expires_in, for_upload)
+
     async def read_arrow_async(self, path: Union[str, Path], **kwargs: Any) -> "ArrowTable":
         """Read Arrow table from storage asynchronously."""
         return await async_(self.read_arrow)(path, **kwargs)