tab-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tab_cli/storage/az.py ADDED
@@ -0,0 +1,249 @@
1
+ import os
2
+ from enum import Enum
3
+
4
+ from typing import Any, BinaryIO, Iterator
5
+ from loguru import logger
6
+
7
+ from tab_cli.storage import StorageBackend, FileInfo
8
+ from tab_cli.url_parser import parse_url
9
+
10
+
11
class AzAuthMethod(Enum):
    """Authentication mechanisms for Azure Blob Storage, numbered in the order AzBackend tries them."""

    CONNECTION_STRING = 1  # AZURE_STORAGE_CONNECTION_STRING environment variable
    ACCOUNT_KEY = 2  # AZURE_STORAGE_KEY environment variable
    SAS_TOKEN = 3  # AZURE_STORAGE_SAS_TOKEN environment variable
    AZURE_AD = 4  # Azure AD / RBAC via DefaultAzureCredential
    AZURE_CLI = 5  # account key fetched through the `az` command-line tool
17
+
18
+
19
class AzBackend(StorageBackend):
    """Storage backend for Azure Blob Storage with configurable URL interpretation.

    When az_url_authority_is_account=True:
    - az://account/container/path - authority is the storage account name
    - az:///container/path - account inferred from AZURE_STORAGE_ACCOUNT

    When az_url_authority_is_account=False (default adlfs behavior):
    - az://container/path - authority is the container name

    Authentication is attempted in this order: connection string, account key
    and SAS token (all read from environment variables), then Azure AD / RBAC
    via DefaultAzureCredential, and finally an account key fetched through the
    Azure CLI.
    """

    def __init__(self, account: str | None, container: str | None, az_url_authority_is_account: bool = False) -> None:
        """Initialize the Azure Blob Storage backend.

        Args:
            account: Storage account name. When None, AZURE_STORAGE_ACCOUNT is used.
            container: Container that is listed to probe the Azure AD and
                Azure CLI credentials before they are accepted.
            az_url_authority_is_account: If True, interpret the URL authority as the
                storage account name. If False, interpret it as the container name.

        Raises:
            ImportError: If the 'adlfs' package is not installed.
            ValueError: If no account name is available or no authentication
                method succeeds.
        """
        try:
            import adlfs
        except ImportError as e:
            raise ImportError("Package 'adlfs' is required for az:// URLs. Install with: pip install adlfs") from e

        self.adlfs = adlfs
        self.fs = None
        # Always defined so storage_options() never hits a missing attribute.
        self.method = None
        self.url_authority_is_account = az_url_authority_is_account

        if account is None:
            account = os.environ.get("AZURE_STORAGE_ACCOUNT")
        connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
        account_key = os.environ.get("AZURE_STORAGE_KEY")
        sas_token = os.environ.get("AZURE_STORAGE_SAS_TOKEN")

        self.account = account
        if account is None:
            raise ValueError("Storage account name must be specified. Use az://account/container/path and use `tab --az-url-authority-is-account`, or set the environment variable.")

        # 1. Try connection string from environment
        if connection_string:
            logger.debug("Authenticating to Azure storage account '{}' using connection string", account)
            self.fs = self.adlfs.AzureBlobFileSystem(connection_string=connection_string)
            self.connection_string = connection_string
            self.method = AzAuthMethod.CONNECTION_STRING

        # 2. Try account key from environment
        elif account_key:
            logger.debug("Authenticating to Azure storage account '{}' using account key", account)
            self.fs = self.adlfs.AzureBlobFileSystem(
                account_name=account,
                account_key=account_key,
            )
            self.account_key = account_key
            self.method = AzAuthMethod.ACCOUNT_KEY

        # 3. Try SAS token from environment
        elif sas_token:
            logger.debug("Authenticating to Azure storage account '{}' using SAS token", account)
            self.fs = self.adlfs.AzureBlobFileSystem(
                account_name=account,
                sas_token=sas_token,
            )
            self.sas_token = sas_token
            self.method = AzAuthMethod.SAS_TOKEN

        # 4./5. Probe Azure AD first, then fall back to a key from the Azure CLI
        elif not self._try_azure_ad(account, container):
            self._try_azure_cli_key(account, container)

        if self.fs is None:
            raise ValueError(
                f"Could not authenticate to storage account '{account}'. "
                "Set AZURE_STORAGE_CONNECTION_STRING, AZURE_STORAGE_KEY, AZURE_STORAGE_SAS_TOKEN, "
                "configure Azure AD RBAC, or run 'az login'."
            )

    def _try_azure_ad(self, account: str, container: str | None) -> bool:
        """Authenticate with DefaultAzureCredential; return True only if the probe listing works."""
        try:
            from azure.identity.aio import DefaultAzureCredential  # Async version

            logger.debug("Authenticating to Azure storage account '{}' using Azure AD / RBAC", account)
            candidate = self.adlfs.AzureBlobFileSystem(
                account_name=account,
                credential=DefaultAzureCredential(),
            )
            # Probe before committing: self.fs must stay None when the
            # credential cannot actually list the container.
            candidate.ls(container)
        except ImportError:
            logger.debug("azure-identity not installed, skipping Azure AD / RBAC authentication")
            return False
        except Exception as e:
            logger.debug(f"Azure AD / RBAC authentication failed: {e}")
            return False

        self.fs = candidate
        self.method = AzAuthMethod.AZURE_AD
        return True

    def _try_azure_cli_key(self, account: str, container: str | None) -> bool:
        """Authenticate with an account key fetched via the Azure CLI; return True on success."""
        logger.debug(f"Attempting to fetch account key via Azure CLI for '{account}'")
        try:
            account_key = self._get_account_key_via_cli(account)
        except Exception as e:
            logger.debug(f"Fetching account key via Azure CLI failed: {e}")
            return False
        if not account_key:
            return False

        try:
            logger.debug(f"Authenticating to Azure storage account '{account}' using key from Azure CLI")
            candidate = self.adlfs.AzureBlobFileSystem(
                account_name=account,
                account_key=account_key,
            )
            # Same rule as for Azure AD: only a filesystem that passed the
            # probe is stored on the instance.
            candidate.ls(container)
        except Exception as e:
            logger.debug(f"Authenticating to Azure storage account '{account}' using key from Azure CLI failed: {e}")
            return False

        self.fs = candidate
        self.account_key = account_key
        self.method = AzAuthMethod.AZURE_CLI
        return True

    def _get_account_key_via_cli(self, account: str) -> str | None:
        """Try to get storage account key via Azure CLI.

        Returns:
            The first account key printed by ``az storage account keys list``,
            or None if the command is missing, times out, fails, or prints nothing.
        """
        import subprocess

        try:
            result = subprocess.run(
                ["az", "storage", "account", "keys", "list",
                 "--account-name", account,
                 "--query", "[0].value",
                 "-o", "tsv"],
                capture_output=True,
                text=True,
                timeout=30,
            )
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()
        except (subprocess.TimeoutExpired, FileNotFoundError):
            logger.debug("Azure CLI not available or timed out")
        return None

    def normalize_for_polars(self, url: str) -> str:
        """Normalize URL to a format Polars understands.

        Polars expects az://container/path format.

        Returns:
            Normalized URL in az://container/path format.
        """
        parsed = parse_url(url)
        return f"az://{parsed.bucket}/{parsed.path}"

    def storage_options(self, url: str) -> dict[str, str] | None:
        """Return storage options for Polars Azure access.

        The options mirror the authentication method chosen in ``__init__``;
        ``url`` is not consulted.

        Returns:
            Dict with appropriate authentication options for Azure.
            Includes both adlfs-style keys and Rust object_store keys for compatibility.
        """
        if self.method == AzAuthMethod.CONNECTION_STRING:
            return {
                "connection_string": self.connection_string,
                "azure_storage_connection_string": self.connection_string,
            }
        # A key from the environment and a key from the Azure CLI are passed on identically.
        if self.method in (AzAuthMethod.ACCOUNT_KEY, AzAuthMethod.AZURE_CLI):
            return {
                "account_name": self.account,
                "account_key": self.account_key,
                "azure_storage_account_name": self.account,
                "azure_storage_account_key": self.account_key,
            }
        if self.method == AzAuthMethod.SAS_TOKEN:
            return {
                "account_name": self.account,
                "sas_token": self.sas_token,
                "azure_storage_account_name": self.account,
                "azure_storage_sas_token": self.sas_token,
            }
        # Azure AD: no secret to hand over, so only the account name and the
        # CLI-credential flags are passed.
        return {
            "account_name": self.account,
            "azure_storage_account_name": self.account,
            "anon": 'false',
            "use_azure_cli": 'true',
            "azure_use_azure_cli": "true",
        }

    def _to_internal(self, url: str) -> str:
        """Convert URL to the ``container/path`` string used for adlfs operations."""
        parsed = parse_url(url)
        return f"{parsed.bucket}/{parsed.path}"

    def _to_uri(self, account: str, internal_path: str) -> str:
        """Convert internal path back to az:// URL."""
        if self.url_authority_is_account:
            return f"az://{account}/{internal_path}"
        else:
            return f"az://{internal_path}"

    def open(self, url: str) -> BinaryIO:
        """Open the blob at ``url`` for binary reading."""
        return self.fs.open(self._to_internal(url), "rb")

    def list_files(self, url: str, extension: str) -> Iterator[FileInfo]:
        """Yield a FileInfo for every blob under ``url`` whose name ends with ``extension``, sorted by path."""
        # A container-root URL produces "container/"; drop the trailing slash so
        # the glob pattern does not contain an empty "//" segment.
        internal_path = self._to_internal(url).rstrip("/")
        pattern = f"{internal_path}/**/*{extension}"
        for path in sorted(self.fs.glob(pattern)):
            info = self.fs.info(path)
            yield FileInfo(url=self._to_uri(self.account, path), size=info["size"])

    def size(self, url: str) -> int:
        """Return the size reported by the filesystem for ``url``."""
        return self.fs.size(self._to_internal(url))

    def is_directory(self, url: str) -> bool:
        """Return True if ``url`` is reported as a directory or has any listable contents."""
        path = self._to_internal(url)
        try:
            info = self.fs.info(path)
            return info.get("type") == "directory"
        except FileNotFoundError:
            # No entry of its own: treat it as a directory when listing it yields anything.
            try:
                contents = self.fs.ls(path, detail=False)
                return len(contents) > 0
            except Exception:
                return False

    def __del__(self):
        try:
            # Check if fs exists and has a close method
            if hasattr(self, 'fs') and self.fs is not None:
                self.fs.close()
        except Exception:
            # Silently fail as the interpreter is likely in mid-teardown
            pass
@@ -0,0 +1,36 @@
1
+ """Base storage backend interface."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass
5
+ from typing import BinaryIO, Iterator, Any
6
+
7
+
8
@dataclass
class FileInfo:
    """Location and size of one stored file, as yielded by ``StorageBackend.list_files``."""

    url: str  # URL (or local path) identifying the file
    size: int  # size as reported by the backend that produced this entry
12
+
13
+
14
class StorageBackend(ABC):
    """Abstract interface every storage backend implements.

    Subclasses must provide ``open``, ``list_files``, ``size`` and
    ``is_directory``. The two Polars hooks have pass-through defaults.
    """

    @abstractmethod
    def open(self, url: str) -> BinaryIO:
        """Return a binary file object for ``url``."""

    @abstractmethod
    def list_files(self, url: str, extension: str) -> Iterator[FileInfo]:
        """Iterate over FileInfo entries found under ``url`` for ``extension``."""

    @abstractmethod
    def size(self, url: str) -> int:
        """Return the size of the object at ``url``."""

    @abstractmethod
    def is_directory(self, url: str) -> bool:
        """Tell whether ``url`` refers to a directory."""

    def normalize_for_polars(self, url: str) -> str:
        """Return the URL to hand to Polars; by default ``url`` unchanged."""
        return url

    def storage_options(self, url: str) -> dict[str, Any] | None:
        """Return the storage options to hand to Polars; by default None."""
        return None
@@ -0,0 +1,60 @@
1
+ """Cloud storage backend using fsspec.
2
+
3
+ Fallback for any fsspec-supported protocol not handled by dedicated backends.
4
+
5
+ Note: The following have dedicated backends:
6
+ - az://, abfs://, abfss:// -> AzBackend
7
+ - gs:// -> GcsBackend
8
+ - s3:// -> S3Backend
9
+
10
+ The appropriate protocol handler package must be installed separately.
11
+ """
12
+
13
+ from typing import BinaryIO, Iterator, Any
14
+ import fsspec
15
+
16
+ from tab_cli.storage.base import FileInfo, StorageBackend
17
+
18
# Maps a protocol name to the pip package that provides its fsspec handler.
# Empty here, so every lookup falls through to the generic error message.
_PACKAGE_HINTS: dict[str, str] = {}


class FsspecBackend(StorageBackend):
    """Generic backend that hands every operation to an fsspec filesystem."""

    def __init__(self, protocol: str) -> None:
        """Create the fsspec filesystem registered for ``protocol``.

        Raises:
            ImportError: If fsspec cannot provide a filesystem for the protocol
                (it raised ImportError or ValueError).
        """
        self._protocol = protocol

        try:
            self._fs: fsspec.AbstractFileSystem = fsspec.filesystem(protocol)
        except (ImportError, ValueError) as exc:
            hint = _PACKAGE_HINTS.get(protocol)
            if hint is None:
                message = f"No handler found for {protocol}:// URLs"
            else:
                message = f"Package '{hint}' is required for {protocol}:// URLs. Install with: pip install {hint}"
            raise ImportError(message) from exc

    def open(self, url: str) -> BinaryIO:
        """Open ``url`` for binary reading through the fsspec filesystem."""
        return self._fs.open(url, "rb")

    def list_files(self, url: str, extension: str) -> Iterator[FileInfo]:
        """Yield a FileInfo for each glob match under ``url`` ending with ``extension``, in sorted order."""
        scheme_prefix = f"{self._protocol}://"
        matches = self._fs.glob(f"{url}/**/*{extension}")
        for match in sorted(matches):
            details = self._fs.info(match)
            # glob may return paths without the protocol; put it back when missing.
            if match.startswith(scheme_prefix):
                qualified = match
            else:
                qualified = f"{self._protocol}://{match}"
            yield FileInfo(url=qualified, size=details["size"])

    def size(self, url: str) -> int:
        """Return the size fsspec reports for ``url``."""
        return self._fs.size(url)

    def is_directory(self, url: str) -> bool:
        """Return True if ``url`` is reported as a directory or has any listable contents."""
        try:
            return self._fs.info(url).get("type") == "directory"
        except FileNotFoundError:
            pass

        # No entry of its own: count it as a directory when listing it yields anything.
        try:
            listing = self._fs.ls(url, detail=False)
        except Exception:
            return False
        return len(listing) > 0

    def storage_options(self, url: str) -> dict[str, Any] | None:
        """Return None: no storage options are supplied for generic protocols."""
        return None
@@ -0,0 +1,215 @@
1
+ import os
2
+ from enum import Enum
3
+ from pathlib import Path
4
+
5
+ from typing import BinaryIO, Iterator
6
+ from loguru import logger
7
+
8
+ from tab_cli.storage.base import StorageBackend, FileInfo
9
+ from tab_cli.url_parser import parse_url
10
+
11
+
12
class GcloudAuthMethod(Enum):
    """Authentication mechanisms for Google Cloud Storage, numbered in the order GcloudBackend tries them."""

    SERVICE_ACCOUNT = 1  # GOOGLE_APPLICATION_CREDENTIALS
    ADC = 2  # Application Default Credentials file
    GCLOUD_CLI = 3  # gcloud auth login (non-ADC)
    GOOGLE_DEFAULT = 4  # google.auth.default()
17
+
18
+
19
class GcloudBackend(StorageBackend):
    """Storage backend for Google Cloud Storage.

    URL format: gs://bucket/path

    Authentication order:
    1. GOOGLE_APPLICATION_CREDENTIALS environment variable (service account JSON)
    2. ADC file (~/.config/gcloud/application_default_credentials.json)
    3. gcloud CLI login credentials (gcloud auth print-access-token)
    4. google.auth.default() via gcsfs token="google_default"
    """

    def __init__(self) -> None:
        """Initialize the Google Cloud Storage backend.

        Raises:
            ImportError: If the 'gcsfs' package is not installed.
            ValueError: If none of the authentication methods yields a filesystem.
        """
        try:
            import gcsfs
        except ImportError as e:
            raise ImportError("Package 'gcsfs' is required for gs:// URLs. Install with: pip install gcsfs") from e

        self.gcsfs = gcsfs
        self.fs = None

        # 1. Try GOOGLE_APPLICATION_CREDENTIALS environment variable
        credentials_file = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
        if credentials_file and os.path.exists(credentials_file):
            logger.debug("Authenticating to GCS using GOOGLE_APPLICATION_CREDENTIALS: {}", credentials_file)
            if self._connect(
                credentials_file,
                GcloudAuthMethod.SERVICE_ACCOUNT,
                "GOOGLE_APPLICATION_CREDENTIALS authentication failed: {}",
            ):
                return

        # 2. Try ADC file (application_default_credentials.json)
        adc_path = self._get_adc_path()
        if adc_path and adc_path.exists():
            logger.debug("Authenticating to GCS using ADC file: {}", adc_path)
            if self._connect(str(adc_path), GcloudAuthMethod.ADC, "ADC file authentication failed: {}"):
                return

        # 3. Fallback to gcloud CLI login (gcloud auth print-access-token)
        logger.debug("Attempting to get access token from gcloud CLI")
        access_token = self._get_access_token_via_cli()
        if access_token:
            logger.debug("Authenticating to GCS using gcloud CLI access token")
            if self._connect(
                access_token,
                GcloudAuthMethod.GCLOUD_CLI,
                "gcloud CLI access token authentication failed: {}",
            ):
                return

        # 4. Try google.auth.default() via token="google_default"
        logger.debug("Authenticating to GCS using google.auth.default()")
        if self._connect(
            "google_default",
            GcloudAuthMethod.GOOGLE_DEFAULT,
            "google.auth.default() authentication failed: {}",
        ):
            return

        # Every method either returned above or left self.fs as None.
        raise ValueError(
            "Could not authenticate to Google Cloud Storage. "
            "Set GOOGLE_APPLICATION_CREDENTIALS, run 'gcloud auth application-default login', "
            "or run 'gcloud auth login'."
        )

    def _connect(self, token: str, method: "GcloudAuthMethod", failure_message: str) -> bool:
        """Create the filesystem with ``token``; record method and token and return True on success.

        On any exception the failure is logged with ``failure_message`` and
        the instance is left unchanged.
        """
        try:
            self.fs = self.gcsfs.GCSFileSystem(token=token)
        except Exception as e:
            logger.debug(failure_message, e)
            return False
        self.method = method
        self.token = token
        return True

    def _get_adc_path(self) -> Path | None:
        """Get the path to the Application Default Credentials file.

        Returns:
            The expected file location (it may not exist), or None on Windows
            when APPDATA is not set.
        """
        # Check CLOUDSDK_CONFIG for custom config directory
        config_dir = os.environ.get("CLOUDSDK_CONFIG")
        if config_dir:
            return Path(config_dir) / "application_default_credentials.json"

        # Default locations
        if os.name == "nt":  # Windows
            appdata = os.environ.get("APPDATA")
            if appdata:
                return Path(appdata) / "gcloud" / "application_default_credentials.json"
        else:  # Linux/macOS
            home = Path.home()
            return home / ".config" / "gcloud" / "application_default_credentials.json"

        return None

    def _get_access_token_via_cli(self) -> str | None:
        """Get access token from gcloud CLI (gcloud auth print-access-token).

        Returns:
            The token printed by the command, or None if the command is
            missing, times out, fails, or prints nothing.
        """
        import subprocess

        try:
            result = subprocess.run(
                ["gcloud", "auth", "print-access-token"],
                capture_output=True,
                text=True,
                timeout=30,
            )
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()
        except (subprocess.TimeoutExpired, FileNotFoundError):
            logger.debug("gcloud CLI not available or timed out")
        return None

    def normalize_for_polars(self, url: str) -> str:
        """Normalize URL to a format Polars understands.

        Polars expects gs://bucket/path format.

        Returns:
            Normalized URL in gs://bucket/path format.
        """
        parsed = parse_url(url)
        return f"gs://{parsed.bucket}/{parsed.path}"

    def storage_options(self, url: str) -> dict[str, str]:
        """Return storage options for Polars GCS access.

        The options mirror the authentication method chosen in ``__init__``;
        ``url`` is not consulted.

        Returns:
            Dict with appropriate authentication options for GCS.
            Includes both gcsfs-style keys and Rust object_store keys for compatibility.
        """
        # Both file-based methods pass the credentials file path under every key.
        if self.method in (GcloudAuthMethod.SERVICE_ACCOUNT, GcloudAuthMethod.ADC):
            return {
                "token": self.token,
                "service_account": self.token,
                "google_service_account": self.token,
            }
        elif self.method == GcloudAuthMethod.GCLOUD_CLI:
            # For CLI token, we need to refresh it for Polars
            # since the token may have expired
            fresh_token = self._get_access_token_via_cli()
            if fresh_token:
                return {
                    "token": fresh_token,
                }
            return {"token": self.token}
        elif self.method == GcloudAuthMethod.GOOGLE_DEFAULT:
            return {
                "token": "google_default",
            }

    def _to_internal(self, url: str) -> str:
        """Convert URL to internal path for gcsfs operations."""
        parsed = parse_url(url)
        return f"{parsed.bucket}/{parsed.path}"

    def _to_uri(self, internal_path: str) -> str:
        """Convert internal path back to gs:// URL."""
        return f"gs://{internal_path}"

    def open(self, url: str) -> BinaryIO:
        """Open the object at ``url`` for binary reading."""
        return self.fs.open(self._to_internal(url), "rb")

    def list_files(self, url: str, extension: str) -> Iterator[FileInfo]:
        """Yield a FileInfo for every object under ``url`` whose name ends with ``extension``, sorted by path."""
        # A bucket-root URL produces "bucket/"; drop the trailing slash so the
        # glob pattern does not contain an empty "//" segment.
        internal_path = self._to_internal(url).rstrip("/")
        pattern = f"{internal_path}/**/*{extension}"
        for path in sorted(self.fs.glob(pattern)):
            info = self.fs.info(path)
            yield FileInfo(url=self._to_uri(path), size=info["size"])

    def size(self, url: str) -> int:
        """Return the size reported by the filesystem for ``url``."""
        return self.fs.size(self._to_internal(url))

    def is_directory(self, url: str) -> bool:
        """Return True if ``url`` is reported as a directory or has any listable contents."""
        path = self._to_internal(url)
        try:
            info = self.fs.info(path)
            return info.get("type") == "directory"
        except FileNotFoundError:
            # No entry of its own: treat it as a directory when listing it yields anything.
            try:
                contents = self.fs.ls(path, detail=False)
                return len(contents) > 0
            except Exception:
                return False

    def __del__(self):
        # Intentionally a no-op: nothing is closed here and session cleanup is
        # left to gcsfs. Kept so the class still defines the hook.
        pass
@@ -0,0 +1,25 @@
1
+ """Local filesystem storage backend."""
2
+
3
+ import os
4
+ from glob import glob
5
+ from typing import BinaryIO, Iterator
6
+
7
+ from tab_cli.storage.base import FileInfo, StorageBackend
8
+
9
+
10
class LocalBackend(StorageBackend):
    """Storage backend for local filesystem."""

    def open(self, url: str) -> BinaryIO:
        """Open the local path ``url`` for binary reading."""
        return open(url, "rb")

    def list_files(self, url: str, extension: str) -> Iterator[FileInfo]:
        """Yield a FileInfo for every regular file under ``url`` whose name ends with ``extension``.

        The search is recursive and results are sorted by path.
        """
        pattern = os.path.join(url, "**", f"*{extension}")
        for path in sorted(glob(pattern, recursive=True)):
            # The glob also matches directories whose names end with the
            # extension (e.g. a partitioned "out.parquet/" folder); those are
            # not files and must not be reported as such.
            if not os.path.isfile(path):
                continue
            yield FileInfo(url=path, size=os.path.getsize(path))

    def size(self, url: str) -> int:
        """Return the size in bytes of the local file ``url``."""
        return os.path.getsize(url)

    def is_directory(self, url: str) -> bool:
        """Return True if ``url`` is an existing directory."""
        return os.path.isdir(url)
tab_cli/style.py ADDED
@@ -0,0 +1,4 @@
1
# Shared style strings for the CLI's output.
# NOTE(review): these look like Rich-style definitions (colour names, "bold",
# "on <colour>" backgrounds) — confirm against the code that consumes them.
_KEY_STYLE = "sea_green3"  # presumably applied to keys / field names
_VAL_STYLE = "bold sky_blue3"  # presumably applied to values
_ALT_ROW_STYLE_0 = "default"  # presumably the two alternating row styles — TODO confirm
_ALT_ROW_STYLE_1 = "on #101010"
tab_cli/url_parser.py ADDED
@@ -0,0 +1,97 @@
1
+ from dataclasses import dataclass
2
+ import os
3
+ from urllib.parse import urlparse
4
+
5
+
6
+ @dataclass
7
+ class ParsedUrl:
8
+ """Parsed URL components for cloud storage."""
9
+
10
+ scheme: str
11
+ """URL scheme (e.g., 's3', 'gs', 'az', 'abfs', 'abfss', 'file', or '' for local)."""
12
+
13
+ bucket: str | None
14
+ """Bucket name for s3:// and gs://, container for az://, abfs://, and abfss://."""
15
+
16
+ account: str | None
17
+ """Storage account name for Azure (az://, abfs://, abfss://)."""
18
+
19
+ path: str
20
+ """Path within the bucket/container, or full local path."""
21
+
22
+ original: str
23
+ """Original URL string."""
24
+
25
+
26
+ def parse_url(url: str) -> ParsedUrl:
27
+ """Parse a storage URL into its components.
28
+
29
+ Supports:
30
+ - Local paths: /path/to/file, ./relative/path, file:///path/to/file
31
+ - S3: s3://bucket/path
32
+ - GCS: gs://bucket/path
33
+ - Azure Blob (az://):
34
+ - With --az-url-authority-is-account: az://account/container/path
35
+ - Without (default): az://container/path (account from AZURE_STORAGE_ACCOUNT)
36
+ - Azure Data Lake (abfs://, abfss://):
37
+ - abfs[s]://container@account.dfs.core.windows.net/path
38
+ - abfs[s]://container/path (account from AZURE_STORAGE_ACCOUNT)
39
+
40
+ Returns:
41
+ ParsedUrl with scheme, bucket, account, and path components.
42
+ """
43
+ parsed = urlparse(url)
44
+ scheme = parsed.scheme.lower() if parsed.scheme else ""
45
+
46
+ # Local file paths
47
+ if not scheme or scheme == "file":
48
+ # file:///path or file://localhost/path -> /path
49
+ # No scheme -> treat as local path
50
+ if scheme == "file":
51
+ path = parsed.path
52
+ else:
53
+ path = url
54
+ return ParsedUrl(scheme="file", bucket=None, account=None, path=path, original=url)
55
+
56
+ # S3/GCS: s3://bucket/path
57
+ if scheme in {"s3", "gs"}:
58
+ bucket = parsed.netloc
59
+ path = parsed.path.lstrip("/")
60
+ return ParsedUrl(scheme=scheme, bucket=bucket, account=None, path=path, original=url)
61
+
62
+ # Azure Blob: az://container/path or az://account/container/path
63
+ if scheme == "az":
64
+ from tab_cli import config
65
+
66
+ if config.config.az_url_authority_is_account:
67
+ # az://account/container/path or az:///container/path
68
+ account = parsed.netloc if parsed.netloc else os.environ.get("AZURE_STORAGE_ACCOUNT")
69
+ # Path is /container/path, first segment is container
70
+ path_parts = parsed.path.lstrip("/").split("/", 1)
71
+ container = path_parts[0] if path_parts else None
72
+ path = path_parts[1] if len(path_parts) > 1 else ""
73
+ else:
74
+ # az://container/path (default adlfs behavior)
75
+ account = os.environ.get("AZURE_STORAGE_ACCOUNT")
76
+ container = parsed.netloc
77
+ path = parsed.path.lstrip("/")
78
+ return ParsedUrl(scheme=scheme, bucket=container, account=account, path=path, original=url)
79
+
80
+ # Azure Data Lake: abfs[s]://container@account.dfs.core.windows.net/path
81
+ # or abfs[s]://container/path
82
+ if scheme in ("abfs", "abfss"):
83
+ netloc = parsed.netloc
84
+ if "@" in netloc:
85
+ # container@account.dfs.core.windows.net
86
+ container, host = netloc.split("@", 1)
87
+ # Extract account from host (account.dfs.core.windows.net)
88
+ account = host.split(".")[0] if "." in host else host
89
+ else:
90
+ # container only, account from env
91
+ container = netloc
92
+ account = os.environ.get("AZURE_STORAGE_ACCOUNT")
93
+ path = parsed.path.lstrip("/")
94
+ return ParsedUrl(scheme=scheme, bucket=container, account=account, path=path, original=url)
95
+
96
+ # Other schemes: return with netloc as bucket-like component
97
+ raise ValueError(f"Unsupported scheme: {scheme}")