web2cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. web2cli/__init__.py +3 -0
  2. web2cli/__main__.py +5 -0
  3. web2cli/adapter/__init__.py +0 -0
  4. web2cli/adapter/lint.py +667 -0
  5. web2cli/adapter/loader.py +157 -0
  6. web2cli/adapter/validator.py +127 -0
  7. web2cli/adapters/discord.com/web2cli.yaml +476 -0
  8. web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
  9. web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
  10. web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
  11. web2cli/adapters/reddit.com/web2cli.yaml +233 -0
  12. web2cli/adapters/slack.com/web2cli.yaml +445 -0
  13. web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
  14. web2cli/adapters/x.com/providers/x_graphql.py +299 -0
  15. web2cli/adapters/x.com/web2cli.yaml +449 -0
  16. web2cli/auth/__init__.py +0 -0
  17. web2cli/auth/browser_login.py +820 -0
  18. web2cli/auth/manager.py +166 -0
  19. web2cli/auth/store.py +68 -0
  20. web2cli/cli.py +1286 -0
  21. web2cli/executor/__init__.py +0 -0
  22. web2cli/executor/http.py +113 -0
  23. web2cli/output/__init__.py +0 -0
  24. web2cli/output/formatter.py +116 -0
  25. web2cli/parser/__init__.py +0 -0
  26. web2cli/parser/custom.py +21 -0
  27. web2cli/parser/html_parser.py +111 -0
  28. web2cli/parser/transforms.py +127 -0
  29. web2cli/pipe.py +10 -0
  30. web2cli/providers/__init__.py +6 -0
  31. web2cli/providers/base.py +22 -0
  32. web2cli/providers/registry.py +86 -0
  33. web2cli/runtime/__init__.py +1 -0
  34. web2cli/runtime/cache.py +42 -0
  35. web2cli/runtime/engine.py +743 -0
  36. web2cli/runtime/parser.py +398 -0
  37. web2cli/runtime/template.py +52 -0
  38. web2cli/types.py +71 -0
  39. web2cli-0.2.0.dist-info/METADATA +467 -0
  40. web2cli-0.2.0.dist-info/RECORD +44 -0
  41. web2cli-0.2.0.dist-info/WHEEL +5 -0
  42. web2cli-0.2.0.dist-info/entry_points.txt +2 -0
  43. web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  44. web2cli-0.2.0.dist-info/top_level.txt +1 -0
File without changes
@@ -0,0 +1,113 @@
1
+ """HTTP request execution via httpx (default) or curl_cffi (TLS impersonation)."""
2
+
3
+ import sys
4
+ import time
5
+
6
+ import httpx
7
+
8
+ from web2cli.types import Request
9
+
10
+
11
class HttpError(Exception):
    """HTTP-level failure carrying the offending status code.

    A ``status_code`` of 0 means the request never completed
    (connection failure or timeout) rather than an HTTP response.
    """

    def __init__(self, status_code: int, message: str):
        super().__init__(message)
        self.status_code = status_code
15
+
16
+
17
async def _execute_httpx(request: Request) -> tuple[int, dict, str]:
    """Execute via httpx (standard path).

    Args:
        request: The prepared request to send.

    Returns:
        (status_code, headers, body_text) of the final response after
        following redirects.

    Raises:
        HttpError: with status_code 0 on connection failure or timeout.
    """
    # NOTE(review): assumes request.headers uses canonical "Content-Type"
    # casing — a lowercase "content-type" key would be missed; confirm
    # against the request builder.
    content_type = (request.content_type or request.headers.get("Content-Type", "")).lower()
    # Dict bodies are form-encoded only when the declared content type says
    # so; other dicts go as JSON, and str/bytes bodies are sent raw.
    body_is_form = (
        isinstance(request.body, dict)
        and content_type.startswith("application/x-www-form-urlencoded")
    )

    try:
        async with httpx.AsyncClient(follow_redirects=True) as client:
            response = await client.request(
                method=request.method,
                url=request.url,
                params=request.params or None,
                headers=request.headers,
                cookies=request.cookies,
                content=request.body if isinstance(request.body, (str, bytes)) else None,
                data=request.body if body_is_form else None,
                json=request.body if isinstance(request.body, dict) and not body_is_form else None,
            )
    except httpx.ConnectError as exc:
        # Chain the cause so the underlying transport error stays debuggable.
        raise HttpError(0, f"Connection failed: could not reach {request.url}") from exc
    except httpx.TimeoutException as exc:
        raise HttpError(0, f"Request timed out: {request.url}") from exc

    return response.status_code, dict(response.headers), response.text
43
+
44
+
45
async def _execute_impersonate(
    request: Request, impersonate: str
) -> tuple[int, dict, str]:
    """Execute via curl_cffi with TLS impersonation.

    Args:
        request: The prepared request to send.
        impersonate: Browser fingerprint name passed to curl_cffi.

    Returns:
        (status_code, headers, body_text) after following redirects.

    Raises:
        HttpError: with status_code 0 on connection failure or timeout.
    """
    # Imported lazily so curl_cffi is only required when impersonation is used.
    from curl_cffi.requests import AsyncSession

    # NOTE(review): assumes request.headers uses canonical "Content-Type"
    # casing — confirm against the request builder.
    content_type = (request.content_type or request.headers.get("Content-Type", "")).lower()
    body_is_form = (
        isinstance(request.body, dict)
        and content_type.startswith("application/x-www-form-urlencoded")
    )

    try:
        async with AsyncSession(impersonate=impersonate) as session:
            response = await session.request(
                method=request.method,
                url=request.url,
                params=request.params or None,
                headers=request.headers,
                cookies=request.cookies,
                # Form dicts and raw str/bytes both travel via data=;
                # non-form dicts are JSON-encoded via json=.
                data=request.body if body_is_form else (
                    request.body if isinstance(request.body, (str, bytes)) else None
                ),
                json=request.body if isinstance(request.body, dict) and not body_is_form else None,
                allow_redirects=True,
            )
    except ConnectionError as exc:
        # NOTE(review): curl_cffi may raise its own error types that are not
        # subclasses of the builtins caught here — verify coverage.
        raise HttpError(0, f"Connection failed: could not reach {request.url}") from exc
    except TimeoutError as exc:
        raise HttpError(0, f"Request timed out: {request.url}") from exc

    return response.status_code, dict(response.headers), response.text
77
+
78
+
79
async def execute(
    request: Request, verbose: bool = False, impersonate: str | None = None
) -> tuple[int, dict, str]:
    """Execute HTTP request. Returns (status_code, headers, body).

    Dispatches to the curl_cffi path when an impersonation profile is
    given, otherwise to plain httpx. Maps well-known failure statuses
    (429 / 403 / 5xx) to HttpError with actionable messages.
    """
    if verbose:
        sys.stderr.write(f"→ {request.method} {request.url}\n")
        if request.params:
            sys.stderr.write(f" params: {request.params}\n")
        if impersonate:
            sys.stderr.write(f" impersonate: {impersonate}\n")

    started = time.monotonic()
    if impersonate:
        status, headers, body = await _execute_impersonate(request, impersonate)
    else:
        status, headers, body = await _execute_httpx(request)
    elapsed = time.monotonic() - started

    if verbose:
        sys.stderr.write(f"← {status} ({elapsed:.2f}s)\n")

    if status == 429:
        retry = headers.get("Retry-After", "?")
        raise HttpError(429, f"Rate limited. Try again in {retry} seconds.")
    if status == 403:
        raise HttpError(
            403,
            "Access denied. You may need to login: web2cli login <domain>",
        )
    if status >= 500:
        raise HttpError(status, f"Server error ({status})")

    return status, headers, body
File without changes
@@ -0,0 +1,116 @@
1
+ """Output formatting: table, json, csv, plain, md."""
2
+
3
+ import csv
4
+ import io
5
+ import json
6
+
7
+ from rich import box
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+
11
+
12
def format_output(
    records: list[dict],
    fmt: str = "table",
    fields: list[str] | None = None,
    no_color: bool = False,
    no_header: bool = False,
) -> str:
    """Format records for stdout.

    Args:
        records: List of dicts to format.
        fmt: Output format — table, json, csv, plain, md. Anything else
            falls back to table.
        fields: Which fields to include (None = all keys of first record).
        no_color: Disable colored output.
        no_header: Omit header row (csv only).
    """
    if not records:
        return ""

    # Field selection defaults to the first record's key order.
    selected = fields if fields else list(records[0].keys())

    # Project every record down to the selected fields only.
    rows = [{key: rec.get(key) for key in selected} for rec in records]

    dispatch = {
        "json": lambda: _format_json(rows),
        "csv": lambda: _format_csv(rows, selected, no_header=no_header),
        "plain": lambda: _format_plain(rows, selected),
        "md": lambda: _format_markdown(rows, selected),
    }
    handler = dispatch.get(fmt)
    if handler is not None:
        return handler()
    return _format_table(rows, selected, no_color)
47
+
48
+
49
def _format_table(records: list[dict], fields: list[str], no_color: bool) -> str:
    """Render records as a Rich table captured to a plain string."""
    table = Table(
        show_header=True,
        header_style=None if no_color else "bold",
        show_lines=False,
        pad_edge=False,
        # Plain ASCII box when color is off; heavier rules otherwise.
        box=box.ASCII2 if no_color else box.HEAVY_HEAD,
    )

    for column in fields:
        table.add_column(column.upper())

    for record in records:
        cells = []
        for column in fields:
            value = record.get(column)
            cells.append("" if value is None else str(value))
        table.add_row(*cells)

    # Capture instead of printing so callers decide where output goes.
    console = Console(no_color=no_color, force_terminal=not no_color)
    with console.capture() as captured:
        console.print(table)
    return captured.get().rstrip()
75
+
76
+
77
+ def _format_json(records: list[dict]) -> str:
78
+ """JSON array output."""
79
+ return json.dumps(records, indent=2, ensure_ascii=False)
80
+
81
+
82
+ def _format_csv(records: list[dict], fields: list[str], no_header: bool = False) -> str:
83
+ """CSV output."""
84
+ buf = io.StringIO()
85
+ writer = csv.DictWriter(buf, fieldnames=fields, extrasaction="ignore")
86
+ if not no_header:
87
+ writer.writeheader()
88
+ writer.writerows(records)
89
+ return buf.getvalue().rstrip()
90
+
91
+
92
+ def _format_markdown(records: list[dict], fields: list[str]) -> str:
93
+ """Markdown table output."""
94
+ headers = [f.upper() for f in fields]
95
+ lines = ["| " + " | ".join(headers) + " |"]
96
+ lines.append("| " + " | ".join("---" for _ in fields) + " |")
97
+ for record in records:
98
+ cells = []
99
+ for field in fields:
100
+ val = record.get(field)
101
+ cell = str(val) if val is not None else ""
102
+ cell = cell.replace("|", "\\|")
103
+ cells.append(cell)
104
+ lines.append("| " + " | ".join(cells) + " |")
105
+ return "\n".join(lines)
106
+
107
+
108
+ def _format_plain(records: list[dict], fields: list[str]) -> str:
109
+ """Plain output — first field only, one per line. Best for piping."""
110
+ first_field = fields[0]
111
+ lines = []
112
+ for record in records:
113
+ val = record.get(first_field)
114
+ if val is not None:
115
+ lines.append(str(val))
116
+ return "\n".join(lines)
File without changes
@@ -0,0 +1,21 @@
1
+ """Dynamic import and execution of custom parser scripts."""
2
+
3
+ import importlib.util
4
+ from pathlib import Path
5
+
6
+
7
def parse_custom(
    script_path: str,
    adapter_dir: Path,
    status_code: int,
    headers: dict,
    body: str,
    args: dict,
) -> list[dict]:
    """Import and call a custom parser script.

    The script at ``adapter_dir / script_path`` must define a
    ``parse(status_code, headers, body, args)`` function returning a
    list of record dicts.

    Raises:
        FileNotFoundError: if the script cannot be located/loaded.
        AttributeError: if the script defines no ``parse`` function.
    """
    full_path = adapter_dir / script_path
    spec = importlib.util.spec_from_file_location("custom_parser", full_path)
    # spec_from_file_location returns None (or a loaderless spec) for
    # unrecognized/missing files; fail with a clear message instead of
    # an AttributeError on None below.
    if spec is None or spec.loader is None:
        raise FileNotFoundError(f"Cannot load custom parser: {full_path}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    if not hasattr(module, "parse"):
        raise AttributeError(f"Custom parser has no parse() function: {full_path}")
    return module.parse(status_code, headers, body, args)
@@ -0,0 +1,111 @@
1
+ """HTML response parser using selectolax."""
2
+
3
+ import sys
4
+
5
+ from selectolax.parser import HTMLParser
6
+
7
+ from web2cli.parser.transforms import apply_transform
8
+
9
+ # Page titles that indicate bot/CAPTCHA blocking
10
+ _BLOCK_SIGNALS = ("human verification", "captcha", "access denied", "just a moment")
11
+
12
+
13
def parse_html(body: str, response_spec: dict, disable_truncate: bool = False) -> list[dict]:
    """Parse HTML response using CSS selectors from spec.

    Args:
        body: Raw HTML text of the response.
        response_spec: Adapter spec. Keys used here:
            - "extract": CSS selector for the repeated item nodes
              (defaults to "body").
            - "fields": list of per-field specs; each supports "name"
              (required), "path", "attribute", "collect", "join",
              "prefix", "relative", "transform", "truncate", "default".
        disable_truncate: When True, skip the per-field "truncate"
            display hint (also forwarded to apply_transform).

    Returns:
        One dict per matched item. With no "fields" configured, each
        record is just {"text": <stripped item text>}. Returns [] when
        the extract selector matches nothing; in that case a
        CAPTCHA/bot-block warning is printed to stderr if the page
        title matches a known block signal.
    """
    tree = HTMLParser(body)

    # Extract items matching the top-level selector
    extract_selector = response_spec.get("extract", "body")
    items = tree.css(extract_selector)

    if not items:
        # Detect CAPTCHA / bot-blocking pages: an empty match plus a
        # telltale <title> usually means the site served a block page.
        title_el = tree.css_first("title")
        if title_el:
            title = title_el.text(strip=True).lower()
            if any(s in title for s in _BLOCK_SIGNALS):
                print(
                    f"Blocked by site ({title_el.text(strip=True)}). "
                    "Try again later or use `web2cli login` to add cookies.",
                    file=sys.stderr,
                )
        return []

    fields = response_spec.get("fields", [])
    if not fields:
        # No field specs: degrade to one text record per matched node.
        return [{"text": node.text(strip=True)} for node in items]

    records = []
    for item in items:
        record = {}
        for field_spec in fields:
            name = field_spec["name"]
            path = field_spec.get("path", "")
            attribute = field_spec.get("attribute", "text")
            collect = field_spec.get("collect", False)
            join_sep = field_spec.get("join", ", ")
            prefix = field_spec.get("prefix", "")
            relative = field_spec.get("relative", "self")

            # "relative" re-anchors the search: the item itself (default),
            # the next element sibling, or the parent node.
            target = item
            if relative == "next":
                target = _next_element(item.next)
            elif relative == "parent":
                target = item.parent

            if collect:
                # Collect multiple matching elements, drop falsy values,
                # and join them into a single string.
                sub_nodes = target.css(path) if target else []
                values = [_extract_attr(n, attribute) for n in sub_nodes]
                values = [v for v in values if v]
                value = join_sep.join(values)
            else:
                # Single element; empty path means read from target itself.
                node = target.css_first(path) if (target and path) else target
                value = _extract_attr(node, attribute) if node else None

            # Apply prefix (e.g. turning relative hrefs into full URLs —
            # only when a truthy value was extracted).
            if prefix and value:
                value = prefix + value

            # Apply transform — runs BEFORE the truncate display hint so
            # length limits apply to the transformed text.
            transform = field_spec.get("transform")
            if transform:
                value = apply_transform(value, transform, disable_truncate=disable_truncate)

            # Apply truncation (display hint); only string values longer
            # than the limit are shortened.
            truncate = field_spec.get("truncate")
            if (
                not disable_truncate
                and truncate
                and value
                and isinstance(value, str)
                and len(value) > truncate
            ):
                value = value[:truncate] + "..."

            # Default fallback applies only when extraction yielded None
            # (empty strings and 0 are kept as-is).
            if value is None:
                value = field_spec.get("default")

            record[name] = value
        records.append(record)

    return records
95
+
96
+
97
+ def _extract_attr(node, attribute: str) -> str | None:
98
+ """Extract an attribute value from a selectolax node."""
99
+ if node is None:
100
+ return None
101
+ if attribute == "text":
102
+ return node.text(strip=True) or None
103
+ return node.attributes.get(attribute)
104
+
105
+
106
+ def _next_element(node):
107
+ """Return next element-like node, skipping text/whitespace nodes."""
108
+ cur = node
109
+ while cur is not None and not hasattr(cur, "css_first"):
110
+ cur = cur.next
111
+ return cur
@@ -0,0 +1,127 @@
1
+ """Shared value transforms used by JSON and HTML parsers."""
2
+
3
+ import math
4
+ import re
5
+ from datetime import datetime, timezone
6
+
7
+
8
def apply_transform(value, transform: str, disable_truncate: bool = False):
    """Apply a named transform to a value.

    Supported: ``round``, ``int`` (handles "1.2k"/"3m" suffixes, comma
    grouping, and numbers embedded in text), ``lowercase``, ``uppercase``,
    ``strip_html``, ``timestamp``, ``x_datetime``, ``x_date``, and
    ``truncate:N``. None values and unknown transform names pass through
    unchanged; parse failures fall back to the original value rather
    than raising.
    """
    if value is None:
        return value

    if transform == "round":
        try:
            return round(float(value))
        except (ValueError, TypeError):
            return value

    if transform == "int":
        text = str(value).strip().lower().replace(",", "")
        # "1.2k" -> 1200, "3m" -> 3000000
        if text.endswith("k"):
            try:
                return int(float(text[:-1]) * 1000)
            except (ValueError, TypeError):
                pass
        if text.endswith("m"):
            try:
                return int(float(text[:-1]) * 1000000)
            except (ValueError, TypeError):
                pass
        try:
            return int(float(text))
        except (ValueError, TypeError):
            # Last resort: pull the first number out of mixed text.
            m = re.search(r"-?\d[\d,]*", text)
            if m:
                try:
                    return int(m.group(0).replace(",", ""))
                except ValueError:
                    return value
            return value

    if transform == "lowercase":
        return str(value).lower()

    if transform == "uppercase":
        return str(value).upper()

    if transform == "strip_html":
        import html  # local import: only this transform needs it

        # Drop tags first, then decode ALL HTML entities (the previous
        # hand-rolled table covered only six named entities and missed
        # numeric references), then collapse whitespace — including the
        # non-breaking space &nbsp; decodes to — into single spaces.
        text = re.sub(r"<[^>]+>", " ", str(value))
        text = html.unescape(text)
        return re.sub(r"\s+", " ", text).strip()

    if transform == "timestamp":
        return _parse_timestamp(value)

    if transform == "x_datetime":
        return _parse_twitter_datetime(value)

    if transform == "x_date":
        full = _parse_twitter_datetime(value)
        # Keep only the YYYY-MM-DD part of the formatted datetime.
        if isinstance(full, str) and len(full) >= 10:
            return full[:10]
        return full

    if transform.startswith("truncate:"):
        if disable_truncate:
            return value
        try:
            n = int(transform.split(":")[1])
            s = str(value)
            return s[:n] + "..." if len(s) > n else s
        except (ValueError, IndexError):
            return value

    return value
81
+
82
+
83
+ def _parse_timestamp(value) -> str:
84
+ """Convert various timestamp formats to readable string."""
85
+ # Unix timestamp (int or float)
86
+ if isinstance(value, (int, float)):
87
+ if value > 1e12:
88
+ value = value / 1000 # milliseconds
89
+ try:
90
+ dt = datetime.fromtimestamp(value, tz=timezone.utc)
91
+ return dt.strftime("%Y-%m-%d %H:%M")
92
+ except (OSError, ValueError):
93
+ return str(value)
94
+
95
+ # ISO string
96
+ if isinstance(value, str):
97
+ # numeric string (unix seconds / milliseconds)
98
+ if re.fullmatch(r"\d+(\.\d+)?", value.strip()):
99
+ try:
100
+ num = float(value.strip())
101
+ if num > 1e12:
102
+ num = num / 1000
103
+ dt = datetime.fromtimestamp(num, tz=timezone.utc)
104
+ return dt.strftime("%Y-%m-%d %H:%M")
105
+ except (ValueError, OSError):
106
+ pass
107
+
108
+ for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S"):
109
+ try:
110
+ dt = datetime.strptime(value, fmt).replace(tzinfo=timezone.utc)
111
+ return dt.strftime("%Y-%m-%d %H:%M")
112
+ except ValueError:
113
+ continue
114
+ return value
115
+
116
+ return str(value)
117
+
118
+
119
+ def _parse_twitter_datetime(value) -> str:
120
+ """Convert X/Twitter datetime to readable format."""
121
+ if not isinstance(value, str):
122
+ return str(value)
123
+ try:
124
+ dt = datetime.strptime(value, "%a %b %d %H:%M:%S %z %Y")
125
+ return dt.strftime("%Y-%m-%d %H:%M")
126
+ except ValueError:
127
+ return value
web2cli/pipe.py ADDED
@@ -0,0 +1,10 @@
1
+ """Stdin detection and reading for piped input."""
2
+
3
+ import sys
4
+
5
+
6
def read_stdin() -> str | None:
    """Read from stdin if data is being piped. Returns None if no pipe.

    A TTY stdin means interactive use (nothing piped), so return None
    instead of blocking on a read.
    """
    if sys.stdin.isatty():
        return None
    return sys.stdin.read().strip()
@@ -0,0 +1,6 @@
1
+ """Provider plugin interfaces and built-ins."""
2
+
3
+ from web2cli.providers.base import Provider
4
+ from web2cli.providers.registry import get_provider
5
+
6
+ __all__ = ["Provider", "get_provider"]
@@ -0,0 +1,22 @@
1
+ """Provider base class for request generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from web2cli.types import AdapterSpec, Request, Session
8
+
9
+
10
class Provider:
    """Provider plugin contract.

    Subclasses supply a non-empty ``name`` (the registry key) and
    implement :meth:`build_request`.
    """

    # Registry key; subclasses must override with a non-empty string.
    name: str = ""

    def build_request(
        self,
        spec: dict[str, Any],
        ctx: dict[str, Any],
        adapter: AdapterSpec,
        session: Session | None,
    ) -> Request:
        """Build the outgoing request for an endpoint spec. Must be overridden."""
        raise NotImplementedError
@@ -0,0 +1,86 @@
1
+ """Provider registry."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib.util
6
+ import re
7
+ from pathlib import Path
8
+
9
+ from web2cli.providers.base import Provider
10
+ from web2cli.types import AdapterSpec
11
+
12
# Global registry of provider instances, keyed by provider name.
_PROVIDERS: dict[str, Provider] = {}
# Guard so built-in registration runs at most once per process.
_BUILTINS_REGISTERED = False
# Keys of provider modules already executed (prevents double-exec).
_DYNAMIC_MODULES_LOADED: set[str] = set()

# Adapters bundled with the package (web2cli/adapters/<domain>/).
_BUILTIN_ADAPTERS_DIR = Path(__file__).resolve().parent.parent / "adapters"
# User-installed adapters in the home directory.
_USER_ADAPTERS_DIR = Path.home() / ".web2cli" / "adapters"
18
+
19
+
20
+ def register_provider(provider: Provider) -> None:
21
+ if not provider.name:
22
+ raise ValueError("Provider must have a name")
23
+ _PROVIDERS[provider.name] = provider
24
+
25
+
26
+ def _register_builtins_once() -> None:
27
+ global _BUILTINS_REGISTERED
28
+ if _BUILTINS_REGISTERED:
29
+ return
30
+
31
+ _BUILTINS_REGISTERED = True
32
+
33
+
34
+ def _safe_ident(value: str) -> str:
35
+ return re.sub(r"[^A-Za-z0-9_]+", "_", value)
36
+
37
+
38
+ def _load_provider_module(module_path: Path, key: str) -> None:
39
+ if key in _DYNAMIC_MODULES_LOADED:
40
+ return
41
+ if not module_path.is_file():
42
+ return
43
+
44
+ module_name = f"web2cli_dynamic_provider_{_safe_ident(key)}"
45
+ spec = importlib.util.spec_from_file_location(module_name, module_path)
46
+ if spec is None or spec.loader is None:
47
+ return
48
+
49
+ module = importlib.util.module_from_spec(spec)
50
+ spec.loader.exec_module(module)
51
+ _DYNAMIC_MODULES_LOADED.add(key)
52
+
53
+
54
+ def _load_from_adapter(adapter: AdapterSpec, provider_name: str) -> None:
55
+ if adapter.adapter_dir is None:
56
+ return
57
+ provider_path = adapter.adapter_dir / "providers" / f"{provider_name}.py"
58
+ key = f"{adapter.meta.domain}:{provider_name}:{provider_path}"
59
+ _load_provider_module(provider_path, key)
60
+
61
+
62
+ def _load_from_known_adapter_dirs(provider_name: str) -> None:
63
+ for base in (_BUILTIN_ADAPTERS_DIR, _USER_ADAPTERS_DIR):
64
+ if not base.is_dir():
65
+ continue
66
+ for adapter_dir in base.iterdir():
67
+ provider_path = adapter_dir / "providers" / f"{provider_name}.py"
68
+ key = f"{adapter_dir}:{provider_name}:{provider_path}"
69
+ _load_provider_module(provider_path, key)
70
+
71
+
72
+ def get_provider(name: str, adapter: AdapterSpec | None = None) -> Provider:
73
+ _register_builtins_once()
74
+ provider = _PROVIDERS.get(name)
75
+
76
+ if provider is None and adapter is not None:
77
+ _load_from_adapter(adapter, name)
78
+ provider = _PROVIDERS.get(name)
79
+
80
+ if provider is None:
81
+ _load_from_known_adapter_dirs(name)
82
+ provider = _PROVIDERS.get(name)
83
+
84
+ if provider is None:
85
+ raise ValueError(f"Unknown provider: {name}")
86
+ return provider
@@ -0,0 +1 @@
1
+ """web2cli runtime modules."""
@@ -0,0 +1,42 @@
1
+ """Small file cache for runtime resources."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import time
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ BASE_CACHE_DIR = Path.home() / ".web2cli" / "cache"
12
+
13
+
14
+ def _cache_path(domain: str, key: str) -> Path:
15
+ digest = hashlib.sha1(key.encode()).hexdigest() # nosec: non-crypto use
16
+ return BASE_CACHE_DIR / domain / "runtime" / f"{digest}.json"
17
+
18
+
19
+ def load_cache(domain: str, key: str, ttl: int | None = None) -> Any | None:
20
+ """Load cached payload if present and not expired."""
21
+ path = _cache_path(domain, key)
22
+ if not path.is_file():
23
+ return None
24
+ try:
25
+ data = json.loads(path.read_text())
26
+ except (json.JSONDecodeError, OSError):
27
+ return None
28
+
29
+ ts = data.get("ts")
30
+ if ttl and isinstance(ts, (int, float)):
31
+ if time.time() - ts > ttl:
32
+ return None
33
+
34
+ return data.get("payload")
35
+
36
+
37
+ def save_cache(domain: str, key: str, payload: Any) -> None:
38
+ """Persist payload in cache."""
39
+ path = _cache_path(domain, key)
40
+ path.parent.mkdir(parents=True, exist_ok=True)
41
+ doc = {"ts": time.time(), "payload": payload}
42
+ path.write_text(json.dumps(doc))