web2cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. web2cli/__init__.py +3 -0
  2. web2cli/__main__.py +5 -0
  3. web2cli/adapter/__init__.py +0 -0
  4. web2cli/adapter/lint.py +667 -0
  5. web2cli/adapter/loader.py +157 -0
  6. web2cli/adapter/validator.py +127 -0
  7. web2cli/adapters/discord.com/web2cli.yaml +476 -0
  8. web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
  9. web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
  10. web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
  11. web2cli/adapters/reddit.com/web2cli.yaml +233 -0
  12. web2cli/adapters/slack.com/web2cli.yaml +445 -0
  13. web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
  14. web2cli/adapters/x.com/providers/x_graphql.py +299 -0
  15. web2cli/adapters/x.com/web2cli.yaml +449 -0
  16. web2cli/auth/__init__.py +0 -0
  17. web2cli/auth/browser_login.py +820 -0
  18. web2cli/auth/manager.py +166 -0
  19. web2cli/auth/store.py +68 -0
  20. web2cli/cli.py +1286 -0
  21. web2cli/executor/__init__.py +0 -0
  22. web2cli/executor/http.py +113 -0
  23. web2cli/output/__init__.py +0 -0
  24. web2cli/output/formatter.py +116 -0
  25. web2cli/parser/__init__.py +0 -0
  26. web2cli/parser/custom.py +21 -0
  27. web2cli/parser/html_parser.py +111 -0
  28. web2cli/parser/transforms.py +127 -0
  29. web2cli/pipe.py +10 -0
  30. web2cli/providers/__init__.py +6 -0
  31. web2cli/providers/base.py +22 -0
  32. web2cli/providers/registry.py +86 -0
  33. web2cli/runtime/__init__.py +1 -0
  34. web2cli/runtime/cache.py +42 -0
  35. web2cli/runtime/engine.py +743 -0
  36. web2cli/runtime/parser.py +398 -0
  37. web2cli/runtime/template.py +52 -0
  38. web2cli/types.py +71 -0
  39. web2cli-0.2.0.dist-info/METADATA +467 -0
  40. web2cli-0.2.0.dist-info/RECORD +44 -0
  41. web2cli-0.2.0.dist-info/WHEEL +5 -0
  42. web2cli-0.2.0.dist-info/entry_points.txt +2 -0
  43. web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  44. web2cli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,398 @@
1
+ """Parser helpers for declarative pipeline steps."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from typing import Any
8
+
9
+ import jmespath
10
+
11
+ from web2cli.parser.html_parser import parse_html
12
+ from web2cli.parser.transforms import apply_transform
13
+ from web2cli.runtime.template import render_string, resolve_expr
14
+
15
+
16
+ def _jmespath_expr(path: str) -> str:
17
+ """Convert common JSONPath-like syntax to jmespath."""
18
+ if path == "$":
19
+ return "@"
20
+ if path.startswith("$."):
21
+ return path[2:]
22
+ if path.startswith("$["):
23
+ return path[1:]
24
+ return path
25
+
26
+
27
def _eval_json_expr(data: Any, expr: str) -> Any:
    """Evaluate ``expr`` against ``data`` using jmespath.

    The expression is first normalised by ``_jmespath_expr``. A ``None``
    expression yields ``None`` without invoking jmespath.
    """
    if expr is not None:
        return jmespath.search(_jmespath_expr(expr), data)
    return None
31
+
32
+
33
+ def _request_to_json(data: Any) -> Any:
34
+ """Normalize request step output or raw value to parsed JSON."""
35
+ if isinstance(data, dict) and "json" in data:
36
+ return data.get("json")
37
+ if isinstance(data, dict) and "body" in data:
38
+ body = data.get("body", "")
39
+ if isinstance(body, str):
40
+ try:
41
+ return json.loads(body)
42
+ except json.JSONDecodeError:
43
+ return None
44
+ return body
45
+ if isinstance(data, str):
46
+ try:
47
+ return json.loads(data)
48
+ except json.JSONDecodeError:
49
+ return None
50
+ return data
51
+
52
+
53
+ def _extract_items(parsed: Any, extract: str | None) -> list[Any]:
54
+ if extract:
55
+ items = _eval_json_expr(parsed, extract)
56
+ else:
57
+ items = parsed
58
+
59
+ if items is None:
60
+ return []
61
+ if isinstance(items, list):
62
+ return items
63
+ return [items]
64
+
65
+
66
+ def _resolve_field_source(field_spec: dict, item: Any, ctx: dict[str, Any]) -> Any:
67
+ source = field_spec.get("from", field_spec.get("path"))
68
+ if source is None:
69
+ return None
70
+
71
+ if isinstance(source, dict):
72
+ coalesce = source.get("coalesce")
73
+ if isinstance(coalesce, list):
74
+ for expr in coalesce:
75
+ value = _eval_json_expr(item, expr) if isinstance(expr, str) else expr
76
+ if value not in (None, "", []):
77
+ return value
78
+ return None
79
+ value_expr = source.get("value")
80
+ if isinstance(value_expr, str):
81
+ return _eval_json_expr(item, value_expr)
82
+ return value_expr
83
+
84
+ if isinstance(source, str):
85
+ # Context expression support, e.g. "steps.users.by_id"
86
+ if source.startswith("ctx."):
87
+ return resolve_expr(source[4:], ctx)
88
+ return _eval_json_expr(item, source)
89
+
90
+ return source
91
+
92
+
93
+ def _disable_truncate(ctx: dict[str, Any]) -> bool:
94
+ flags = ctx.get("flags")
95
+ if not isinstance(flags, dict):
96
+ return False
97
+ return bool(flags.get("no_truncate"))
98
+
99
+
100
+ def _apply_ops(value: Any, field_spec: dict, item: Any, ctx: dict[str, Any]) -> Any:
101
+ ops: list[Any] = []
102
+ transform = field_spec.get("transform")
103
+ if transform:
104
+ ops.append(transform)
105
+ if field_spec.get("ops"):
106
+ ops.extend(field_spec["ops"])
107
+
108
+ for op in ops:
109
+ if isinstance(op, str):
110
+ value = apply_transform(value, op, disable_truncate=_disable_truncate(ctx))
111
+ continue
112
+
113
+ if not isinstance(op, dict) or len(op) != 1:
114
+ continue
115
+
116
+ op_name, cfg = next(iter(op.items()))
117
+ if op_name == "map_lookup":
118
+ cfg = cfg or {}
119
+ mapping = resolve_expr(cfg.get("from", ""), ctx) if cfg.get("from") else {}
120
+ if isinstance(mapping, dict):
121
+ key = value
122
+ if key not in mapping and isinstance(key, str):
123
+ key = key.strip()
124
+ value = mapping.get(key, cfg.get("default", value))
125
+ continue
126
+
127
+ if op_name == "join":
128
+ cfg = cfg or {}
129
+ sep = cfg.get("sep", ", ")
130
+ if isinstance(value, list):
131
+ value = sep.join(str(v) for v in value if v is not None)
132
+ continue
133
+
134
+ if op_name == "add":
135
+ cfg = cfg or {}
136
+ delta = cfg.get("value", 0)
137
+ try:
138
+ value = (0 if value is None else float(value)) + float(delta)
139
+ if float(value).is_integer():
140
+ value = int(value)
141
+ except (ValueError, TypeError):
142
+ pass
143
+ continue
144
+
145
+ if op_name == "regex_replace":
146
+ cfg = cfg or {}
147
+ pattern = cfg.get("pattern")
148
+ repl = cfg.get("repl", "")
149
+ if pattern is not None and value is not None:
150
+ value = re.sub(pattern, repl, str(value))
151
+ continue
152
+
153
+ if op_name == "append_urls":
154
+ cfg = cfg or {}
155
+ path = cfg.get("path")
156
+ sep = cfg.get("sep", " ")
157
+ if path:
158
+ urls = _eval_json_expr(item, path)
159
+ if urls:
160
+ if not isinstance(urls, list):
161
+ urls = [urls]
162
+ urls = [str(u) for u in urls if u]
163
+ if urls:
164
+ base = str(value or "").strip()
165
+ suffix = sep.join(urls)
166
+ value = f"{base} {suffix}".strip() if base else suffix
167
+ continue
168
+
169
+ if op_name == "template":
170
+ cfg = cfg or {}
171
+ template = str(cfg.get("value", "{{value}}"))
172
+ local_ctx = dict(ctx)
173
+ local_ctx["value"] = value
174
+ local_ctx["item"] = item
175
+ value = render_string(template, local_ctx)
176
+ continue
177
+
178
+ if value is None and "default" in field_spec:
179
+ return field_spec.get("default")
180
+ return value
181
+
182
+
183
def apply_post_ops(
    records: list[dict[str, Any]],
    ops: list[Any] | None,
    ctx: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Apply record-level operations.

    Supported ops, applied in the order given (unknown ops are ignored):

    * ``"reverse"``: reverse the record order.
    * ``{"sort": {"by": field, "order": "asc" | "desc"}}``: stable sort on
      ``field``; missing or falsy values sort as ``0``. If the values cannot
      be compared with each other (for example a text field with some empty
      entries), the sort is retried on their string form with empty values
      as ``""`` instead of raising ``TypeError``.
    * ``{"limit": n}``: keep the first ``n`` records (``n >= 0``); values
      that cannot be converted to ``int`` are ignored.
    * ``{"filter_not_empty": field}``: drop records whose ``field`` is
      ``None``, ``""`` or ``[]``.
    * ``{"concat": {"steps": [...], "position": "before" | "after"}}``: add
      the records of the named steps from ``ctx["steps"]``.

    Args:
        records: Records to post-process; never mutated.
        ops: Operation list; when falsy ``records`` is returned as-is.
        ctx: Runtime context, only needed by ``concat``.

    Returns:
        The resulting record list (a new list whenever ``ops`` is non-empty).
    """
    if not ops:
        return records

    out = list(records)
    for op in ops:
        if op == "reverse":
            out.reverse()
            continue

        if not isinstance(op, dict):
            continue

        if "sort" in op:
            cfg = op.get("sort") or {}
            field = cfg.get("by")
            if field:
                descending = str(cfg.get("order", "asc")).lower() == "desc"
                try:
                    # sorted() leaves ``out`` untouched if a comparison fails.
                    out = sorted(
                        out,
                        key=lambda r: r.get(field, 0) or 0,
                        reverse=descending,
                    )
                except TypeError:
                    # The 0 placeholder for missing values is not comparable
                    # with str values; fall back to ordering by text.
                    out = sorted(
                        out,
                        key=lambda r: str(r.get(field) or ""),
                        reverse=descending,
                    )

        elif "limit" in op:
            try:
                n_int = int(op.get("limit"))
            except (ValueError, TypeError):
                # Best effort: an unusable limit leaves the records as-is.
                continue
            if n_int >= 0:
                out = out[:n_int]

        elif "filter_not_empty" in op:
            field = op.get("filter_not_empty")
            if field:
                out = [r for r in out if r.get(field) not in (None, "", [])]

        elif "concat" in op:
            cfg = op.get("concat") or {}
            step_names = cfg.get("steps", [])
            if isinstance(step_names, str):
                step_names = [step_names]

            extra: list[dict[str, Any]] = []
            if ctx and isinstance(step_names, list):
                steps_ctx = ctx.get("steps", {})
                for step_name in step_names:
                    if not step_name:
                        continue
                    extra.extend(_records_from_source(steps_ctx.get(str(step_name))))

            if str(cfg.get("position", "after")).lower() == "before":
                out = extra + out
            else:
                out = out + extra

    return out
246
+
247
+
248
+ def _records_from_source(value: Any) -> list[dict[str, Any]]:
249
+ if isinstance(value, list):
250
+ return [r for r in value if isinstance(r, dict)]
251
+ if isinstance(value, dict):
252
+ records = value.get("records")
253
+ if isinstance(records, list):
254
+ return [r for r in records if isinstance(r, dict)]
255
+ return []
256
+
257
+
258
+ def _flatten_tree(items: list[Any], cfg: dict[str, Any]) -> list[Any]:
259
+ children_path = str(cfg.get("children_path", "$.children[*]"))
260
+ item_path = cfg.get("item_path")
261
+ include_path = cfg.get("include_path")
262
+ include_equals = cfg.get("include_equals")
263
+ include_in = cfg.get("include_in")
264
+ depth_path = cfg.get("depth_path")
265
+ depth_field = str(cfg.get("depth_field", "__depth"))
266
+ indent_field = cfg.get("indent_field")
267
+ indent_unit = str(cfg.get("indent_unit", " "))
268
+
269
+ flattened: list[Any] = []
270
+
271
+ def _match(node: Any) -> bool:
272
+ if include_path is None:
273
+ return True
274
+ value = _eval_json_expr(node, str(include_path))
275
+ if include_equals is not None:
276
+ return str(value) == str(include_equals)
277
+ if isinstance(include_in, list):
278
+ return value in include_in
279
+ return bool(value)
280
+
281
+ def _depth(node: Any, fallback_depth: int) -> int:
282
+ if isinstance(depth_path, str):
283
+ value = _eval_json_expr(node, depth_path)
284
+ try:
285
+ return int(value)
286
+ except (TypeError, ValueError):
287
+ pass
288
+ return fallback_depth
289
+
290
+ def _walk(node: Any, depth: int) -> None:
291
+ current_depth = _depth(node, depth)
292
+
293
+ if _match(node):
294
+ entry = _eval_json_expr(node, str(item_path)) if isinstance(item_path, str) else node
295
+ if isinstance(entry, dict):
296
+ rec = dict(entry)
297
+ rec[depth_field] = current_depth
298
+ if indent_field:
299
+ rec[str(indent_field)] = indent_unit * max(current_depth, 0)
300
+ flattened.append(rec)
301
+ else:
302
+ flattened.append(entry)
303
+
304
+ children = _eval_json_expr(node, children_path)
305
+ if children is None:
306
+ return
307
+ if not isinstance(children, list):
308
+ children = [children]
309
+ for child in children:
310
+ _walk(child, current_depth + 1)
311
+
312
+ for root in items:
313
+ _walk(root, 0)
314
+
315
+ return flattened
316
+
317
+
318
+ def _apply_item_ops(
319
+ items: list[Any],
320
+ item_ops: list[Any] | None,
321
+ ) -> list[Any]:
322
+ if not item_ops:
323
+ return items
324
+
325
+ out = list(items)
326
+ for op in item_ops:
327
+ if isinstance(op, dict) and "flatten_tree" in op:
328
+ cfg = op.get("flatten_tree") or {}
329
+ out = _flatten_tree(out, cfg)
330
+ return out
331
+
332
+
333
def parse_records(source: Any, parse_spec: dict, ctx: dict[str, Any]) -> list[dict[str, Any]]:
    """Parse records from a step output.

    ``parse_spec["format"]`` (default ``"json"``) selects the strategy:

    * ``"html"``: the body text is handed to ``parse_html``.
    * ``"json"``: ``source`` is normalised with ``_request_to_json``. An
      undecodable source returns ``[]`` right away, without post ops.
    * ``"json_list"``: ``source`` is treated as a list of step outputs. Each
      is decoded and the results concatenated. Dict entries coming from a
      dict output receive ``__index`` / ``__item`` copied from that output's
      ``index`` / ``item`` keys, unless the entry already has them.

    For the JSON formats, ``extract`` and ``item_ops`` are applied next.
    With no ``fields`` the dict items are the records. Otherwise one record
    is built per item from the field specs, in order: source, optional
    ``template``, then ops. ``post_ops`` always run last.

    Raises:
        ValueError: if the format is not one of the three above.
    """
    fmt = parse_spec.get("format", "json")
    post_ops = parse_spec.get("post_ops")

    if fmt == "html":
        body = source.get("body", "") if isinstance(source, dict) else str(source or "")
        html_records = parse_html(body, parse_spec, disable_truncate=_disable_truncate(ctx))
        return apply_post_ops(html_records, post_ops, ctx)

    if fmt not in {"json", "json_list"}:
        raise ValueError(f"Unsupported parse format: {fmt}")

    def _tagged(entry: Any, origin: Any) -> Any:
        # Remember which fan-out iteration produced this entry.
        if isinstance(entry, dict) and isinstance(origin, dict):
            entry = dict(entry)
            for src_key, dst_key in (("index", "__index"), ("item", "__item")):
                if src_key in origin:
                    entry.setdefault(dst_key, origin[src_key])
        return entry

    extract = parse_spec.get("extract")
    if fmt == "json_list":
        outputs = source if isinstance(source, list) else [source]
        collected: list[Any] = []
        for origin in outputs:
            decoded = _request_to_json(origin)
            if decoded is None:
                continue
            batch = decoded if isinstance(decoded, list) else [decoded]
            collected.extend(_tagged(entry, origin) for entry in batch)
        items = _extract_items(collected, extract)
    else:
        decoded = _request_to_json(source)
        if decoded is None:
            return []
        items = _extract_items(decoded, extract)

    items = _apply_item_ops(items, parse_spec.get("item_ops"))

    fields = parse_spec.get("fields", [])
    if not fields:
        # No field mapping: pass dict items through unchanged.
        passthrough = [it for it in items if isinstance(it, dict)]
        return apply_post_ops(passthrough, post_ops, ctx)

    records: list[dict[str, Any]] = []
    for item in items:
        record: dict[str, Any] = {}
        for field_spec in fields:
            name = field_spec["name"]
            value = _resolve_field_source(field_spec, item, ctx)

            template = field_spec.get("template")
            if template and value is not None:
                value = template.replace("{{value}}", str(value))

            record[name] = _apply_ops(value, field_spec, item, ctx)
        records.append(record)

    return apply_post_ops(records, post_ops, ctx)
@@ -0,0 +1,52 @@
1
+ """Template rendering helpers for adapter specs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any
7
+
8
+ import jmespath
9
+
10
# Matches one ``{{ ... }}`` placeholder; group 1 is the text between the
# braces, which may not itself contain ``{`` or ``}``.
_TPL_RE = re.compile(r"\{\{([^{}]+)\}\}")
11
+
12
+
13
def resolve_expr(expr: str, ctx: dict[str, Any]) -> Any:
    """Resolve an expression against runtime context.

    Lookup order:

    1. ``expr`` evaluated as a JMESPath expression on ``ctx``;
    2. ``ctx["args"][expr]`` (short form, e.g. ``{{arg_name}}``);
    3. ``ctx[expr]`` as a plain key.

    An expression that JMESPath rejects (for instance an empty string, or a
    name JMESPath cannot lex as an identifier) counts as a miss in step 1,
    so the plain-key lookups still run.

    Args:
        expr: Expression text; surrounding whitespace is ignored.
        ctx: Runtime context.

    Returns:
        The first non-``None`` JMESPath result, else the args entry, else
        ``ctx.get(expr)`` (``None`` when nothing matches).
    """
    expr = expr.strip()
    try:
        value = jmespath.search(expr, ctx)
    except jmespath.exceptions.JMESPathError:
        # Not valid JMESPath: fall through to the literal-key lookups
        # rather than aborting the whole render.
        value = None
    if value is not None:
        return value

    # Short form: {{arg_name}} resolves from args.
    args = ctx.get("args") or {}
    if expr in args:
        return args[expr]
    return ctx.get(expr)
25
+
26
+
27
def render_string(template: str, ctx: dict[str, Any]) -> Any:
    """Render a template string against ``ctx``.

    A template that is exactly one ``{{expr}}`` placeholder (ignoring
    surrounding whitespace) returns the resolved value itself, so its type
    is preserved. Otherwise every placeholder is replaced by the string form
    of its value, with ``None`` rendered as the empty string.
    """
    whole = _TPL_RE.fullmatch(template.strip())
    if whole is not None:
        return resolve_expr(whole.group(1), ctx)

    def _substitute(found: re.Match) -> str:
        resolved = resolve_expr(found.group(1), ctx)
        return str(resolved) if resolved is not None else ""

    return _TPL_RE.sub(_substitute, template)
42
+
43
+
44
def render_value(value: Any, ctx: dict[str, Any]) -> Any:
    """Recursively render templates in nested data.

    Strings go through ``render_string``. Lists and dicts are rebuilt with
    their elements / values rendered (dict keys are kept as they are). Any
    other value is returned unchanged.
    """
    if isinstance(value, str):
        return render_string(value, ctx)
    if isinstance(value, dict):
        return {key: render_value(inner, ctx) for key, inner in value.items()}
    if isinstance(value, list):
        return [render_value(inner, ctx) for inner in value]
    return value
web2cli/types.py ADDED
@@ -0,0 +1,71 @@
1
+ """Core types for web2cli."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+
8
@dataclass
class Request:
    """Description of a single HTTP request.

    Only ``method`` and ``url`` are required. The mapping fields default to
    fresh empty dicts, and ``body`` / ``content_type`` default to ``None``.
    """

    # HTTP verb: GET, POST, etc.
    method: str
    url: str
    params: dict[str, str] = field(default_factory=dict)
    headers: dict[str, str] = field(default_factory=dict)
    cookies: dict[str, str] = field(default_factory=dict)
    body: str | bytes | dict | None = None
    content_type: str | None = None
17
+
18
+
19
@dataclass
class AdapterMeta:
    """Descriptive metadata of an adapter.

    The first six fields are required. ``spec_version`` defaults to
    ``"0.2"`` and ``transport`` to ``"http"``. ``impersonate`` is optional,
    and ``aliases`` / ``default_headers`` start out empty.
    """

    name: str
    domain: str
    base_url: str
    version: str
    description: str
    author: str
    spec_version: str = "0.2"
    transport: str = "http"
    impersonate: str | None = None
    aliases: list[str] = field(default_factory=list)
    default_headers: dict[str, str] = field(default_factory=dict)
32
+
33
+
34
@dataclass
class CommandArg:
    """Declaration of one command argument.

    ``source`` defaults to a fresh ``["arg"]`` list. ``enum``, ``min`` and
    ``max`` are optional constraints that default to ``None``.
    """

    name: str
    # One of: string, int, float, bool, flag, string[]
    type: str
    required: bool = False
    default: Any = None
    description: str = ""
    source: list[str] = field(default_factory=lambda: ["arg"])
    enum: list[str] | None = None
    min: int | None = None
    max: int | None = None
45
+
46
+
47
@dataclass
class CommandSpec:
    """Definition of one adapter command."""

    name: str
    description: str
    args: dict[str, CommandArg]
    # Raw YAML output section.
    output: dict
    # v0.2 step pipeline.
    pipeline: list[dict] = field(default_factory=list)
54
+
55
+
56
@dataclass
class AdapterSpec:
    """A complete adapter: metadata, auth section, commands and resources."""

    meta: AdapterMeta
    # Raw YAML auth section; None if the adapter declares no auth.
    auth: dict | None
    commands: dict[str, CommandSpec]
    # v0.2 named resources.
    resources: dict[str, dict] = field(default_factory=dict)
    # Path to the adapter directory on disk.
    adapter_dir: Path | None = None
63
+
64
+
65
@dataclass
class Session:
    """Stored authentication session for one domain.

    ``created_at`` and ``last_used`` default to empty strings.
    """

    domain: str
    # "cookies" | "token"
    auth_type: str
    # {"cookies": {...}} or {"token": "..."}
    data: dict
    created_at: str = ""
    last_used: str = ""