web2cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44):
  1. web2cli/__init__.py +3 -0
  2. web2cli/__main__.py +5 -0
  3. web2cli/adapter/__init__.py +0 -0
  4. web2cli/adapter/lint.py +667 -0
  5. web2cli/adapter/loader.py +157 -0
  6. web2cli/adapter/validator.py +127 -0
  7. web2cli/adapters/discord.com/web2cli.yaml +476 -0
  8. web2cli/adapters/mail.google.com/parsers/inbox.py +200 -0
  9. web2cli/adapters/mail.google.com/web2cli.yaml +52 -0
  10. web2cli/adapters/news.ycombinator.com/web2cli.yaml +356 -0
  11. web2cli/adapters/reddit.com/web2cli.yaml +233 -0
  12. web2cli/adapters/slack.com/web2cli.yaml +445 -0
  13. web2cli/adapters/stackoverflow.com/web2cli.yaml +257 -0
  14. web2cli/adapters/x.com/providers/x_graphql.py +299 -0
  15. web2cli/adapters/x.com/web2cli.yaml +449 -0
  16. web2cli/auth/__init__.py +0 -0
  17. web2cli/auth/browser_login.py +820 -0
  18. web2cli/auth/manager.py +166 -0
  19. web2cli/auth/store.py +68 -0
  20. web2cli/cli.py +1286 -0
  21. web2cli/executor/__init__.py +0 -0
  22. web2cli/executor/http.py +113 -0
  23. web2cli/output/__init__.py +0 -0
  24. web2cli/output/formatter.py +116 -0
  25. web2cli/parser/__init__.py +0 -0
  26. web2cli/parser/custom.py +21 -0
  27. web2cli/parser/html_parser.py +111 -0
  28. web2cli/parser/transforms.py +127 -0
  29. web2cli/pipe.py +10 -0
  30. web2cli/providers/__init__.py +6 -0
  31. web2cli/providers/base.py +22 -0
  32. web2cli/providers/registry.py +86 -0
  33. web2cli/runtime/__init__.py +1 -0
  34. web2cli/runtime/cache.py +42 -0
  35. web2cli/runtime/engine.py +743 -0
  36. web2cli/runtime/parser.py +398 -0
  37. web2cli/runtime/template.py +52 -0
  38. web2cli/types.py +71 -0
  39. web2cli-0.2.0.dist-info/METADATA +467 -0
  40. web2cli-0.2.0.dist-info/RECORD +44 -0
  41. web2cli-0.2.0.dist-info/WHEEL +5 -0
  42. web2cli-0.2.0.dist-info/entry_points.txt +2 -0
  43. web2cli-0.2.0.dist-info/licenses/LICENSE +202 -0
  44. web2cli-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,743 @@
1
+ """Command pipeline execution engine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import copy
7
+ import json
8
+ from dataclasses import dataclass
9
+ from typing import Any, Callable
10
+
11
+ import jmespath
12
+
13
+ from web2cli.executor.http import HttpError, execute
14
+ from web2cli.parser.custom import parse_custom
15
+ from web2cli.types import AdapterSpec, CommandSpec, Request, Session
16
+ from web2cli.providers import get_provider
17
+ from web2cli.runtime.cache import load_cache, save_cache
18
+ from web2cli.runtime.parser import apply_post_ops, parse_records
19
+ from web2cli.runtime.template import render_value
20
+
21
+
22
@dataclass
class ExecutionResult:
    """Result bundle returned by ``execute_command``."""

    # Records extracted from the selected output step (or the last step).
    records: list[dict[str, Any]]
    # Body of the most recent request/fanout response; None if no request ran.
    last_response_body: str | None = None
    # Collected trace messages; None when tracing was not requested.
    trace_lines: list[str] | None = None
27
+
28
+
29
+ def _payload_size(value: Any) -> int:
30
+ if value is None:
31
+ return 0
32
+ if isinstance(value, str):
33
+ return len(value.encode("utf-8", errors="ignore"))
34
+ if isinstance(value, bytes):
35
+ return len(value)
36
+ try:
37
+ return len(json.dumps(value))
38
+ except Exception:
39
+ return len(str(value))
40
+
41
+
42
+ def _summarize(value: Any) -> str:
43
+ if isinstance(value, list):
44
+ return f"list[{len(value)}]"
45
+ if isinstance(value, dict):
46
+ keys = ", ".join(list(value.keys())[:5])
47
+ return f"dict[{len(value)}] keys=[{keys}]"
48
+ if value is None:
49
+ return "none"
50
+ return type(value).__name__
51
+
52
+
53
+ def _jmespath_expr(path: str) -> str:
54
+ if path == "$":
55
+ return "@"
56
+ if path.startswith("$."):
57
+ return path[2:]
58
+ if path.startswith("$["):
59
+ return path[1:]
60
+ return path
61
+
62
+
63
+ def _join_url(base_url: str, raw_url: str) -> str:
64
+ if raw_url.startswith("http"):
65
+ return raw_url
66
+ return base_url.rstrip("/") + "/" + raw_url.lstrip("/")
67
+
68
+
69
+ def _omit_none(data: dict[str, Any]) -> dict[str, Any]:
70
+ return {k: v for k, v in data.items() if v is not None}
71
+
72
+
73
+ def _session_cookies(session: Session | None) -> dict[str, str]:
74
+ if session and session.data.get("cookies"):
75
+ return dict(session.data["cookies"])
76
+ return {}
77
+
78
+
79
+ def _session_token(session: Session | None) -> str | None:
80
+ if session and session.data.get("token"):
81
+ return str(session.data["token"])
82
+ return None
83
+
84
+
85
def _method_matches_session(method: dict, session: Session | None) -> bool:
    """Tell whether *session* holds the credential kind *method* needs.

    A ``token`` method needs a session token, a ``cookies`` method needs at
    least one stored cookie. Any other type, or no session, yields False.
    """
    if session is None:
        return False
    kind = str(method.get("type", "")).lower()
    if kind == "cookies":
        return bool(_session_cookies(session))
    if kind == "token":
        return _session_token(session) is not None
    return False
94
+
95
+
96
def _apply_auth_injection(
    request: Request,
    request_spec: dict[str, Any],
    auth_spec: dict | None,
    session: Session | None,
) -> Request:
    """Inject session credentials into *request* per the adapter auth methods.

    A token session first gets a default ``Authorization`` header holding the
    raw token, unless the request already carries such a header. Afterwards
    every entry of ``auth_spec["methods"]`` that matches the session and
    declares an ``inject`` mapping writes its (optionally prefixed) value
    into the configured target: ``header``, ``query``, ``cookie`` or ``form``.

    Args:
        request: Request to modify in place.
        request_spec: Request spec of the step; accepted for signature
            compatibility and not read here.
        auth_spec: Adapter auth configuration, or None.
        session: Stored session, or None.

    Returns:
        The same ``request`` object.
    """
    # Default policy: token session -> Authorization header.
    # Header names are case-insensitive, so an existing "authorization"
    # (any casing) must suppress the default instead of being duplicated.
    token = _session_token(session)
    if token and not any(
        str(name).lower() == "authorization" for name in request.headers
    ):
        request.headers["Authorization"] = token

    if not auth_spec:
        return request

    # "methods" may be present but null in the adapter YAML.
    for method in auth_spec.get("methods") or []:
        if not _method_matches_session(method, session):
            continue
        inject = method.get("inject") or {}
        if not inject:
            continue

        target = str(inject.get("target", "")).lower()
        key = inject.get("key")
        prefix = inject.get("prefix", "")
        if not key:
            continue

        value: str | None = None
        mtype = str(method.get("type", "")).lower()
        if mtype == "token":
            t = _session_token(session)
            value = f"{prefix}{t}" if t is not None else None
        elif mtype == "cookies":
            # Cookie methods inject the value of one named session cookie.
            cookie_name = inject.get("cookie")
            cookies = _session_cookies(session)
            if cookie_name:
                raw = cookies.get(cookie_name)
                value = f"{prefix}{raw}" if raw is not None else None

        if value is None:
            continue

        if target == "header":
            request.headers[str(key)] = value
        elif target == "query":
            request.params[str(key)] = value
        elif target == "cookie":
            request.cookies[str(key)] = value
        elif target == "form":
            # Only dict bodies can take an extra form field.
            if isinstance(request.body, dict):
                request.body[str(key)] = value
                if not request.content_type:
                    request.content_type = "application/x-www-form-urlencoded"

    return request
153
+
154
+
155
def _build_request(
    adapter: AdapterSpec,
    request_spec: dict[str, Any],
    ctx: dict[str, Any],
    session: Session | None,
) -> Request:
    """Turn a declarative request spec into a ``Request`` with auth applied.

    When the spec names a ``provider``, request construction is delegated to
    it. Otherwise url, params, headers, cookies and body are rendered from
    the spec against *ctx*; adapter default headers and session cookies act
    as the base that the spec's own values override.

    Raises:
        ValueError: If a dict body declares an unsupported ``encoding``.
    """
    provider_name = request_spec.get("provider")
    if provider_name:
        provider = get_provider(provider_name, adapter=adapter)
        provided = provider.build_request(request_spec, ctx, adapter, session)
        return _apply_auth_injection(provided, request_spec, adapter.auth, session)

    http_method = str(request_spec.get("method", "GET")).upper()
    full_url = _join_url(
        adapter.meta.base_url,
        str(render_value(request_spec.get("url", "/"), ctx)),
    )

    query = render_value(request_spec.get("params", {}), ctx) or {}

    merged_headers = dict(adapter.meta.default_headers)
    merged_headers.update(render_value(request_spec.get("headers", {}), ctx) or {})

    jar = _session_cookies(session)
    jar.update(render_value(request_spec.get("cookies", {}), ctx) or {})

    payload = None
    mime = None
    body_spec = request_spec.get("body")
    if body_spec and not isinstance(body_spec, dict):
        # Non-dict bodies are rendered as-is and carry no content type.
        payload = render_value(body_spec, ctx)
    elif body_spec:
        encoding = str(body_spec.get("encoding", "json")).lower()
        rendered = render_value(body_spec.get("template", {}), ctx)
        fixed_types = {
            "json": "application/json",
            "form": "application/x-www-form-urlencoded",
        }
        if encoding in fixed_types:
            payload = rendered
            mime = fixed_types[encoding]
        elif encoding == "text":
            payload = rendered if isinstance(rendered, str) else json.dumps(rendered)
            mime = "text/plain"
        elif encoding == "bytes":
            payload = rendered
            mime = body_spec.get("content_type", "application/octet-stream")
        else:
            raise ValueError(f"Unsupported body encoding: {encoding}")

    request = Request(
        method=http_method,
        url=full_url,
        params=_omit_none(query),
        headers=_omit_none(merged_headers),
        cookies=_omit_none(jar),
        body=payload,
        content_type=mime,
    )
    # Do not override a Content-Type the spec or adapter already set.
    already_typed = (
        "Content-Type" in request.headers or "content-type" in request.headers
    )
    if mime and not already_typed:
        request.headers["Content-Type"] = mime
    return _apply_auth_injection(request, request_spec, adapter.auth, session)
212
+
213
+
214
def _execute_request(
    request: Request,
    adapter: AdapterSpec,
    verbose: bool = False,
    trace: Callable[[str], None] | None = None,
    label: str | None = None,
) -> dict[str, Any]:
    """Send *request* and return a response dict.

    The result has the keys ``status``, ``headers``, ``body``, ``json`` and
    ``request``. ``json`` holds the decoded body when the body is a string
    containing valid JSON, otherwise None. When *trace* is given, one line is
    emitted before the request and one after the response, both prefixed
    with *label* when provided.
    """
    prefix = f"{label}: " if label else ""
    if trace:
        trace(
            f"{prefix}request {request.method} {request.url} "
            f"params={len(request.params)} headers={len(request.headers)} "
            f"cookies={len(request.cookies)} body_bytes={_payload_size(request.body)}"
        )

    # Each call drives the async executor to completion on its own event loop.
    coro = execute(request, verbose=verbose, impersonate=adapter.meta.impersonate)
    status, headers, body = asyncio.run(coro)

    decoded = None
    if isinstance(body, str):
        try:
            decoded = json.loads(body)
        except json.JSONDecodeError:
            decoded = None

    if trace:
        trace(
            f"{prefix}response status={status} body_bytes={_payload_size(body)} "
            f"json={'yes' if decoded is not None else 'no'}"
        )
    return {
        "status": status,
        "headers": headers,
        "body": body,
        "json": decoded,
        "request": request,
    }
252
+
253
+
254
+ def _set_cursor(
255
+ request_spec: dict[str, Any],
256
+ cursor_param: str,
257
+ cursor_location: str,
258
+ cursor_value: str,
259
+ ) -> dict[str, Any]:
260
+ spec = copy.deepcopy(request_spec)
261
+ location = cursor_location.lower()
262
+ if location == "params":
263
+ spec.setdefault("params", {})
264
+ spec["params"][cursor_param] = cursor_value
265
+ return spec
266
+
267
+ # body/form cursor
268
+ spec.setdefault("body", {})
269
+ body = spec["body"]
270
+ if isinstance(body, dict) and "template" in body:
271
+ body.setdefault("template", {})
272
+ if isinstance(body["template"], dict):
273
+ body["template"][cursor_param] = cursor_value
274
+ elif isinstance(body, dict):
275
+ body[cursor_param] = cursor_value
276
+ return spec
277
+
278
+
279
def _fetch_resource_records(
    adapter: AdapterSpec,
    resource_name: str,
    resource_spec: dict[str, Any],
    ctx: dict[str, Any],
    session: Session | None,
    verbose: bool = False,
    trace: Callable[[str], None] | None = None,
) -> list[dict[str, Any]]:
    """Load all records of a resource, from cache or by paginated requests.

    A cached list is returned directly. Otherwise pages are requested until
    no ``cursor_path`` is configured, the response has no JSON, the cursor
    is empty, or a cursor repeats. The combined records are then written to
    the cache and returned.
    """
    cache_spec = resource_spec.get("cache", {})
    cache_key = str(render_value(cache_spec.get("key", resource_name), ctx))
    ttl = cache_spec.get("ttl")
    # Only non-negative integer TTLs (int or digit string) are honoured.
    ttl_int = None
    if isinstance(ttl, (int, str)) and str(ttl).isdigit():
        ttl_int = int(ttl)

    cached = load_cache(adapter.meta.domain, cache_key, ttl=ttl_int)
    if isinstance(cached, list):
        if trace:
            trace(
                f"resolve resource={resource_name} cache=hit key={cache_key} "
                f"records={len(cached)}"
            )
        return cached
    if trace:
        trace(f"resolve resource={resource_name} cache=miss key={cache_key}")

    request_spec = resource_spec.get("request", {})
    response_spec = resource_spec.get(
        "response", resource_spec.get("parse", {"format": "json"})
    )

    paginate = resource_spec.get("paginate", {})
    cursor_param = paginate.get("cursor_param")
    cursor_path = paginate.get("cursor_path")
    cursor_location = paginate.get("cursor_location", "params")
    seen_cursors: set[str] = set()

    def _next_cursor(response: dict[str, Any]) -> str | None:
        """Return the cursor for the following page, or None to stop."""
        if not cursor_path:
            return None
        payload = response.get("json")
        if payload is None:
            return None
        found = jmespath.search(_jmespath_expr(str(cursor_path)), payload)
        if not found:
            return None
        found = str(found)
        # A repeated cursor means the API is looping; stop paginating.
        if found in seen_cursors:
            return None
        seen_cursors.add(found)
        return found

    records: list[dict[str, Any]] = []
    cursor = None
    page = 0
    while True:
        page += 1
        page_spec = request_spec
        if cursor and cursor_param:
            page_spec = _set_cursor(
                page_spec, str(cursor_param), str(cursor_location), str(cursor)
            )

        response = _execute_request(
            _build_request(adapter, page_spec, ctx, session),
            adapter,
            verbose=verbose,
            trace=trace,
            label=f"resource:{resource_name}:page:{page}",
        )
        page_records = parse_records(response, response_spec, ctx)
        records.extend(page_records)
        if trace:
            trace(
                f"resource={resource_name} page={page} parsed_records={len(page_records)} "
                f"total_records={len(records)}"
            )

        cursor = _next_cursor(response)
        if cursor is None:
            break

    save_cache(adapter.meta.domain, cache_key, records)
    if trace:
        trace(
            f"resolve resource={resource_name} cache=write key={cache_key} "
            f"records={len(records)}"
        )
    return records
361
+
362
+
363
def _run_resolve_step(
    step_spec: dict[str, Any],
    adapter: AdapterSpec,
    ctx: dict[str, Any],
    session: Session | None,
    verbose: bool = False,
    trace: Callable[[str], None] | None = None,
) -> dict[str, Any]:
    """Resolve a user-supplied value to a record of a named resource.

    The resource's records are fetched, then the rendered ``input`` is
    compared against each record's ``by`` field using the ``match`` mode
    (``equals``, ``ci_equals`` or ``contains``). The returned dict exposes
    the matched record, its ``value`` field and lookup maps over all
    records. When ``input`` renders to None, no matching is attempted and
    the record and value entries are None.

    Raises:
        ValueError: If the resource is unknown or nothing matches the input.
    """
    resource_name = step_spec["resource"]
    resource_spec = adapter.resources.get(resource_name)
    if resource_spec is None:
        raise ValueError(f"Unknown resource '{resource_name}'")

    records = _fetch_resource_records(
        adapter,
        resource_name,
        resource_spec,
        ctx,
        session,
        verbose=verbose,
        trace=trace,
    )

    input_value = render_value(step_spec.get("input"), ctx)
    by = step_spec.get("by", "name")
    out = step_spec.get("value", "id")
    mode = str(step_spec.get("match", "ci_equals")).lower()

    # Build the three lookup tables in a single pass over the records.
    map_by_by: dict[str, Any] = {}
    map_by_out: dict[str, Any] = {}
    records_by_out: dict[str, Any] = {}
    for entry in records:
        out_val = entry.get(out)
        if out_val is None:
            continue
        records_by_out[str(out_val)] = entry
        by_val = entry.get(by)
        if by_val is not None:
            map_by_by[str(by_val)] = out_val
            map_by_out[str(out_val)] = by_val

    def _package(record: Any, resolved: Any) -> dict[str, Any]:
        """Assemble the step output; key order matters if names collide."""
        return {
            "input": input_value,
            "record": record,
            out: resolved,
            "records": records,
            "by": by,
            "value": out,
            f"map_by_{by}": map_by_by,
            f"map_by_{out}": map_by_out,
            f"records_by_{out}": records_by_out,
        }

    if input_value is None:
        if trace:
            trace(
                f"resolve resource={resource_name} input=<none> "
                f"records={len(records)}"
            )
        return _package(None, None)

    needle = str(input_value)
    needle_ci = needle.lower()
    predicates: dict[str, Callable[[str], bool]] = {
        "equals": lambda cand: cand == needle,
        "ci_equals": lambda cand: cand.lower() == needle_ci,
        "contains": lambda cand: needle_ci in cand.lower(),
    }
    accepts = predicates.get(mode)

    matched = None
    if accepts is not None:  # an unrecognized mode never matches
        for entry in records:
            candidate = entry.get(by)
            if candidate is not None and accepts(str(candidate)):
                matched = entry
                break

    if matched is None:
        preview = ", ".join(str(r.get(by, "")) for r in records[:30] if r.get(by))
        raise ValueError(
            f"Could not resolve '{input_value}' via resource '{resource_name}'. "
            f"Available: {preview}"
        )

    result = _package(matched, matched.get(out))
    # Also expose common alias used by templates.
    if out != "id":
        result["id"] = matched.get(out)
    if trace:
        trace(
            f"resolve resource={resource_name} input={input_value!r} "
            f"matched_{out}={matched.get(out)!r}"
        )
    return result
467
+
468
+
469
def _run_request_step(
    step_spec: dict[str, Any],
    adapter: AdapterSpec,
    ctx: dict[str, Any],
    session: Session | None,
    verbose: bool = False,
    trace: Callable[[str], None] | None = None,
    label: str | None = None,
) -> dict[str, Any]:
    """Execute one request step and return its response dict.

    Provider-backed steps that answer with a status >= 400 are retried once
    with ``args._retry`` set in a copied context. The retry's response is
    used only when its status code is lower than the first one.
    """
    first = _execute_request(
        _build_request(adapter, step_spec, ctx, session),
        adapter,
        verbose=verbose,
        trace=trace,
        label=label,
    )

    # Useful for providers with rotating request internals (e.g. X query ids).
    needs_retry = bool(step_spec.get("provider")) and first.get("status", 0) >= 400
    if not needs_retry:
        return first

    # Work on a copy so the retry flag never leaks into later steps.
    retry_ctx = copy.deepcopy(ctx)
    retry_ctx.setdefault("args", {})
    retry_ctx["args"]["_retry"] = True
    retry_request = _build_request(adapter, step_spec, retry_ctx, session)
    if trace:
        trace(f"{label or 'request'} retry with args._retry=true")
    second = _execute_request(
        retry_request,
        adapter,
        verbose=verbose,
        trace=trace,
        label=label,
    )
    if second.get("status", 0) < first.get("status", 0):
        return second
    return first
506
+
507
+
508
def _run_fanout_step(
    step_spec: dict[str, Any],
    adapter: AdapterSpec,
    ctx: dict[str, Any],
    session: Session | None,
    verbose: bool = False,
    trace: Callable[[str], None] | None = None,
    label: str | None = None,
) -> list[dict[str, Any]]:
    """Issue one request per item of ``items_from`` and collect the responses.

    Each request is built against a copy of *ctx* extended with ``item`` and
    a zero-based ``index``. Every response dict gets the originating ``item``
    and a one-based ``index`` attached. A ``limit`` that cannot be converted
    to an int is ignored.
    """
    tag = label or "fanout"

    items = render_value(step_spec.get("items_from"), ctx)
    if items is None:
        if trace:
            trace(f"{tag} items=0 (items_from resolved to null)")
        return []
    if not isinstance(items, list):
        items = [items]

    limit = render_value(step_spec.get("limit"), ctx)
    if limit is not None:
        try:
            items = items[: int(limit)]
        except (TypeError, ValueError):
            # Deliberate best-effort: an unusable limit means "no limit".
            pass

    if trace:
        trace(f"{tag} items={len(items)}")

    req_spec = step_spec.get("request", {})
    responses: list[dict[str, Any]] = []
    status_counts: dict[int, int] = {}
    for position, item in enumerate(items):
        # Fresh context per item so iterations cannot affect each other.
        item_ctx = copy.deepcopy(ctx)
        item_ctx["item"] = item
        item_ctx["index"] = position
        response = _execute_request(
            _build_request(adapter, req_spec, item_ctx, session),
            adapter,
            verbose=verbose,
            trace=trace,
            label=f"{tag}[{position}]",
        )
        response["item"] = item
        response["index"] = position + 1
        code = int(response.get("status", 0) or 0)
        status_counts.setdefault(code, 0)
        status_counts[code] += 1
        responses.append(response)

    if trace:
        status_summary = ", ".join(
            f"{code}x{count}" for code, count in sorted(status_counts.items())
        )
        trace(
            f"{tag} responses={len(responses)} "
            f"statuses=[{status_summary}]"
        )
    return responses
563
+
564
+
565
+ def _records_from_output(output: Any) -> list[dict[str, Any]]:
566
+ if isinstance(output, list):
567
+ return [r for r in output if isinstance(r, dict)]
568
+ if isinstance(output, dict):
569
+ records = output.get("records")
570
+ if isinstance(records, list):
571
+ return [r for r in records if isinstance(r, dict)]
572
+ return []
573
+
574
+
575
def execute_command(
    adapter: AdapterSpec,
    cmd: CommandSpec,
    args: dict[str, Any],
    session: Session | None,
    verbose: bool = False,
    trace: bool = False,
    no_truncate: bool = False,
) -> ExecutionResult:
    """Run the steps of ``cmd.pipeline`` in order and collect the records.

    Each step is a one-key mapping selecting its kind (``resolve``,
    ``request``, ``fanout``, ``parse`` or ``transform``). Step outputs are
    stored under their name in ``ctx["steps"]`` so later steps can refer to
    them. The final records come from ``cmd.output["from_step"]`` when that
    step exists, otherwise from the last step's output.

    Raises:
        ValueError: If a step is not a mapping or has no supported kind key.
    """
    trace_lines: list[str] = []

    def _trace(msg: str) -> None:
        if trace:
            trace_lines.append(msg)

    def _finish(records: list[dict[str, Any]], body: str | None) -> ExecutionResult:
        return ExecutionResult(
            records=records,
            last_response_body=body,
            trace_lines=(trace_lines if trace else None),
        )

    ctx: dict[str, Any] = {
        "args": dict(args),
        "auth": session.data if session else {},
        "steps": {},
        "flags": {
            "no_truncate": no_truncate,
        },
    }

    pipeline = list(cmd.pipeline or [])
    if not pipeline:
        return _finish([], None)

    last_output: Any = None
    last_body: str | None = None

    arg_names = sorted(ctx["args"])
    _trace(
        f"command={adapter.meta.name}.{cmd.name} steps={len(pipeline)} "
        f"args={arg_names}"
    )

    # Checked in this order; the first key present decides the step kind.
    step_kinds = ("resolve", "request", "fanout", "parse", "transform")

    for idx, raw_step in enumerate(pipeline):
        if not isinstance(raw_step, dict):
            raise ValueError(f"Invalid pipeline step at index {idx}: {raw_step!r}")

        kind = next((k for k in step_kinds if k in raw_step), None)
        if kind is None:
            raise ValueError(f"Unsupported pipeline step keys: {list(raw_step.keys())}")

        step_spec = raw_step[kind] or {}
        step_name = step_spec.get("name") or raw_step.get("name") or f"{kind}_{idx}"
        head = f"step[{idx}] {kind}:{step_name}"

        if kind in ("parse", "transform"):
            # These steps consume an earlier output: a named one via "from",
            # or whatever the previous step produced.
            from_step = step_spec.get("from")
            source = ctx["steps"].get(from_step) if from_step else last_output
            _trace(f"{head} start from={from_step or '<last>'}")
            if from_step and source is None:
                _trace(f"{head} warning missing source step='{from_step}'")

            if kind == "transform":
                output = apply_post_ops(
                    _records_from_output(source), step_spec.get("ops"), ctx
                )
            elif step_spec.get("parser") != "custom":
                output = parse_records(source, step_spec, ctx)
            elif isinstance(source, dict):
                output = parse_custom(
                    step_spec["script"],
                    adapter.adapter_dir,
                    source.get("status", 0),
                    source.get("headers", {}),
                    source.get("body", ""),
                    ctx["args"],
                )
            else:
                # Custom parsers only run on a single response dict.
                output = []
        else:
            _trace(f"{head} start")
            if kind == "resolve":
                output = _run_resolve_step(
                    step_spec, adapter, ctx, session, verbose=verbose, trace=_trace
                )
            else:
                runner = _run_request_step if kind == "request" else _run_fanout_step
                output = runner(
                    step_spec,
                    adapter,
                    ctx,
                    session,
                    verbose=verbose,
                    trace=_trace,
                    label=head,
                )

        ctx["steps"][step_name] = output
        last_output = output

        if kind == "request":
            last_body = output.get("body")
            _trace(
                f"{head} done "
                f"status={output.get('status')} output={_summarize(output)}"
            )
            continue
        if kind == "fanout" and output:
            last_body = output[-1].get("body")
        _trace(f"{head} done output={_summarize(output)}")

    output_from = cmd.output.get("from_step")
    if output_from and output_from in ctx["steps"]:
        records = _records_from_output(ctx["steps"][output_from])
    else:
        records = _records_from_output(last_output)

    _trace(
        f"result records={len(records)} output_from={output_from or '<last>'} "
        f"last_response_body_bytes={_payload_size(last_body)}"
    )
    return _finish(records, last_body)