wtftools 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wtftools/explain.py ADDED
@@ -0,0 +1,290 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Diagnostic advice: turn (check, status) → actionable suggestion.
4
+
5
+ Two modes:
6
+
7
+ 1. **Text mode** (`wtf explain`): rule-based, deterministic, no network.
8
+ Each WARN/FAIL result is matched against a suggestion table.
9
+
10
+ 2. **Prompt mode** (`wtf explain --prompt`): emit an LLM-ready prompt that
11
+ includes the audit findings — pipe to `claude`, `ollama run`, or any other
12
+ LLM and get a synthesized diagnosis without bundling an LLM dependency.
13
+ """
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Callable, List, Optional, Tuple, Union
17
+
18
+ from wtftools import sysinfo
19
+ from wtftools.audit import CheckResult
20
+
21
+
22
@dataclass
class Suggestion:
    """Advice produced for a single audit CheckResult.

    ``name``, ``status`` and ``message`` are copied from the originating
    result by ``suggest()``; ``advice`` is the recommendation text chosen
    for it.
    """

    name: str
    status: str
    message: str
    advice: str
    # Extra context lines. Empty unless a caller fills them in (for example
    # ``explain_results(..., deep=True)``). A default_factory is used so each
    # instance owns its own list.
    investigation: List[str] = field(default_factory=list)
31
+
32
+
33
# Advice attached to a rule: either the finished text, or a callable that
# receives the matched CheckResult and returns the text.
SuggestionAdvice = Union[str, Callable[[CheckResult], str]]
34
+
35
# Rule table consulted by `suggest()`: (predicate, advice) pairs, scanned in
# order; the first predicate that accepts the result decides the advice.
# Advice is either a ready-made string or a callable that renders the text
# from the matched result.

_PROBLEM = ("warn", "fail")
_FAIL = ("fail",)


def _named(name: str, statuses: Tuple[str, ...] = _PROBLEM) -> Callable[[CheckResult], bool]:
    """Build a predicate: check name equals *name* and status is in *statuses*."""

    def matches(r: CheckResult) -> bool:
        return r.name == name and r.status in statuses

    return matches


def _prefixed(prefix: str, statuses: Tuple[str, ...] = _PROBLEM) -> Callable[[CheckResult], bool]:
    """Build a predicate: check name starts with *prefix* and status is in *statuses*."""

    def matches(r: CheckResult) -> bool:
        return r.name.startswith(prefix) and r.status in statuses

    return matches


def _disk_advice(r: CheckResult) -> str:
    """Advice for a `disk <mount>` finding."""
    mount = r.name[5:]  # drop the "disk " prefix
    return (
        f"Filesystem {mount} is filling up. Common culprits: log files "
        "in /var/log, journald, docker overlay, core dumps. "
        f"Run `du -sh {mount}/* | sort -h` to find the largest. "
        "For journald: `journalctl --vacuum-size=4G`."
    )


def _inodes_advice(r: CheckResult) -> str:
    """Advice for an `inodes <mount>` finding."""
    mount = r.name[7:]  # drop the "inodes " prefix
    return (
        f"Inode exhaustion on {mount}. Hidden small-file accumulation: "
        "PHP sessions, mailq, sysvshm, broken caches. "
        f"`find {mount} -xdev -type f | head -1000` to sample."
    )


def _psi_advice(r: CheckResult) -> str:
    """Advice for a `PSI <resource>` finding."""
    resource = r.name[4:]  # drop the "PSI " prefix
    return (
        f"Pressure stall on {resource} — real contention even if classic "
        "metrics look OK. avg10 is the recent (10s) share of time tasks were "
        "stalled. Drill into the matching subsystem: cpu→load/top, "
        "memory→`wtf info`, io→iostat/iotop."
    )


def _failed_units_advice(r: CheckResult) -> str:
    """Advice listing every failed unit from the result detail."""
    units = ", ".join(r.detail)
    return (
        f"Failed unit(s): {units}. "
        "Inspect each with `wtf services <name>` or `systemctl status <name>` "
        "+ `journalctl -u <name> -n 50`."
    )


def _restart_loops_advice(r: CheckResult) -> str:
    """Advice listing the flapping services from the result detail."""
    services = ", ".join(r.detail)
    return (
        "Service(s) restarted many times since boot. systemd is hiding flapping. "
        f"List: {services}. "
        "Use `wtf services <name>` for journal + last cause. "
        "Consider Restart=on-failure with StartLimitBurst, or fix the underlying bug."
    )


def _enabled_but_down_advice(r: CheckResult) -> str:
    """Advice naming at most the first three detail entries."""
    culprits = ", ".join(r.detail[:3])
    return (
        "Service is enabled but not running. "
        f"Cause is in: {culprits}. "
        "Run `systemctl status <name>` for the last fail reason."
    )


def _cert_expiry_advice(r: CheckResult) -> str:
    """Advice quoting the first detail entry, when there is one."""
    most_urgent = r.detail[0] if r.detail else "see audit detail"
    return (
        "TLS certificate(s) expiring soon. Renew via `certbot renew` (Let's "
        "Encrypt) or your CA flow. Most urgent: " + most_urgent + ". "
        "Automate renewal + reload hook for the consumer (nginx/haproxy/etc)."
    )


def _readonly_mounts_advice(r: CheckResult) -> str:
    """Advice listing the read-only filesystems from the result detail."""
    mounts = ", ".join(r.detail)
    return (
        f"Filesystem(s) unexpectedly read-only: {mounts}. "
        "Cause: kernel remount-ro on IO error or fsck failure. Check "
        "`dmesg | grep -i 'EXT4-fs error\\|remount'`. May need fsck + reboot."
    )


_RULES: List[Tuple[Callable[[CheckResult], bool], SuggestionAdvice]] = [
    (
        _named("swap"),
        "Swap is heavily used. Likely causes: a leaking process is being paged out, "
        "memory pressure forces swap. Action: identify top RAM consumers via "
        "`wtf info`, restart the offender or tune memory limits, consider adding "
        "RAM/swap. Recurrent on cron-jobs → consider OOM-protecting critical services.",
    ),
    (
        _named("memory"),
        "Memory headroom is low. Find the consumer via `wtf info` (TOP BY RAM). "
        "Quick fixes: restart the bloated service, lower its memory limits, scale "
        "out. Long-term: add monitoring/alerts.",
    ),
    (_prefixed("disk "), _disk_advice),
    (_prefixed("inodes "), _inodes_advice),
    (
        _named("load average"),
        "Run queue depth exceeds CPU count. Combine with `wtf audit --check iowait psi` "
        "to separate CPU-bound from IO-bound. `wtf info` (TOP BY CPU) shows the consumer.",
    ),
    (
        _named("CPU iowait"),
        "High iowait — processes blocked on disk/network IO. Check `iostat -x 1` "
        "for the busy device. Could be a saturated disk, network FS hang, or "
        "kernel-task stuck in D state (`wtf audit --check d-state`).",
    ),
    (_prefixed("PSI "), _psi_advice),
    (_named("failed systemd units", _FAIL), _failed_units_advice),
    (_named("restart loops"), _restart_loops_advice),
    (_named("enabled but down"), _enabled_but_down_advice),
    (
        _prefixed("OOM kills", _FAIL),
        "Kernel killed processes due to OOM. Identify the victim and the bloated "
        "process: `journalctl -k --since '24h ago' | grep -i 'oom\\|killed'`. "
        "Address: add RAM, fix leak, tune oom_score_adj for critical services.",
    ),
    (
        _named("kernel taint"),
        "Kernel saw something it didn't like (proprietary module, oops, machine "
        "check…). Check `dmesg | grep -i 'taint\\|oops\\|bug'` and "
        "`cat /proc/sys/kernel/tainted`. MACHINE_CHECK = hardware error.",
    ),
    (_named("cert expiry"), _cert_expiry_advice),
    (
        _named("TCP retransmits"),
        "Network is dropping packets. Check switch port errors (`ethtool -S eth0`), "
        "MTU mismatch, congestion. `ss -ti` shows per-connection retrans counts.",
    ),
    (
        _named("network errors"),
        "NIC accumulated rx/tx errors or drops. Likely a hardware/cable issue or "
        "buffer overflow under burst. `ethtool -S <iface>`, `ip -s link show <iface>`.",
    ),
    (
        _named("D-state processes"),
        "Processes stuck in uninterruptible sleep — typically blocked on a hung "
        "mount or storage device. `ps -eLo pid,stat,wchan:25,comm | grep ' D'` "
        "shows where each is waiting in the kernel.",
    ),
    (
        _named("conntrack"),
        "Connection tracking table near limit. Will silently drop new connections "
        "when full. Quick fix: `sysctl -w net.netfilter.nf_conntrack_max=<2x>`. "
        "Long-term: tune timeouts (nf_conntrack_tcp_timeout_established), "
        "exempt high-throughput flows via NOTRACK, or scale out.",
    ),
    (
        _named("journal disk"),
        "journald has grown large. `journalctl --vacuum-size=2G` or "
        "`--vacuum-time=7d`. To bound permanently: edit "
        "/etc/systemd/journald.conf → `SystemMaxUse=4G`.",
    ),
    (
        _named("reboot required"),
        "Pending kernel/library update needs a reboot. Schedule the reboot to "
        "pick up security fixes. The pkg list shows what triggered it.",
    ),
    (
        _named("system state"),
        "systemd reports 'degraded' — at least one unit is failed. Use "
        "`wtf audit --check failed-units` for the list, then `wtf services <name>`.",
    ),
    (
        _named("time sync"),
        "Clock not synchronized. Effects: TLS cert failures, log misalignment, "
        "auth replay protection breaks. Run `timedatectl set-ntp true`, check "
        "`chronyc tracking` for drift, ensure firewall allows NTP.",
    ),
    (
        _named("zombie processes"),
        "Zombie processes — parents not reap()ing exited children. Find the "
        "parent: `ps -eo pid,ppid,stat,comm | awk '$3 ~ /Z/'`. Usually a "
        "supervisor/process-manager bug; restarting the parent reaps the zombie.",
    ),
    (_named("read-only mounts", _FAIL), _readonly_mounts_advice),
    (
        _prefixed("kernel errors"),
        "Recent kernel error lines — could indicate hardware (memory, disk), "
        "driver, or filesystem issue. `journalctl -k -p err --since '24h ago'`.",
    ),
    (
        _named("open file descriptors"),
        "File descriptor pressure. Find offenders: "
        '`for p in /proc/[0-9]*; do echo "$(ls $p/fd 2>/dev/null | wc -l) $p"; done | sort -n | tail`.',
    ),
    (
        _named("process count"),
        "PID table filling — fork-bomb, runaway service, or insufficient pid_max. "
        "Find the parent: `ps -eo pid,ppid,comm --sort=-pid | head -20`.",
    ),
    (
        _prefixed("plugin:"),
        "Custom plugin reported a problem. Re-run the plugin manually to inspect "
        "its full output.",
    ),
]
191
+
192
# Advice used when no entry of the rule table accepts a result.
_FALLBACK = (
    "No built-in advice for this check yet. Use `wtf audit -v --check <name>` "
    "to see the full detail, or pipe `wtf explain --prompt` to an LLM."
)


def suggest(result: CheckResult) -> Suggestion:
    """Build the Suggestion for *result*.

    The rule table is scanned in order and the first rule whose predicate
    accepts the result supplies the advice; callable advice is rendered with
    the result. When nothing matches, the generic fallback text is used.
    """
    chosen = next((advice for applies, advice in _RULES if applies(result)), _FALLBACK)
    text = chosen(result) if callable(chosen) else chosen
    return Suggestion(name=result.name, status=result.status, message=result.message, advice=text)
202
+
203
+
204
def investigate(result: CheckResult) -> List[str]:
    """Gather live context lines for one finding (heavier than static advice).

    Only disk-fill findings are handled: names starting with ``"disk "`` or
    ``"inodes "``, where the rest of the name is taken as the mount point.
    Any other finding yields an empty list.

    The returned lines carry no markup. Each section is emitted only when the
    corresponding ``sysinfo`` helper returns something truthy, so missing data
    simply leaves that section out.
    """
    name = result.name
    if name.startswith("disk "):
        mount = name[5:]
    elif name.startswith("inodes "):
        mount = name[7:]
    else:
        # No specialised investigation for this kind of finding.
        return []

    report: List[str] = []

    top_dirs = sysinfo.get_top_paths_in(mount, limit=6)
    if top_dirs:
        report.append(f"Top directories under {mount}:")
        report.extend(f" {sysinfo.format_bytes(item['bytes']):>10} {item['path']}" for item in top_dirs)

    large_files = sysinfo.get_largest_files(mount, limit=5, min_size_mb=100)
    if large_files:
        report.append(f"Largest files (>100MB) under {mount}:")
        report.extend(f" {sysinfo.format_bytes(item['bytes']):>10} {item['path']}" for item in large_files)

    journal_size = sysinfo.get_journal_disk_usage()
    if journal_size:
        report.append(f"Journald: {sysinfo.format_bytes(journal_size)} (`journalctl --vacuum-size=2G` to trim)")

    docker_usage = sysinfo.get_docker_disk_usage()
    if docker_usage:
        report.append("Docker `system df`:")
        report.extend(
            f" {row['type']:<14} count={row['count']:<4} size={row['size']:<10} reclaimable={row['reclaimable']}"
            for row in docker_usage
        )

    docker_containers = sysinfo.get_docker_container_sizes(limit=5)
    if docker_containers:
        report.append("Largest Docker containers (RW + base image):")
        report.extend(f" {item['size']:>14} {item['name']:<24} {item['image']}" for item in docker_containers)

    docker_logs = sysinfo.get_docker_log_sizes(limit=5)
    if docker_logs:
        report.append("Largest Docker JSON log files:")
        report.extend(
            f" {sysinfo.format_bytes(item['bytes']):>10} {item['name']:<24} {item['log_path']}"
            for item in docker_logs
        )

    return report
253
+
254
+
255
def explain_results(results: List[CheckResult], include_ok: bool = False, deep: bool = False) -> List[Suggestion]:
    """Turn audit results into Suggestions, in input order.

    Results whose status is not ``"warn"`` or ``"fail"`` are dropped unless
    *include_ok* is set. With *deep*, each Suggestion additionally gets the
    lines returned by ``investigate(result)`` in its ``investigation`` field,
    which is slower but yields concrete data to act on.
    """
    suggestions: List[Suggestion] = []
    for result in results:
        if include_ok or result.status in ("warn", "fail"):
            suggestion = suggest(result)
            if deep:
                suggestion.investigation = investigate(result)
            suggestions.append(suggestion)
    return suggestions
270
+
271
+
272
# Instruction text placed at the top of every prompt built by render_prompt().
PROMPT_PREAMBLE = """You are a senior SRE. Below is a wtftools audit of a Linux host.
For each WARN/FAIL finding, give a 1-2 sentence likely root cause and 2-3 concrete actions.
Keep it tight: a paragraph per finding, no preamble. Output the highest-priority finding first.
"""


def render_prompt(results: List[CheckResult], host: Optional[str] = None) -> str:
    """Build the LLM prompt text for an audit.

    The prompt is the preamble, an optional ``Host:`` line (only when *host*
    is truthy), then one row per result of any status. Each row is followed
    by at most the first three entries of that result's ``detail``. Statuses
    other than ok/warn/fail/skip are tagged ``[????]``. The text always ends
    with a newline.
    """
    tags = {"ok": "[ OK ]", "warn": "[WARN]", "fail": "[FAIL]", "skip": "[SKIP]"}

    header = [PROMPT_PREAMBLE]
    if host:
        header += [f"Host: {host}", ""]
    header.append("Audit findings (all rows; FAIL/WARN are the priorities):")

    rows: List[str] = []
    for r in results:
        tag = tags.get(r.status, "[????]")
        rows.append(f" {tag} {r.name:<30} {r.message}")
        rows.extend(f" • {d}" for d in r.detail[:3])

    return "\n".join(header + rows) + "\n"
wtftools/info.py ADDED
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Rendering of `wtf info` — quick summary of the server state."""
4
+
5
+ from typing import List
6
+
7
+ from wtftools import colors, sysinfo
8
+
9
+
10
def _bar(percent: int, width: int = 20) -> str:
    """Render *percent* as a coloured progress bar followed by the number.

    The value is clamped to 0..100. The bar is *width* cells wide and is
    coloured red from 90%, yellow from 75% and green below that.
    """
    pct = max(0, min(100, percent))
    filled_cells = int(round(width * pct / 100))
    cells = "█" * filled_cells + "·" * (width - filled_cells)

    if pct >= 90:
        paint = colors.red
    elif pct >= 75:
        paint = colors.yellow
    else:
        paint = colors.green

    return f"[{paint(cells)}] {pct:3d}%"
23
+
24
+
25
def render_info() -> str:
    """Return the `wtf info` summary as one multi-line string.

    Sections are emitted in a fixed order: SYSTEM, MEMORY, DISK, TOP BY CPU,
    TOP BY RAM and NETWORK. All data comes from ``sysinfo`` helpers and all
    colouring from ``colors``.
    """
    out: List[str] = []

    # --- SYSTEM -----------------------------------------------------------
    os_release = sysinfo.get_os_release()
    name = os_release.get("PRETTY_NAME") or os_release.get("NAME") or "Linux"
    kernel = sysinfo.get_kernel()
    hostname = sysinfo.get_hostname()
    uptime = sysinfo.format_duration(sysinfo.get_uptime_seconds())
    # Queried once and reused for both the cpu line and the per-cpu load.
    cpu_count = sysinfo.get_cpu_count()

    out.append(colors.section("SYSTEM"))
    out.append(f" host : {colors.bold(hostname)}")
    out.append(f" os : {name}")
    out.append(f" kernel : {kernel}")
    out.append(f" uptime : {uptime}")
    out.append(f" cpu : {sysinfo.get_cpu_model()} (x{cpu_count})")

    load1, load5, load15 = sysinfo.get_loadavg()
    cpus = cpu_count or 1  # avoid dividing by zero/None when the count is unknown
    out.append(f" load : {load1:.2f} {load5:.2f} {load15:.2f} (per-cpu {load1 / cpus:.2f})")

    # --- MEMORY -----------------------------------------------------------
    out.append("")
    out.append(colors.section("MEMORY"))
    mem = sysinfo.get_memory_summary()
    out.append(
        f" ram : {_bar(mem['percent'])} "
        f"{sysinfo.format_bytes(mem['used'])} / {sysinfo.format_bytes(mem['total'])}"
    )
    if mem["swap_total"]:
        out.append(
            f" swap : {_bar(mem['swap_percent'])} "
            f"{sysinfo.format_bytes(mem['swap_used'])} / {sysinfo.format_bytes(mem['swap_total'])}"
        )
    else:
        out.append(f" swap : {colors.dim('not configured')}")

    # --- DISK -------------------------------------------------------------
    out.append("")
    out.append(colors.section("DISK"))
    disks = sysinfo.get_disks()
    if not disks:
        out.append(colors.dim(" no mounts found"))
    else:
        for disk in disks:
            target = disk["target"]
            # Keep the label within 16 columns: long paths keep their tail.
            label = target if len(target) <= 16 else "…" + target[-15:]
            out.append(
                f" {label:<16} {_bar(disk['percent'])} "
                f"{sysinfo.format_bytes(disk['used'])} / {sysinfo.format_bytes(disk['total'])} "
                f"{colors.dim(disk['fstype'])}"
            )

    # --- TOP BY CPU -------------------------------------------------------
    out.append("")
    out.append(colors.section("TOP BY CPU"))
    for proc in sysinfo.get_top_processes(by="cpu", limit=5):
        user = str(proc.get("user", ""))[:12]
        out.append(f" {proc['cpu_percent']:5.1f}% {user:<12} {proc['pid']:>7} {proc['name']}")

    # --- TOP BY RAM -------------------------------------------------------
    out.append("")
    out.append(colors.section("TOP BY RAM"))
    for proc in sysinfo.get_top_processes(by="rss", limit=5):
        user = str(proc.get("user", ""))[:12]
        out.append(f" {sysinfo.format_bytes(proc.get('rss', 0)):>8} {user:<12} {proc['pid']:>7} {proc['name']}")

    # --- NETWORK ----------------------------------------------------------
    out.append("")
    out.append(colors.section("NETWORK"))
    for iface in sysinfo.get_network_interfaces():
        # Pad the plain word BEFORE colouring it. A colour helper may wrap the
        # text in escape sequences; those would count towards a `:<6` field
        # width applied afterwards and push the address column out of line.
        if iface.get("up"):
            state = colors.green(f"{'up':<6}")
        else:
            state = colors.red(f"{'down':<6}")
        ipv4 = ", ".join(iface.get("ipv4") or []) or colors.dim("(no ipv4)")
        out.append(f" {iface['name']:<10} {state} {ipv4}")

    ports = sysinfo.get_listening_ports()
    if ports:
        unique_ports = sorted({p["port"] for p in ports})
        shown = ", ".join(str(p) for p in unique_ports[:20])
        # Only the first 20 ports are listed; the rest are summarised.
        extra = colors.dim(f" (+{len(unique_ports)-20} more)") if len(unique_ports) > 20 else ""
        out.append(f" {colors.dim('listening tcp:')} {shown}" + extra)

    return "\n".join(out)
wtftools/llm.py ADDED
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """Optional LLM bridges for `wtf explain --llm …`.
4
+
5
+ Each backend is best-effort: it returns the model's text on success or None
6
+ when the backend isn't available (binary missing, SDK not installed, no API
7
+ key, network error, …). The caller is expected to fall back gracefully.
8
+
9
+ Supported backends:
10
+ ollama subprocess: ollama run <model>
11
+ claude anthropic Python SDK + ANTHROPIC_API_KEY env
12
+ openai openai Python SDK + OPENAI_API_KEY env
13
+ auto try ollama → claude → openai
14
+
15
+ All backends accept the same `prompt` string. We pass `wtf explain --prompt`
16
+ output verbatim; the model is expected to produce per-finding diagnoses.
17
+ """
18
+
19
+ import logging
20
+ import os
21
+ import shutil
22
+ import subprocess
23
+ import traceback
24
+ from typing import Optional, Tuple
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
# Default model name per backend, read once at import time. Each one can be
# preset through its WTFTOOLS_*_MODEL environment variable; a model passed
# explicitly to a call_* function (e.g. from --llm-model) takes precedence.
DEFAULT_OLLAMA_MODEL = os.environ.get("WTFTOOLS_OLLAMA_MODEL", "llama3.1")
DEFAULT_CLAUDE_MODEL = os.environ.get("WTFTOOLS_CLAUDE_MODEL", "claude-haiku-4-5-20251001")
DEFAULT_OPENAI_MODEL = os.environ.get("WTFTOOLS_OPENAI_MODEL", "gpt-4o-mini")
32
+
33
+
34
def call_ollama(prompt: str, model: Optional[str] = None, timeout: int = 60) -> Tuple[Optional[str], Optional[str]]:
    """Run a local model through the ``ollama`` CLI.

    Args:
        prompt: Text sent to the model on stdin.
        model: Model name; falls back to ``DEFAULT_OLLAMA_MODEL`` when empty.
        timeout: Seconds to wait for the subprocess.

    Returns:
        ``(text, None)`` with the process stdout on success, otherwise
        ``(None, reason)``. This function does not raise for a missing
        binary, a timeout, a non-zero exit or a failed exec.
    """
    if not shutil.which("ollama"):
        return None, "ollama binary not found in PATH"
    chosen = model or DEFAULT_OLLAMA_MODEL
    try:
        result = subprocess.run(
            ["ollama", "run", chosen],
            input=prompt,
            text=True,
            # Pin UTF-8 instead of the locale encoding: the prompt contains
            # non-ASCII characters and must survive non-UTF-8 locales.
            # Undecodable bytes are replaced rather than aborting the call.
            encoding="utf-8",
            errors="replace",
            capture_output=True,
            timeout=timeout,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return None, f"ollama timed out after {timeout}s"
    except Exception as exc:
        # Best-effort backend: report any exec failure instead of raising.
        return None, f"ollama exec failed: {type(exc).__name__}: {exc}"
    if result.returncode != 0:
        # Keep the reported stderr short; it is shown inline to the user.
        err = (result.stderr or "").strip()[:200]
        return None, f"ollama exit {result.returncode}: {err}"
    return result.stdout, None
56
+
57
+
58
def call_claude(prompt: str, model: Optional[str] = None, timeout: int = 30) -> Tuple[Optional[str], Optional[str]]:
    """Query Claude through the ``anthropic`` SDK.

    Needs the ``ANTHROPIC_API_KEY`` environment variable and an importable
    ``anthropic`` package; the SDK is imported lazily so it stays optional.

    Args:
        prompt: Sent as a single user message.
        model: Model name; falls back to ``DEFAULT_CLAUDE_MODEL`` when empty.
        timeout: Passed to the SDK client.

    Returns:
        ``(text, None)`` on success, otherwise ``(None, reason)``. This
        function does not raise for a missing key, a missing SDK or an
        API error.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        return None, "ANTHROPIC_API_KEY not set"
    try:
        import anthropic  # type: ignore
    except ImportError:
        return None, "anthropic SDK not installed (pip install anthropic)"
    chosen = model or DEFAULT_CLAUDE_MODEL
    try:
        client = anthropic.Anthropic(api_key=api_key, timeout=timeout)
        response = client.messages.create(
            model=chosen,
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )
        # Content blocks without a `text` attribute contribute nothing.
        text = "".join(getattr(b, "text", "") for b in response.content)
        return text, None
    except Exception as exc:
        # Lazy %-formatting plus exc_info: the traceback is only rendered
        # when a handler actually emits DEBUG records.
        logger.debug("claude call failed: %s: %s", type(exc).__name__, exc, exc_info=True)
        return None, f"claude API error: {type(exc).__name__}: {exc}"
80
+
81
+
82
def call_openai(prompt: str, model: Optional[str] = None, timeout: int = 30) -> Tuple[Optional[str], Optional[str]]:
    """Query an OpenAI chat model through the ``openai`` SDK.

    Needs the ``OPENAI_API_KEY`` environment variable and an importable
    ``openai`` package; the SDK is imported lazily so it stays optional.

    Args:
        prompt: Sent as a single user message.
        model: Model name; falls back to ``DEFAULT_OPENAI_MODEL`` when empty.
        timeout: Passed to the SDK client.

    Returns:
        ``(text, None)`` on success, otherwise ``(None, reason)``. A reply
        whose message content is ``None`` is reported as a failure with a
        reason, never as ``(None, None)``.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        return None, "OPENAI_API_KEY not set"
    try:
        from openai import OpenAI  # type: ignore
    except ImportError:
        return None, "openai SDK not installed (pip install openai)"
    chosen = model or DEFAULT_OPENAI_MODEL
    try:
        client = OpenAI(api_key=api_key, timeout=timeout)
        response = client.chat.completions.create(
            model=chosen,
            messages=[{"role": "user", "content": prompt}],
        )
        content = response.choices[0].message.content
        if content is None:
            # The message content is nullable (e.g. refusals or tool calls);
            # give the caller a reason instead of an unexplained None.
            return None, "openai returned no text content"
        return content, None
    except Exception as exc:
        # Lazy %-formatting plus exc_info: the traceback is only rendered
        # when a handler actually emits DEBUG records.
        logger.debug("openai call failed: %s: %s", type(exc).__name__, exc, exc_info=True)
        return None, f"openai API error: {type(exc).__name__}: {exc}"
102
+
103
+
104
# Backend name -> function taking (prompt, model=..., timeout=...) and
# returning (text, error).
_BACKENDS = {
    "ollama": call_ollama,
    "claude": call_claude,
    "openai": call_openai,
}


def call_llm(backend: str, prompt: str, model: Optional[str] = None, timeout: Optional[int] = None) -> Tuple[Optional[str], Optional[str]]:
    """Dispatch *prompt* to one backend, or try them all for ``"auto"``.

    Args:
        backend: ``"ollama"``, ``"claude"``, ``"openai"`` or ``"auto"``.
        prompt: Passed unchanged to the backend.
        model: Forwarded as-is; each backend applies its own default when
            this is empty.
        timeout: Forwarded only when given, so each backend otherwise keeps
            its own default timeout.

    Returns:
        For a named backend, that backend's ``(text, error)`` pair. For
        ``"auto"``, ``(text, "via <backend>")`` from the first backend that
        returns text, or ``(None, message)`` where the message lists why
        every backend failed. An unknown backend name yields
        ``(None, message)``.
    """
    kwargs: dict = {"model": model}
    if timeout is not None:
        kwargs["timeout"] = timeout

    if backend == "auto":
        # Try cheapest/most-local first.
        failures = []
        for candidate in ("ollama", "claude", "openai"):
            text, err = _BACKENDS[candidate](prompt, **kwargs)
            if text is not None:
                return text, f"via {candidate}"
            # Keep each reason so the final message explains what to fix.
            failures.append(f"{candidate}: {err or 'no output'}")
        return None, "no LLM backend available (tried ollama, claude, openai): " + "; ".join(failures)

    fn = _BACKENDS.get(backend)
    if fn is None:
        return None, f"unknown backend: {backend!r} (use ollama/claude/openai/auto)"
    return fn(prompt, **kwargs)