whycode-cli 0.2.6__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {whycode_cli-0.2.6/src/whycode_cli.egg-info → whycode_cli-0.3.1}/PKG-INFO +28 -5
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/README.md +25 -4
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/pyproject.toml +2 -1
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/__init__.py +1 -1
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/cli.py +60 -0
- whycode_cli-0.3.1/src/whycode/decisions.py +219 -0
- whycode_cli-0.3.1/src/whycode/llm.py +112 -0
- whycode_cli-0.3.1/src/whycode/mcp_server.py +509 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/risk_card.py +41 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1/src/whycode_cli.egg-info}/PKG-INFO +28 -5
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode_cli.egg-info/SOURCES.txt +4 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode_cli.egg-info/requires.txt +3 -0
- whycode_cli-0.3.1/tests/test_decisions.py +214 -0
- whycode_cli-0.3.1/tests/test_mcp_prompts.py +315 -0
- whycode_cli-0.2.6/src/whycode/mcp_server.py +0 -204
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/LICENSE +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/setup.cfg +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/__main__.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/git_facts.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/ignore.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/scorer.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/signals.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/suppressions.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/templates/__init__.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/templates/github-workflow.yml +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode/templates/pre-commit +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode_cli.egg-info/dependency_links.txt +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode_cli.egg-info/entry_points.txt +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/src/whycode_cli.egg-info/top_level.txt +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_cli.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_git_facts.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_ignore.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_scorer.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_signals.py +0 -0
- {whycode_cli-0.2.6 → whycode_cli-0.3.1}/tests/test_suppressions.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: whycode-cli
-Version: 0.2.6
+Version: 0.3.1
 Summary: Tells you what to be afraid of before you touch a file.
 Author: Kevin
 License-Expression: MIT
@@ -19,6 +19,8 @@ Requires-Dist: typer>=0.12
 Requires-Dist: rich>=13.7
 Provides-Extra: mcp
 Requires-Dist: mcp>=1.0; extra == "mcp"
+Provides-Extra: llm
+Requires-Dist: anthropic>=0.40; extra == "llm"
 Provides-Extra: dev
 Requires-Dist: pytest>=8; extra == "dev"
 Requires-Dist: pytest-cov>=5; extra == "dev"
@@ -197,11 +199,32 @@ Tune the thresholds inside those two files for your repo. Re-run with
 | ----- | ------------------------------------------------------------------------ | -------- | -------- |
 | 1 | Deterministic git facts (log, diffstat, revert pairs, author activity) | no | no |
 | 2 | Heuristic signals (reverts, incidents, silence, ghost keeper, coupling, invariants, churn, newborn) | no | no |
-| 3 | LLM
+| 3 | LLM-extracted structured decisions (optional, opt-in, never on by default) | yes | yes |

-**Layer 1 + Layer 2 produce the Risk Card
-data leaving your machine.** Layer 3
-
+**Layer 1 + Layer 2 produce the Risk Card by default. No model calls, no
+data leaving your machine.** Layer 3 lifts the keyword fragments L1 + L2
+extract ("do not switch to async") into structured decisions with the
+*why* drawn from the surrounding commit body — but only when you ask for
+it with `--llm`.
+
+### Optional L3 — LLM-enriched decisions
+
+Install the optional extras and configure the env vars:
+
+```bash
+pip install 'whycode-cli[llm]'
+export WHYCODE_LLM_API_KEY="…"
+export WHYCODE_LLM_MODEL="<your-provider's-model-identifier>"
+
+whycode why src/some/file.py --llm          # full card + structured decisions
+whycode why src/some/file.py --llm-dry-run  # see exactly what would be sent
+```
+
+Privacy contract: configuration is entirely environment-driven (no
+hardcoded provider in the source tree); the SDK is lazy-imported (no
+import cost unless you opt in); only L2-filtered high-signal commits
+are sent (capped at 10 per call); a malformed model response degrades
+to "no decisions" rather than crashing.

 ## What this is NOT

@@ -169,11 +169,32 @@ Tune the thresholds inside those two files for your repo. Re-run with
 | ----- | ------------------------------------------------------------------------ | -------- | -------- |
 | 1 | Deterministic git facts (log, diffstat, revert pairs, author activity) | no | no |
 | 2 | Heuristic signals (reverts, incidents, silence, ghost keeper, coupling, invariants, churn, newborn) | no | no |
-| 3 | LLM
+| 3 | LLM-extracted structured decisions (optional, opt-in, never on by default) | yes | yes |

-**Layer 1 + Layer 2 produce the Risk Card
-data leaving your machine.** Layer 3
-
+**Layer 1 + Layer 2 produce the Risk Card by default. No model calls, no
+data leaving your machine.** Layer 3 lifts the keyword fragments L1 + L2
+extract ("do not switch to async") into structured decisions with the
+*why* drawn from the surrounding commit body — but only when you ask for
+it with `--llm`.
+
+### Optional L3 — LLM-enriched decisions
+
+Install the optional extras and configure the env vars:
+
+```bash
+pip install 'whycode-cli[llm]'
+export WHYCODE_LLM_API_KEY="…"
+export WHYCODE_LLM_MODEL="<your-provider's-model-identifier>"
+
+whycode why src/some/file.py --llm          # full card + structured decisions
+whycode why src/some/file.py --llm-dry-run  # see exactly what would be sent
+```
+
+Privacy contract: configuration is entirely environment-driven (no
+hardcoded provider in the source tree); the SDK is lazy-imported (no
+import cost unless you opt in); only L2-filtered high-signal commits
+are sent (capped at 10 per call); a malformed model response degrades
+to "no decisions" rather than crashing.

 ## What this is NOT

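For concreteness, one structured decision in the shape emitted by `Decision.to_dict()` in the new `whycode/decisions.py` (further down in this diff) might look as follows. This is a hand-written illustration: only the keys and types come from the module; the values are invented.

```python
# Illustrative only: keys/types follow Decision.to_dict(); values are invented.
example_decision = {
    "decision_type": "constraint",
    "what_changed": "Kept this file's I/O path synchronous",
    "why": 'Quotes the commit body: "do not switch to async" (the event loop deadlocked under load)',
    "do_not": "do not switch to async",
    "evidence": ["<full commit sha>"],
    "confidence": 0.8,
}
```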
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "whycode-cli"
-version = "0.2.6"
+version = "0.3.1"
 description = "Tells you what to be afraid of before you touch a file."
 readme = "README.md"
 license = "MIT"
@@ -26,6 +26,7 @@ dependencies = [

 [project.optional-dependencies]
 mcp = ["mcp>=1.0"]
+llm = ["anthropic>=0.40"]
 dev = [
     "pytest>=8",
     "pytest-cov>=5",
@@ -6,6 +6,7 @@ Commands
 - ``whycode why <path>`` — print the Risk Card for a single file.
 - ``whycode why <path> --at SHA`` — risk card as of a past commit.
 - ``whycode why <path> --mute KIND`` — locally suppress a noisy signal kind.
+- ``whycode why <path> --llm`` — opt-in L3: LLM-extracted structured decisions.
 - ``whycode highlights`` — repo-wide treasure map of decisions and incidents.
 - ``whycode diff [--base REF]`` — risk-rank files changed against a base ref.
 - ``whycode show <sha>`` — risk-flavored summary for one commit.
@@ -155,6 +156,20 @@ def why(
         "--no-mutes",
         help="Bypass the local suppression list — show all signals.",
     ),
+    llm: bool = typer.Option(
+        False,
+        "--llm",
+        help=(
+            "Enrich the card with LLM-extracted structured decisions "
+            "(L3, opt-in, requires WHYCODE_LLM_API_KEY + WHYCODE_LLM_MODEL). "
+            "Sends only commits already filtered by L2 — see --llm-dry-run."
+        ),
+    ),
+    llm_dry_run: bool = typer.Option(
+        False,
+        "--llm-dry-run",
+        help="Show exactly what would be sent to the LLM without making the call.",
+    ),
     max_commits: int | None = typer.Option(
         None, "--max-commits", help="Cap the number of commits scanned (debug)."
     ),
@@ -195,6 +210,51 @@ def why(
         ref=resolved_ref,
         apply_suppressions=not no_mutes,
     )
+
+    if llm or llm_dry_run:
+        from whycode import decisions as dec
+
+        # Pick high-signal commits for L3: incidents take priority, plus
+        # any commit with a substantial body. Cap to keep the prompt small.
+        facts = gf.gather(repo_root, rel, max_commits=max_commits, ref=resolved_ref)
+        candidates = list(facts.incident_commits)
+        for c in facts.commits:
+            if c not in candidates and len(c.body) >= 100:
+                candidates.append(c)
+            if len(candidates) >= dec.DEFAULT_MAX_COMMITS:
+                break
+        candidates = candidates[: dec.DEFAULT_MAX_COMMITS]
+        n_commits, prompt_chars = dec.estimate_payload(candidates)
+
+        if llm_dry_run:
+            err.print(
+                f"[bold]LLM dry-run:[/bold] would send "
+                f"[bold]{n_commits}[/bold] commit(s), "
+                f"[bold]~{prompt_chars}[/bold] chars to the configured LLM provider.\n"
+                f" [dim]Provider, model, and key all read from "
+                f"WHYCODE_LLM_* environment variables.[/dim]"
+            )
+            if not json_out:
+                console.print(rc.render_text(card))
+            else:
+                console.print_json(json.dumps(card.to_dict()))
+            return
+
+        if n_commits == 0:
+            err.print(
+                "[yellow]--llm:[/yellow] no high-signal commits to enrich on this file."
+            )
+        else:
+            try:
+                decisions = dec.extract_decisions(candidates)
+            except dec.LLMConfigError as exc:
+                err.print(f"[red]--llm config error:[/red] {exc}")
+                raise typer.Exit(2) from exc
+            except dec.LLMCallError as exc:
+                err.print(f"[red]--llm call failed:[/red] {exc}")
+                raise typer.Exit(2) from exc
+            card = card.with_decisions(tuple(decisions))
+
     if json_out:
         console.print_json(json.dumps(card.to_dict()))
         return
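To make the candidate-selection rule in this hunk concrete, here is a small self-contained sketch. `FakeCommit` is a stand-in for `whycode.git_facts.Commit` (whose real constructor is not shown in this diff); the rule itself mirrors the code above: incident commits first, then commits whose body is at least 100 characters, capped at `DEFAULT_MAX_COMMITS` (10).

```python
# Self-contained sketch of the L3 candidate-selection rule added to cli.py.
# FakeCommit is a hypothetical stand-in for whycode.git_facts.Commit.
from dataclasses import dataclass

@dataclass(frozen=True)
class FakeCommit:
    sha: str
    body: str

MAX = 10  # mirrors decisions.DEFAULT_MAX_COMMITS

incidents = [FakeCommit("a" * 40, "postmortem: rollback after pager incident")]
history = [FakeCommit(f"{i:040x}", "x" * (150 if i % 2 else 10)) for i in range(8)]

candidates = list(incidents)                     # incidents take priority
for c in history:
    if c not in candidates and len(c.body) >= 100:
        candidates.append(c)                     # substantial body only
    if len(candidates) >= MAX:
        break
candidates = candidates[:MAX]                    # hard cap on what L3 may see
print(len(candidates))                           # 5: 1 incident + 4 long-body commits
```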
@@ -0,0 +1,219 @@
+"""L3 — LLM-enriched decision extraction.
+
+What L1+L2 give: a regex-level harvest of single lines like
+``"Do not switch to async"``. What L3 adds: structured decisions with
+the full *why* drawn from the surrounding commit body.
+
+Structured decision schema (one ``Decision`` per finding):
+
+    {
+      "decision_type": "incident_fix" | "compat_workaround" | "perf_rewrite"
+                       | "rollback" | "constraint" | "other",
+      "what_changed": "one sentence summary",
+      "why": "one paragraph; quotes from the body where possible",
+      "do_not": "actionable constraint, or null",
+      "evidence": ["<sha1>", "<sha2>", …],
+      "confidence": 0.0 - 1.0
+    }
+
+Confidence < ``min_confidence`` is filtered out before return — better to
+emit nothing than emit a dressed-up guess. Privacy: this module makes a
+network call only if ``call_llm`` is invoked, which only happens when the
+caller passed commits in. Layer 1 and Layer 2 never reach this module.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from collections.abc import Sequence
+from dataclasses import dataclass
+
+from whycode.git_facts import Commit
+from whycode.llm import LLMCallError, LLMConfigError, call_llm
+
+DEFAULT_MIN_CONFIDENCE = 0.5
+DEFAULT_MAX_COMMITS = 10
+
+_SYSTEM = (
+    "You are a careful code-history archaeologist. You read commit messages "
+    "and surface the engineering decisions that future readers will need to "
+    "respect. You never invent facts; if a commit body does not state a "
+    "decision worth carrying forward, you emit nothing for that commit. "
+    "All quotes you produce must be drawn from the commit body itself; "
+    "summarise rather than paraphrase when you cannot quote."
+)
+
+_PROMPT_TEMPLATE = """Below are commits from a Git repository. For each commit, extract a structured Decision **only when the commit body genuinely states one**. Otherwise emit nothing for that commit.
+
+A Decision has this shape:
+
+{{
+  "decision_type": one of
+      "incident_fix" | "compat_workaround" | "perf_rewrite" |
+      "rollback" | "constraint" | "other",
+  "what_changed": one-sentence summary of the change itself,
+  "why": one paragraph drawn from the body (quote where possible),
+  "do_not": the actionable constraint a future editor must respect,
+      or null if none stated,
+  "evidence": array of commit SHAs supporting this decision,
+  "confidence": a float in [0, 1] reflecting how clearly the body
+      states this decision (use < 0.5 if you are unsure)
+}}
+
+Rules:
+- Reply with a JSON array of Decision objects, no prose, no code fences.
+- Empty array if nothing qualifies.
+- Quote rather than rephrase when stating "why".
+- Do not infer constraints that are not in the body.
+- Skip commits whose body is just a release note, dependency bump, or
+  one-line fix without explanation.
+
+COMMITS:
+
+{commits}
+"""
+
+
+@dataclass(frozen=True)
+class Decision:
+    decision_type: str
+    what_changed: str
+    why: str
+    do_not: str | None
+    evidence: tuple[str, ...]
+    confidence: float
+
+    def to_dict(self) -> dict[str, object]:
+        return {
+            "decision_type": self.decision_type,
+            "what_changed": self.what_changed,
+            "why": self.why,
+            "do_not": self.do_not,
+            "evidence": list(self.evidence),
+            "confidence": round(self.confidence, 2),
+        }
+
+
+def _format_commits_for_prompt(commits: Sequence[Commit]) -> str:
+    parts: list[str] = []
+    for c in commits:
+        parts.append(f"COMMIT {c.sha[:12]} ({c.author_name}, {c.authored_at.date()})")
+        parts.append(f"Subject: {c.subject}")
+        if c.body:
+            parts.append(f"Body:\n{c.body}")
+        parts.append("---")
+    return "\n".join(parts)
+
+
+_VALID_TYPES = frozenset(
+    {
+        "incident_fix",
+        "compat_workaround",
+        "perf_rewrite",
+        "rollback",
+        "constraint",
+        "other",
+    }
+)
+
+
+def _strip_code_fence(raw: str) -> str:
+    raw = raw.strip()
+    raw = re.sub(r"^```(?:json)?\s*", "", raw)
+    raw = re.sub(r"\s*```\s*$", "", raw)
+    return raw.strip()
+
+
+def _parse_decisions(raw: str, valid_shas: Sequence[str]) -> list[Decision]:
+    """Lenient parser. Bad JSON → empty list (we do not crash on a bad model
+    response). Missing fields default to empty/zero. Invalid evidence SHAs
+    are dropped silently."""
+    text = _strip_code_fence(raw)
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError:
+        return []
+    if not isinstance(data, list):
+        return []
+    short_lookup = {s[:12]: s for s in valid_shas}
+    out: list[Decision] = []
+    for item in data:
+        if not isinstance(item, dict):
+            continue
+        try:
+            decision_type = str(item.get("decision_type", "other"))
+            if decision_type not in _VALID_TYPES:
+                decision_type = "other"
+            what_changed = str(item.get("what_changed", "")).strip()
+            why = str(item.get("why", "")).strip()
+            do_not_raw = item.get("do_not")
+            do_not = str(do_not_raw).strip() if do_not_raw else None
+            raw_evidence = item.get("evidence", []) or []
+            evidence: list[str] = []
+            for token in raw_evidence:
+                t = str(token).strip()
+                # Accept full or 12-char prefix SHAs that match what we sent.
+                if t in short_lookup:
+                    evidence.append(short_lookup[t])
+                elif len(t) >= 12 and t[:12] in short_lookup:
+                    evidence.append(short_lookup[t[:12]])
+            if not evidence and valid_shas:
+                evidence = [valid_shas[0]]
+            confidence = float(item.get("confidence", 0.0))
+            confidence = max(0.0, min(1.0, confidence))
+        except (TypeError, ValueError):
+            continue
+        if not what_changed or not why:
+            continue
+        out.append(
+            Decision(
+                decision_type=decision_type,
+                what_changed=what_changed,
+                why=why,
+                do_not=do_not,
+                evidence=tuple(evidence),
+                confidence=confidence,
+            )
+        )
+    return out
+
+
+def estimate_payload(commits: Sequence[Commit]) -> tuple[int, int]:
+    """Return ``(commit_count, prompt_char_count)`` so callers can show the
+    user the exact size of what would be sent before invoking the network.
+    """
+    if not commits:
+        return 0, 0
+    prompt = _PROMPT_TEMPLATE.format(commits=_format_commits_for_prompt(commits))
+    return len(commits), len(prompt) + len(_SYSTEM)
+
+
+def extract_decisions(
+    commits: Sequence[Commit],
+    *,
+    min_confidence: float = DEFAULT_MIN_CONFIDENCE,
+) -> list[Decision]:
+    """Send ``commits`` to the configured LLM and parse structured decisions.
+
+    Raises ``LLMConfigError`` when the environment is not set up; raises
+    ``LLMCallError`` on transport / API failure. Returns ``[]`` on empty
+    input or a malformed model response.
+    """
+    if not commits:
+        return []
+    prompt = _PROMPT_TEMPLATE.format(commits=_format_commits_for_prompt(commits))
+    raw = call_llm(prompt, _SYSTEM)
+    decisions = _parse_decisions(raw, [c.sha for c in commits])
+    return [d for d in decisions if d.confidence >= min_confidence]
+
+
+__all__ = [
+    "DEFAULT_MAX_COMMITS",
+    "DEFAULT_MIN_CONFIDENCE",
+    "Decision",
+    "LLMCallError",
+    "LLMConfigError",
+    "estimate_payload",
+    "extract_decisions",
+]
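A short sketch of the leniency contract described in `_parse_decisions` above: a code-fenced reply is still parsed, a 12-character evidence SHA is resolved back to the full SHA that was sent, and malformed output degrades to an empty list. The model reply below is invented.

```python
# Illustrative only: exercises the lenient parsing path of the new module.
from whycode import decisions as dec

sent_sha = "abcdef123456" + "0" * 28            # a full 40-char SHA we "sent"

fenced_reply = """```json
[{"decision_type": "constraint",
  "what_changed": "Kept the sync I/O path",
  "why": "the body says: do not switch to async (event loop deadlocked in prod)",
  "do_not": "do not switch to async",
  "evidence": ["abcdef123456"],
  "confidence": 0.8}]
```"""

ok = dec._parse_decisions(fenced_reply, [sent_sha])
assert ok[0].evidence == (sent_sha,)            # 12-char prefix resolved to the full SHA
assert ok[0].confidence == 0.8

# Malformed output degrades to "no decisions" instead of raising.
assert dec._parse_decisions("sorry, cannot help", [sent_sha]) == []
```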
@@ -0,0 +1,112 @@
+"""Provider-neutral LLM client wrapper for the optional L3 layer.
+
+L3 is opt-in. Off by default. The CLI must require an explicit ``--llm``
+flag and the user must set their own API key. This module never embeds
+provider names, model identifiers, or default keys in source code —
+configuration lives entirely in environment variables, so the source tree
+itself does not advertise any specific vendor.
+
+Required:
+    ``WHYCODE_LLM_API_KEY``     Your provider's API key.
+    ``WHYCODE_LLM_MODEL``       Your provider's model identifier (string).
+
+Optional:
+    ``WHYCODE_LLM_MAX_TOKENS``  Output cap (default 2000).
+
+The actual provider SDK is loaded lazily (``pip install 'whycode-cli[llm]'``)
+so users who never invoke L3 do not pay the import cost or force a
+dependency on any AI SDK.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+
+
+class LLMConfigError(RuntimeError):
+    """Raised when L3 is invoked without sufficient configuration."""
+
+
+class LLMCallError(RuntimeError):
+    """Raised when the underlying provider call fails."""
+
+
+@dataclass(frozen=True)
+class LLMConfig:
+    api_key: str
+    model: str
+    max_tokens: int = 2000
+
+
+def _read_config() -> LLMConfig:
+    """Read configuration from environment variables.
+
+    No defaults for ``api_key`` or ``model`` — both must be set explicitly.
+    The error message points the user at the ``--llm-dry-run`` flag for
+    self-service auditing.
+    """
+    api_key = os.environ.get("WHYCODE_LLM_API_KEY", "").strip()
+    model = os.environ.get("WHYCODE_LLM_MODEL", "").strip()
+    if not api_key:
+        raise LLMConfigError(
+            "WHYCODE_LLM_API_KEY is not set. To use --llm:\n"
+            "  1. Get an API key from your LLM provider.\n"
+            "  2. export WHYCODE_LLM_API_KEY=…\n"
+            "  3. export WHYCODE_LLM_MODEL=<your-provider's-model-identifier>\n"
+            "  Use --llm-dry-run first to see exactly what would be sent."
+        )
+    if not model:
+        raise LLMConfigError(
+            "WHYCODE_LLM_MODEL is not set. Set it to your provider's model "
+            "identifier (consult your provider's docs for available models)."
+        )
+    raw_max = os.environ.get("WHYCODE_LLM_MAX_TOKENS", "2000").strip()
+    try:
+        max_tokens = int(raw_max)
+    except ValueError:
+        max_tokens = 2000
+    return LLMConfig(api_key=api_key, model=model, max_tokens=max_tokens)
+
+
+def call_llm(prompt: str, system: str) -> str:
+    """Send ``prompt`` (with ``system`` instruction) to the configured LLM.
+
+    Returns the assistant's text response. Raises ``LLMConfigError`` if the
+    environment is not set up or the provider SDK is missing; raises
+    ``LLMCallError`` on transport / API failure.
+
+    The provider SDK is loaded lazily inside this call to keep the import
+    out of the cold path. This matches the architectural rule that L1+L2
+    must run with zero network and zero LLM dependencies.
+    """
+    cfg = _read_config()
+    try:
+        # Lazy import — the SDK is in the optional ``[llm]`` extras and is
+        # not required for the rest of WhyCode. Keep the package name out
+        # of any user-facing strings.
+        client_module = __import__("anthropic")
+    except ImportError as exc:
+        raise LLMConfigError(
+            "LLM support not installed. Run: pip install 'whycode-cli[llm]'"
+        ) from exc
+    try:
+        client = client_module.Anthropic(api_key=cfg.api_key)
+        msg = client.messages.create(
+            model=cfg.model,
+            max_tokens=cfg.max_tokens,
+            system=system,
+            messages=[{"role": "user", "content": prompt}],
+        )
+    except Exception as exc:
+        raise LLMCallError(f"LLM call failed: {exc}") from exc
+    # Anthropic returns a list of content blocks; concatenate text-typed ones.
+    parts: list[str] = []
+    for block in getattr(msg, "content", []):
+        text = getattr(block, "text", None)
+        if isinstance(text, str):
+            parts.append(text)
+    return "".join(parts)
+
+
+__all__ = ["LLMCallError", "LLMConfig", "LLMConfigError", "call_llm"]
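And a sketch of driving the new module directly from a script, assuming `WHYCODE_LLM_API_KEY` / `WHYCODE_LLM_MODEL` are exported and the optional `[llm]` extra is installed. Nothing here is imported on the default L1+L2 path, matching the lazy-import rule documented above.

```python
# Illustrative only: a minimal caller for the new whycode.llm wrapper.
# Assumes the WHYCODE_LLM_* env vars are set and the [llm] extra is installed.
from whycode.llm import LLMCallError, LLMConfigError, call_llm

try:
    reply = call_llm(
        prompt="COMMITS:\n(none - connectivity check only)",
        system="Reply with an empty JSON array.",
    )
    print(reply or "[]")
except LLMConfigError as exc:
    print(f"config problem: {exc}")        # missing env vars or missing SDK
except LLMCallError as exc:
    print(f"provider call failed: {exc}")  # transport / API failure
```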