whycode-cli 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {whycode_cli-0.2.2/src/whycode_cli.egg-info → whycode_cli-0.2.4}/PKG-INFO +1 -1
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/pyproject.toml +1 -1
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/__init__.py +1 -1
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/cli.py +57 -47
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/git_facts.py +81 -10
- whycode_cli-0.2.4/src/whycode/ignore.py +114 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4/src/whycode_cli.egg-info}/PKG-INFO +1 -1
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode_cli.egg-info/SOURCES.txt +2 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/tests/test_cli.py +56 -0
- whycode_cli-0.2.4/tests/test_ignore.py +73 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/LICENSE +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/README.md +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/setup.cfg +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/__main__.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/mcp_server.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/risk_card.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/scorer.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/signals.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/suppressions.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/templates/__init__.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/templates/github-workflow.yml +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode/templates/pre-commit +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode_cli.egg-info/dependency_links.txt +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode_cli.egg-info/entry_points.txt +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode_cli.egg-info/requires.txt +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/src/whycode_cli.egg-info/top_level.txt +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/tests/test_git_facts.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/tests/test_scorer.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/tests/test_signals.py +0 -0
- {whycode_cli-0.2.2 → whycode_cli-0.2.4}/tests/test_suppressions.py +0 -0
|
@@ -29,6 +29,7 @@ from rich.table import Table
|
|
|
29
29
|
|
|
30
30
|
from whycode import __version__
|
|
31
31
|
from whycode import git_facts as gf
|
|
32
|
+
from whycode import ignore as ign
|
|
32
33
|
from whycode import risk_card as rc
|
|
33
34
|
from whycode import signals as sig
|
|
34
35
|
from whycode import suppressions as supp
|
|
@@ -70,12 +71,29 @@ def _path_is_known_to_git(repo_root: Path, rel: str) -> bool:
|
|
|
70
71
|
if gf.is_tracked(repo_root, rel):
|
|
71
72
|
return True
|
|
72
73
|
try:
|
|
73
|
-
out = gf.
|
|
74
|
+
out = gf.run_git(repo_root, "log", "--oneline", "-1", "--all", "--", rel)
|
|
74
75
|
except gf.GitError:
|
|
75
76
|
return False
|
|
76
77
|
return bool(out.strip())
|
|
77
78
|
|
|
78
79
|
|
|
80
|
+
def _require_tracked(path_arg: str) -> tuple[Path, str]:
    """Turn ``path_arg`` into ``(repo_root, rel)``, exiting if git has never seen it.

    Shared front door for the path-taking commands that only make sense
    when the file has git history (``why``, ``timeline``, ``honest``), so
    each of them does not have to repeat the resolve / warn / exit dance.

    Raises ``typer.Exit(1)`` after printing a warning on the error console
    when the path is neither tracked nor present anywhere in history.
    """
    resolved = _resolve_repo_and_path(path_arg)
    repo_root, rel = resolved

    # Happy path first: anything git knows about is handed straight back.
    if _path_is_known_to_git(repo_root, rel):
        return repo_root, rel

    warning = (
        f"[yellow]warning:[/yellow] [bold]{rel}[/bold] is not tracked by git "
        f"and has no history in this repo. Nothing to learn from."
    )
    err.print(warning)
    raise typer.Exit(1)
|
|
95
|
+
|
|
96
|
+
|
|
79
97
|
# --- shared: band threshold parsing ----------------------------------------
|
|
80
98
|
|
|
81
99
|
_BAND_THRESHOLDS_BY_KEY: dict[str, int] = {
|
|
@@ -141,17 +159,11 @@ def why(
|
|
|
141
159
|
),
|
|
142
160
|
) -> None:
|
|
143
161
|
"""Print the Risk Card for ``path``."""
|
|
144
|
-
repo_root, rel =
|
|
145
|
-
if not _path_is_known_to_git(repo_root, rel):
|
|
146
|
-
err.print(
|
|
147
|
-
f"[yellow]warning:[/yellow] [bold]{rel}[/bold] is not tracked by git "
|
|
148
|
-
f"and has no history in this repo. Nothing to learn from."
|
|
149
|
-
)
|
|
150
|
-
raise typer.Exit(1)
|
|
162
|
+
repo_root, rel = _require_tracked(path)
|
|
151
163
|
resolved_ref: str | None = None
|
|
152
164
|
if at is not None:
|
|
153
165
|
try:
|
|
154
|
-
resolved_ref = gf.
|
|
166
|
+
resolved_ref = gf.run_git(
|
|
155
167
|
repo_root, "rev-parse", "--verify", f"{at}^{{commit}}"
|
|
156
168
|
).strip()
|
|
157
169
|
except gf.GitError:
|
|
@@ -201,7 +213,7 @@ def _resolve_base_ref(repo_root: Path, requested: str | None) -> str:
|
|
|
201
213
|
candidates = ("origin/main", "origin/master", "main", "master", "HEAD~1")
|
|
202
214
|
for ref in candidates:
|
|
203
215
|
try:
|
|
204
|
-
gf.
|
|
216
|
+
gf.run_git(repo_root, "rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}")
|
|
205
217
|
return ref
|
|
206
218
|
except gf.GitError:
|
|
207
219
|
continue
|
|
@@ -240,13 +252,13 @@ def diff(
|
|
|
240
252
|
try:
|
|
241
253
|
repo_root = gf.discover_repo_root(repo.resolve())
|
|
242
254
|
if staged:
|
|
243
|
-
raw = gf.
|
|
255
|
+
raw = gf.run_git(
|
|
244
256
|
repo_root, "diff", "--cached", "--name-only", "--diff-filter=ACMR"
|
|
245
257
|
)
|
|
246
258
|
actual_base = "(staged changes)"
|
|
247
259
|
else:
|
|
248
260
|
actual_base = _resolve_base_ref(repo_root, base)
|
|
249
|
-
raw = gf.
|
|
261
|
+
raw = gf.run_git(repo_root, "diff", "--name-only", f"{actual_base}...HEAD")
|
|
250
262
|
except gf.GitError as exc:
|
|
251
263
|
err.print(f"[red]error:[/red] {exc}")
|
|
252
264
|
raise typer.Exit(2) from exc
|
|
@@ -482,13 +494,7 @@ def timeline(
|
|
|
482
494
|
),
|
|
483
495
|
) -> None:
|
|
484
496
|
"""Show how this file's risk score evolved over its history."""
|
|
485
|
-
repo_root, rel =
|
|
486
|
-
if not _path_is_known_to_git(repo_root, rel):
|
|
487
|
-
err.print(
|
|
488
|
-
f"[yellow]warning:[/yellow] [bold]{rel}[/bold] is not tracked by git "
|
|
489
|
-
f"and has no history in this repo."
|
|
490
|
-
)
|
|
491
|
-
raise typer.Exit(1)
|
|
497
|
+
repo_root, rel = _require_tracked(path)
|
|
492
498
|
|
|
493
499
|
commits = gf.commits_for_path(repo_root, rel)
|
|
494
500
|
if not commits:
|
|
@@ -563,6 +569,19 @@ def scan(
|
|
|
563
569
|
"--sample",
|
|
564
570
|
help="Cap on tracked files to evaluate (for very large repos).",
|
|
565
571
|
),
|
|
572
|
+
scan_depth: int = typer.Option(
|
|
573
|
+
200,
|
|
574
|
+
"--scan-depth",
|
|
575
|
+
help=(
|
|
576
|
+
"Cap commits-per-file scanned (controls scan speed). "
|
|
577
|
+
"Use 0 for no cap (slow on large repos)."
|
|
578
|
+
),
|
|
579
|
+
),
|
|
580
|
+
no_ignore: bool = typer.Option(
|
|
581
|
+
False,
|
|
582
|
+
"--no-ignore",
|
|
583
|
+
help="Bypass the default-ignore list and scan everything (CHANGELOGs, lockfiles, vendored).",
|
|
584
|
+
),
|
|
566
585
|
repo: Path = typer.Option(
|
|
567
586
|
Path("."), "--repo", help="Path inside the repo (defaults to cwd)."
|
|
568
587
|
),
|
|
@@ -574,17 +593,20 @@ def scan(
|
|
|
574
593
|
err.print(f"[red]error:[/red] {exc}")
|
|
575
594
|
raise typer.Exit(2) from exc
|
|
576
595
|
|
|
577
|
-
raw = gf.
|
|
578
|
-
|
|
596
|
+
raw = gf.run_git(repo_root, "ls-files")
|
|
597
|
+
all_paths = [line for line in raw.splitlines() if line.strip()]
|
|
598
|
+
patterns = () if no_ignore else ign.effective_patterns(repo_root)
|
|
599
|
+
paths = [p for p in all_paths if not ign.is_ignored(p, patterns)][:sample]
|
|
579
600
|
if not paths:
|
|
580
601
|
console.print("[yellow]no tracked files found[/yellow]")
|
|
581
602
|
raise typer.Exit(0)
|
|
582
603
|
|
|
604
|
+
depth_cap = scan_depth if scan_depth > 0 else None
|
|
583
605
|
cards: list[rc.RiskCard] = []
|
|
584
606
|
with console.status(f"Scanning {len(paths)} files…", spinner="dots"):
|
|
585
607
|
for p in paths:
|
|
586
608
|
try:
|
|
587
|
-
card = rc.build(repo_root, p)
|
|
609
|
+
card = rc.build(repo_root, p, max_commits=depth_cap)
|
|
588
610
|
except gf.GitError:
|
|
589
611
|
continue
|
|
590
612
|
# Skip files whose only signal is NEWBORN — that's "not enough
|
|
@@ -631,13 +653,7 @@ def honest(
|
|
|
631
653
|
Use when the Risk Card's first-sentence truncation is hiding important
|
|
632
654
|
context — e.g., a commit whose constraint is stated across two lines.
|
|
633
655
|
"""
|
|
634
|
-
repo_root, rel =
|
|
635
|
-
if not _path_is_known_to_git(repo_root, rel):
|
|
636
|
-
err.print(
|
|
637
|
-
f"[yellow]warning:[/yellow] [bold]{rel}[/bold] is not tracked by git "
|
|
638
|
-
f"and has no history in this repo."
|
|
639
|
-
)
|
|
640
|
-
raise typer.Exit(1)
|
|
656
|
+
repo_root, rel = _require_tracked(path)
|
|
641
657
|
facts = gf.gather(repo_root, rel)
|
|
642
658
|
if not facts.invariant_quotes:
|
|
643
659
|
if json_out:
|
|
@@ -699,24 +715,18 @@ def show(
|
|
|
699
715
|
"""Risk-flavored summary for a single commit: classification + per-file risk."""
|
|
700
716
|
try:
|
|
701
717
|
repo_root = gf.discover_repo_root(repo.resolve())
|
|
702
|
-
full_sha = gf._run_git(repo_root, "rev-parse", "--verify", f"{sha}^{{commit}}").strip()
|
|
703
718
|
except gf.GitError as exc:
|
|
704
719
|
err.print(f"[red]error:[/red] {exc}")
|
|
705
720
|
raise typer.Exit(2) from exc
|
|
706
721
|
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
commits = gf._parse_log_records(raw)
|
|
711
|
-
if not commits:
|
|
712
|
-
err.print(f"[red]error:[/red] could not read commit {full_sha}")
|
|
722
|
+
commit = gf.read_commit(repo_root, sha)
|
|
723
|
+
if commit is None:
|
|
724
|
+
err.print(f"[red]error:[/red] could not read commit {sha!r}")
|
|
713
725
|
raise typer.Exit(2)
|
|
714
|
-
|
|
726
|
+
full_sha = commit.sha
|
|
715
727
|
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
or gf._BREAKING_CC_RE.search(commit.subject)
|
|
719
|
-
)
|
|
728
|
+
classification = gf.classify_commit(commit)
|
|
729
|
+
is_incident = classification.incident_flavoured
|
|
720
730
|
invariants = gf.extract_invariant_quotes([commit])
|
|
721
731
|
file_changes = gf.files_changed_in(repo_root, full_sha)
|
|
722
732
|
|
|
@@ -751,14 +761,14 @@ def show(
|
|
|
751
761
|
)
|
|
752
762
|
console.print(f" {commit.subject}")
|
|
753
763
|
console.print()
|
|
754
|
-
|
|
764
|
+
badges: list[str] = []
|
|
755
765
|
if is_incident:
|
|
756
|
-
|
|
766
|
+
badges.append("[bold red]incident-flavored[/bold red]")
|
|
757
767
|
if invariants:
|
|
758
|
-
|
|
759
|
-
if not
|
|
760
|
-
|
|
761
|
-
console.print(" " + " ".join(
|
|
768
|
+
badges.append(f"[yellow]states {len(invariants)} invariant(s)[/yellow]")
|
|
769
|
+
if not badges:
|
|
770
|
+
badges.append("[dim]no special classification[/dim]")
|
|
771
|
+
console.print(" " + " ".join(badges))
|
|
762
772
|
console.print(f" [dim]{len(file_changes)} files changed[/dim]")
|
|
763
773
|
|
|
764
774
|
if not cards:
|
|
@@ -129,8 +129,13 @@ class GitError(RuntimeError):
|
|
|
129
129
|
"""Raised when a git invocation fails or produces unexpected output."""
|
|
130
130
|
|
|
131
131
|
|
|
132
|
-
def
|
|
133
|
-
"""Invoke git
|
|
132
|
+
def run_git(repo_root: Path, *args: str) -> str:
|
|
133
|
+
"""Invoke ``git -C <repo_root> <args>`` and return stdout.
|
|
134
|
+
|
|
135
|
+
Public API: callers (CLI, MCP server) use this to run git commands
|
|
136
|
+
that aren't already wrapped in a higher-level helper here. Raises
|
|
137
|
+
:class:`GitError` on non-zero exit or when ``git`` itself is missing.
|
|
138
|
+
"""
|
|
134
139
|
cmd = ["git", "-C", str(repo_root), *args]
|
|
135
140
|
try:
|
|
136
141
|
proc = subprocess.run(
|
|
@@ -150,6 +155,10 @@ def _run_git(repo_root: Path, *args: str) -> str:
|
|
|
150
155
|
return proc.stdout
|
|
151
156
|
|
|
152
157
|
|
|
158
|
+
# Back-compat alias. Prefer ``run_git`` in new code.
|
|
159
|
+
_run_git = run_git
|
|
160
|
+
|
|
161
|
+
|
|
153
162
|
def discover_repo_root(start: Path) -> Path:
|
|
154
163
|
"""Find the enclosing git repo root for ``start``."""
|
|
155
164
|
out = _run_git(start, "rev-parse", "--show-toplevel").strip()
|
|
@@ -240,6 +249,25 @@ def all_commits(repo_root: Path, *, max_count: int | None = None) -> list[Commit
|
|
|
240
249
|
return _parse_log_records(raw)
|
|
241
250
|
|
|
242
251
|
|
|
252
|
+
def read_commit(repo_root: Path, ref: str) -> Commit | None:
    """Resolve ``ref`` (SHA, tag, branch, ``HEAD~3`` ...) to exactly that ``Commit``.

    Args:
        repo_root: Repository the git commands are run against.
        ref: Any revision expression git can peel to a commit.

    Returns:
        The parsed commit ``ref`` points at, or ``None`` when the ref does
        not exist, does not resolve to a commit, or its log record cannot
        be read or parsed.
    """
    try:
        full_sha = run_git(
            repo_root, "rev-parse", "--verify", f"{ref}^{{commit}}"
        ).strip()
        # ``--no-walk`` pins the output to the named commit itself. Filtering
        # with ``--no-merges`` here would be wrong: git applies that filter
        # before the ``-1`` limit, so a merge commit would silently be
        # replaced by its nearest non-merge ancestor.
        raw = run_git(
            repo_root, "log", "-1", "--no-walk", f"--pretty=format:{_log_format()}", full_sha
        )
    except GitError:
        # Honour the "returns None" contract for every git failure, not just
        # the rev-parse step, so callers never see a raw GitError from here.
        return None
    parsed = _parse_log_records(raw)
    return parsed[0] if parsed else None
|
|
269
|
+
|
|
270
|
+
|
|
243
271
|
def files_changed_in(repo_root: Path, sha: str) -> list[FileChange]:
|
|
244
272
|
"""Return the list of files (with diffstat) changed in ``sha``."""
|
|
245
273
|
raw = _run_git(
|
|
@@ -268,17 +296,40 @@ def co_changes(
|
|
|
268
296
|
repo_root: Path,
|
|
269
297
|
commits: Sequence[Commit],
|
|
270
298
|
target_path: str,
|
|
299
|
+
*,
|
|
300
|
+
max_count: int | None = None,
|
|
271
301
|
) -> Counter[str]:
|
|
272
|
-
"""Count, across the
|
|
302
|
+
"""Count, across the file's history, how often other files changed alongside ``target_path``.
|
|
303
|
+
|
|
304
|
+
Implemented as a single ``git log --no-walk --numstat`` call over the
|
|
305
|
+
pre-fetched SHA list, rather than one ``git show`` per commit. On a
|
|
306
|
+
200-commit file this drops the cost from 200 git invocations to 1 —
|
|
307
|
+
typically a 30-50x speedup for the coupling signal in ``scan``.
|
|
273
308
|
|
|
274
|
-
|
|
309
|
+
Note: we cannot just pass ``--follow -- <path>`` to a single log call,
|
|
310
|
+
because git limits the numstat output to the followed path itself in
|
|
311
|
+
that mode. So we depend on the caller having already resolved the
|
|
312
|
+
relevant SHAs (in ``commits``), then pass them via ``--no-walk``.
|
|
275
313
|
"""
|
|
314
|
+
del max_count # depth was already applied when ``commits`` was built
|
|
315
|
+
if not commits:
|
|
316
|
+
return Counter()
|
|
317
|
+
shas = [c.sha for c in commits]
|
|
318
|
+
args = ["log", "--no-walk", "--numstat", "--format=%x1eCOMMIT"]
|
|
319
|
+
args.extend(shas)
|
|
320
|
+
raw = _run_git(repo_root, *args)
|
|
276
321
|
counter: Counter[str] = Counter()
|
|
277
|
-
for
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
322
|
+
for line in raw.splitlines():
|
|
323
|
+
line = line.strip()
|
|
324
|
+
if not line or line.startswith(RECORD_SEP):
|
|
325
|
+
continue
|
|
326
|
+
parts = line.split("\t")
|
|
327
|
+
if len(parts) != 3:
|
|
328
|
+
continue
|
|
329
|
+
path = parts[2]
|
|
330
|
+
if path == target_path:
|
|
331
|
+
continue
|
|
332
|
+
counter[path] += 1
|
|
282
333
|
return counter
|
|
283
334
|
|
|
284
335
|
|
|
@@ -336,6 +387,26 @@ def find_incidents(commits: Sequence[Commit]) -> list[Commit]:
|
|
|
336
387
|
return out
|
|
337
388
|
|
|
338
389
|
|
|
390
|
+
@dataclass(frozen=True)
class CommitClassification:
    """Immutable verdict describing what kind of work one commit represents."""

    # True when the incident-detection rules flag this commit.
    incident_flavoured: bool
    # How many invariant quotes were extracted from this commit.
    invariant_count: int
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def classify_commit(commit: Commit) -> CommitClassification:
    """Produce a single-commit verdict from the list-based detectors.

    Wraps ``commit`` in a one-element list and feeds it to
    ``find_incidents`` and ``extract_invariant_quotes``, so surfaces such as
    ``whycode show`` get the same rules as the multi-commit analysis
    without duplicating the regex ladder.
    """
    incidents = find_incidents([commit])
    quotes = extract_invariant_quotes([commit])
    return CommitClassification(
        incident_flavoured=bool(incidents),
        invariant_count=len(quotes),
    )
|
|
408
|
+
|
|
409
|
+
|
|
339
410
|
# Straight, backtick, and the four common Unicode "smart" quote code points.
|
|
340
411
|
# We build the string from chr() calls because ruff's RUF001 ambiguous-char
|
|
341
412
|
# check rejects the literal Unicode quotes inline.
|
|
@@ -443,7 +514,7 @@ def gather(
|
|
|
443
514
|
repo_root=repo_root,
|
|
444
515
|
path=path,
|
|
445
516
|
commits=commits,
|
|
446
|
-
co_changed_files=co_changes(repo_root, commits, path),
|
|
517
|
+
co_changed_files=co_changes(repo_root, commits, path, max_count=max_commits),
|
|
447
518
|
revert_pairs=find_revert_pairs(commits),
|
|
448
519
|
incident_commits=find_incidents(commits),
|
|
449
520
|
invariant_quotes=extract_invariant_quotes(commits),
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Default ignore patterns for repo-wide scans.
|
|
2
|
+
|
|
3
|
+
These are paths/files that almost always pollute risk analysis without
|
|
4
|
+
adding signal: changelogs (touched on every release, so they look "tightly
|
|
5
|
+
coupled to everything"), lockfiles (regenerated on every dependency bump),
|
|
6
|
+
vendored third-party code, and machine-generated stubs.
|
|
7
|
+
|
|
8
|
+
Users can extend this list with a ``.whycodeignore`` file at repo root,
|
|
9
|
+
one ``fnmatch``-style pattern per line. Comments start with ``#``.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import fnmatch
|
|
15
|
+
from collections.abc import Iterable
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
DEFAULT_IGNORE_PATTERNS: tuple[str, ...] = (
|
|
19
|
+
# Changelogs / release-notes — touched every release, never the source of risk.
|
|
20
|
+
"CHANGELOG*",
|
|
21
|
+
"CHANGES*",
|
|
22
|
+
"HISTORY*",
|
|
23
|
+
"NEWS*",
|
|
24
|
+
"RELEASE_NOTES*",
|
|
25
|
+
# Lockfiles — regenerated on every dependency bump.
|
|
26
|
+
"*.lock",
|
|
27
|
+
"package-lock.json",
|
|
28
|
+
"yarn.lock",
|
|
29
|
+
"pnpm-lock.yaml",
|
|
30
|
+
"Cargo.lock",
|
|
31
|
+
"poetry.lock",
|
|
32
|
+
"uv.lock",
|
|
33
|
+
"Pipfile.lock",
|
|
34
|
+
"Gemfile.lock",
|
|
35
|
+
"composer.lock",
|
|
36
|
+
"go.sum",
|
|
37
|
+
# Generated stubs.
|
|
38
|
+
"*.pb.go",
|
|
39
|
+
"*.pb.py",
|
|
40
|
+
"*_pb2.py",
|
|
41
|
+
"*_pb2_grpc.py",
|
|
42
|
+
"*.generated.go",
|
|
43
|
+
"*.generated.ts",
|
|
44
|
+
"*.generated.js",
|
|
45
|
+
# Minified / bundled web assets.
|
|
46
|
+
"*.min.js",
|
|
47
|
+
"*.min.css",
|
|
48
|
+
"*.bundle.js",
|
|
49
|
+
# Vendored third-party trees.
|
|
50
|
+
"vendor/**",
|
|
51
|
+
"_vendor/**",
|
|
52
|
+
"third_party/**",
|
|
53
|
+
"third-party/**",
|
|
54
|
+
"node_modules/**",
|
|
55
|
+
"bower_components/**",
|
|
56
|
+
# Built docs.
|
|
57
|
+
"_build/**",
|
|
58
|
+
"site/**",
|
|
59
|
+
"docs/_build/**",
|
|
60
|
+
"docs/build/**",
|
|
61
|
+
# Common binary / data formats that aren't code.
|
|
62
|
+
"*.png",
|
|
63
|
+
"*.jpg",
|
|
64
|
+
"*.jpeg",
|
|
65
|
+
"*.gif",
|
|
66
|
+
"*.ico",
|
|
67
|
+
"*.svg",
|
|
68
|
+
"*.pdf",
|
|
69
|
+
"*.woff",
|
|
70
|
+
"*.woff2",
|
|
71
|
+
"*.ttf",
|
|
72
|
+
"*.otf",
|
|
73
|
+
"*.eot",
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
_USER_IGNORE_FILE = ".whycodeignore"


def load_user_patterns(repo_root: Path) -> tuple[str, ...]:
    """Read ``.whycodeignore`` from ``repo_root`` if it is a regular file.

    One pattern per line. Blank lines and lines whose first non-blank
    character is ``#`` are skipped; surrounding whitespace is stripped.

    Args:
        repo_root: Directory expected to contain the ignore file.

    Returns:
        The patterns in file order, or an empty tuple when there is no
        such file (including when the name exists but is a directory).
    """
    target = repo_root / _USER_IGNORE_FILE
    # ``is_file`` rather than ``exists``: a directory with this name must be
    # treated as "no ignore file" instead of crashing in ``read_text``.
    if not target.is_file():
        return ()
    # Decode explicitly instead of relying on the locale. ``utf-8-sig`` also
    # drops a leading BOM, which would otherwise glue itself to the first
    # line and turn a leading ``#`` comment into a bogus pattern.
    text = target.read_text(encoding="utf-8-sig")
    stripped = (raw.strip() for raw in text.splitlines())
    return tuple(line for line in stripped if line and not line.startswith("#"))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def is_ignored(path: str, patterns: Iterable[str]) -> bool:
    """Return True if ``path`` matches any of ``patterns``.

    ``path`` is a ``/``-separated repo-relative file path. Each pattern is
    tried in three ways:

    * against the whole path with ``fnmatch`` semantics (``*`` also spans
      ``/``, so ``vendor/**`` covers everything under ``vendor/``);
    * if the pattern ends with ``/`` it names a directory: a pattern with
      an inner slash (``internal/legacy/``) is anchored at the repo root,
      while a bare name (``scripts/``) matches a directory of that name at
      any depth. Plain ``fnmatch`` could never match these against a file
      path, so such patterns used to be silently dead;
    * if the pattern contains no ``/`` it is also tried against the
      basename, so ``CHANGELOG*`` matches ``docs/CHANGELOG.md``.
    """
    parents = path.split("/")[:-1]
    basename = path.rsplit("/", 1)[-1]
    for pat in patterns:
        if fnmatch.fnmatch(path, pat):
            return True
        if pat.endswith("/"):
            dir_pat = pat.rstrip("/")
            if not dir_pat:
                # A pattern made only of slashes names nothing.
                continue
            if "/" in dir_pat:
                # Anchored directory: everything below it is ignored.
                if fnmatch.fnmatch(path, dir_pat + "/*"):
                    return True
            elif any(fnmatch.fnmatch(part, dir_pat) for part in parents):
                # Bare directory name: only parent components count, so a
                # *file* that happens to share the name is not ignored.
                return True
            continue
        # Also match basename for non-recursive patterns like ``CHANGELOG*``.
        if "/" not in pat and parents and fnmatch.fnmatch(basename, pat):
            return True
    return False
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def effective_patterns(repo_root: Path) -> tuple[str, ...]:
    """Return the built-in ignore patterns followed by the repo's ``.whycodeignore`` entries."""
    user_patterns = load_user_patterns(repo_root)
    return (*DEFAULT_IGNORE_PATTERNS, *user_patterns)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
__all__ = [
|
|
110
|
+
"DEFAULT_IGNORE_PATTERNS",
|
|
111
|
+
"effective_patterns",
|
|
112
|
+
"is_ignored",
|
|
113
|
+
"load_user_patterns",
|
|
114
|
+
]
|
|
@@ -5,6 +5,7 @@ src/whycode/__init__.py
|
|
|
5
5
|
src/whycode/__main__.py
|
|
6
6
|
src/whycode/cli.py
|
|
7
7
|
src/whycode/git_facts.py
|
|
8
|
+
src/whycode/ignore.py
|
|
8
9
|
src/whycode/mcp_server.py
|
|
9
10
|
src/whycode/risk_card.py
|
|
10
11
|
src/whycode/scorer.py
|
|
@@ -21,6 +22,7 @@ src/whycode_cli.egg-info/requires.txt
|
|
|
21
22
|
src/whycode_cli.egg-info/top_level.txt
|
|
22
23
|
tests/test_cli.py
|
|
23
24
|
tests/test_git_facts.py
|
|
25
|
+
tests/test_ignore.py
|
|
24
26
|
tests/test_scorer.py
|
|
25
27
|
tests/test_signals.py
|
|
26
28
|
tests/test_suppressions.py
|
|
@@ -465,6 +465,62 @@ def test_why_mute_unknown_kind_errors(repo) -> None: # type: ignore[no-untyped-
|
|
|
465
465
|
assert "unknown signal kind" in result.output.lower()
|
|
466
466
|
|
|
467
467
|
|
|
468
|
+
def test_scan_skips_default_ignored_paths_by_default(repo, days_ago) -> None: # type: ignore[no-untyped-def]
|
|
469
|
+
"""CHANGELOG and lockfiles must not appear in scan output by default."""
|
|
470
|
+
sha = repo.commit(
|
|
471
|
+
"init",
|
|
472
|
+
{"CHANGELOG.md": "v1", "package-lock.json": "{}", "src/app.py": "x"},
|
|
473
|
+
when=days_ago(60),
|
|
474
|
+
)
|
|
475
|
+
repo.revert(sha, when=days_ago(50))
|
|
476
|
+
repo.commit(
|
|
477
|
+
"release: 1.1",
|
|
478
|
+
{"CHANGELOG.md": "v2", "src/app.py": "y"},
|
|
479
|
+
when=days_ago(20),
|
|
480
|
+
)
|
|
481
|
+
result = _invoke(repo.root, "scan", "--top", "10")
|
|
482
|
+
assert result.exit_code == 0
|
|
483
|
+
out = result.output
|
|
484
|
+
# CHANGELOG and lockfile must not appear in the table.
|
|
485
|
+
assert "CHANGELOG" not in out
|
|
486
|
+
assert "package-lock.json" not in out
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def test_scan_no_ignore_brings_them_back(repo, days_ago) -> None: # type: ignore[no-untyped-def]
|
|
490
|
+
repo.commit(
|
|
491
|
+
"init",
|
|
492
|
+
{"CHANGELOG.md": "v1", "src/app.py": "x"},
|
|
493
|
+
when=days_ago(60),
|
|
494
|
+
)
|
|
495
|
+
sha = repo.commit(
|
|
496
|
+
"feat: A",
|
|
497
|
+
{"CHANGELOG.md": "v2", "src/app.py": "y"},
|
|
498
|
+
when=days_ago(40),
|
|
499
|
+
)
|
|
500
|
+
repo.revert(sha, when=days_ago(20)) # safe to revert: files still exist after
|
|
501
|
+
default_run = _invoke(repo.root, "scan", "--top", "10")
|
|
502
|
+
permissive_run = _invoke(repo.root, "scan", "--top", "10", "--no-ignore")
|
|
503
|
+
assert default_run.exit_code == 0
|
|
504
|
+
assert permissive_run.exit_code == 0
|
|
505
|
+
# CHANGELOG was hidden from the default run by the ignore list…
|
|
506
|
+
assert "CHANGELOG" not in default_run.output
|
|
507
|
+
# …but is at least reachable when --no-ignore is on.
|
|
508
|
+
assert "CHANGELOG" in permissive_run.output or "src/app.py" in permissive_run.output
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def test_scan_respects_user_whycodeignore(repo, days_ago) -> None: # type: ignore[no-untyped-def]
|
|
512
|
+
(repo.root / ".whycodeignore").write_text("internal/legacy.py\n")
|
|
513
|
+
sha = repo.commit(
|
|
514
|
+
"init",
|
|
515
|
+
{"internal/legacy.py": "1", "src/app.py": "x"},
|
|
516
|
+
when=days_ago(60),
|
|
517
|
+
)
|
|
518
|
+
repo.revert(sha, when=days_ago(50))
|
|
519
|
+
result = _invoke(repo.root, "scan", "--top", "10")
|
|
520
|
+
assert result.exit_code == 0
|
|
521
|
+
assert "internal/legacy.py" not in result.output
|
|
522
|
+
|
|
523
|
+
|
|
468
524
|
def test_mcp_summary_field_present_in_json(repo, days_ago) -> None: # type: ignore[no-untyped-def]
|
|
469
525
|
"""Verify the MCP server includes a quotable summary string in get_risk_profile."""
|
|
470
526
|
sha = repo.commit("feat: A", {"a.py": "1"}, when=days_ago(40))
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Tests for the ignore-pattern matcher."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from whycode.ignore import (
|
|
6
|
+
DEFAULT_IGNORE_PATTERNS,
|
|
7
|
+
effective_patterns,
|
|
8
|
+
is_ignored,
|
|
9
|
+
load_user_patterns,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_default_patterns_match_changelog() -> None:
|
|
14
|
+
assert is_ignored("CHANGELOG.md", DEFAULT_IGNORE_PATTERNS)
|
|
15
|
+
assert is_ignored("CHANGES.rst", DEFAULT_IGNORE_PATTERNS)
|
|
16
|
+
assert is_ignored("HISTORY.txt", DEFAULT_IGNORE_PATTERNS)
|
|
17
|
+
assert is_ignored("RELEASE_NOTES.md", DEFAULT_IGNORE_PATTERNS)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_default_patterns_match_lockfiles() -> None:
|
|
21
|
+
assert is_ignored("package-lock.json", DEFAULT_IGNORE_PATTERNS)
|
|
22
|
+
assert is_ignored("yarn.lock", DEFAULT_IGNORE_PATTERNS)
|
|
23
|
+
assert is_ignored("Cargo.lock", DEFAULT_IGNORE_PATTERNS)
|
|
24
|
+
assert is_ignored("uv.lock", DEFAULT_IGNORE_PATTERNS)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_default_patterns_match_vendored_dirs() -> None:
|
|
28
|
+
assert is_ignored("node_modules/foo/index.js", DEFAULT_IGNORE_PATTERNS)
|
|
29
|
+
assert is_ignored("vendor/github.com/foo/bar.go", DEFAULT_IGNORE_PATTERNS)
|
|
30
|
+
assert is_ignored("third_party/x/y.cc", DEFAULT_IGNORE_PATTERNS)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def test_default_patterns_match_generated_stubs() -> None:
|
|
34
|
+
assert is_ignored("api_pb2.py", DEFAULT_IGNORE_PATTERNS)
|
|
35
|
+
assert is_ignored("foo.pb.go", DEFAULT_IGNORE_PATTERNS)
|
|
36
|
+
assert is_ignored("schema.generated.ts", DEFAULT_IGNORE_PATTERNS)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def test_default_patterns_do_not_match_normal_code() -> None:
|
|
40
|
+
assert not is_ignored("src/whycode/cli.py", DEFAULT_IGNORE_PATTERNS)
|
|
41
|
+
assert not is_ignored("README.md", DEFAULT_IGNORE_PATTERNS)
|
|
42
|
+
assert not is_ignored("tests/test_cli.py", DEFAULT_IGNORE_PATTERNS)
|
|
43
|
+
assert not is_ignored("Makefile", DEFAULT_IGNORE_PATTERNS)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_basename_match_for_root_pattern_in_subdir() -> None:
|
|
47
|
+
# `CHANGELOG*` should match `docs/CHANGELOG.md` even though the pattern has no slash.
|
|
48
|
+
assert is_ignored("docs/CHANGELOG.md", DEFAULT_IGNORE_PATTERNS)
|
|
49
|
+
assert is_ignored("packages/foo/CHANGES.rst", DEFAULT_IGNORE_PATTERNS)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_user_patterns_loaded(tmp_path) -> None: # type: ignore[no-untyped-def]
|
|
53
|
+
(tmp_path / ".whycodeignore").write_text(
|
|
54
|
+
"# this is a comment\n"
|
|
55
|
+
"*.proto\n"
|
|
56
|
+
"scripts/\n"
|
|
57
|
+
"\n" # blank line
|
|
58
|
+
"internal/legacy.py\n"
|
|
59
|
+
)
|
|
60
|
+
patterns = load_user_patterns(tmp_path)
|
|
61
|
+
assert patterns == ("*.proto", "scripts/", "internal/legacy.py")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_user_patterns_empty_when_no_file(tmp_path) -> None: # type: ignore[no-untyped-def]
|
|
65
|
+
assert load_user_patterns(tmp_path) == ()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def test_effective_patterns_combines_defaults_and_user(tmp_path) -> None: # type: ignore[no-untyped-def]
|
|
69
|
+
(tmp_path / ".whycodeignore").write_text("internal/legacy.py\n")
|
|
70
|
+
eff = effective_patterns(tmp_path)
|
|
71
|
+
assert "internal/legacy.py" in eff
|
|
72
|
+
assert "*.lock" in eff # default still present
|
|
73
|
+
assert is_ignored("internal/legacy.py", eff)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|