whycode-cli 0.2.2__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {whycode_cli-0.2.2/src/whycode_cli.egg-info → whycode_cli-0.2.3}/PKG-INFO +1 -1
  2. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/pyproject.toml +1 -1
  3. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/__init__.py +1 -1
  4. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/cli.py +19 -2
  5. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/git_facts.py +31 -8
  6. whycode_cli-0.2.3/src/whycode/ignore.py +114 -0
  7. {whycode_cli-0.2.2 → whycode_cli-0.2.3/src/whycode_cli.egg-info}/PKG-INFO +1 -1
  8. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode_cli.egg-info/SOURCES.txt +2 -0
  9. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/tests/test_cli.py +56 -0
  10. whycode_cli-0.2.3/tests/test_ignore.py +73 -0
  11. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/LICENSE +0 -0
  12. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/README.md +0 -0
  13. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/setup.cfg +0 -0
  14. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/__main__.py +0 -0
  15. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/mcp_server.py +0 -0
  16. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/risk_card.py +0 -0
  17. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/scorer.py +0 -0
  18. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/signals.py +0 -0
  19. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/suppressions.py +0 -0
  20. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/templates/__init__.py +0 -0
  21. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/templates/github-workflow.yml +0 -0
  22. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode/templates/pre-commit +0 -0
  23. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode_cli.egg-info/dependency_links.txt +0 -0
  24. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode_cli.egg-info/entry_points.txt +0 -0
  25. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode_cli.egg-info/requires.txt +0 -0
  26. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/src/whycode_cli.egg-info/top_level.txt +0 -0
  27. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/tests/test_git_facts.py +0 -0
  28. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/tests/test_scorer.py +0 -0
  29. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/tests/test_signals.py +0 -0
  30. {whycode_cli-0.2.2 → whycode_cli-0.2.3}/tests/test_suppressions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whycode-cli
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Tells you what to be afraid of before you touch a file.
5
5
  Author: Kevin
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "whycode-cli"
7
- version = "0.2.2"
7
+ version = "0.2.3"
8
8
  description = "Tells you what to be afraid of before you touch a file."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1,3 +1,3 @@
1
1
  """WhyCode — tells you what to be afraid of before touching a file."""
2
2
 
3
- __version__ = "0.2.2"
3
+ __version__ = "0.2.3"
@@ -29,6 +29,7 @@ from rich.table import Table
29
29
 
30
30
  from whycode import __version__
31
31
  from whycode import git_facts as gf
32
+ from whycode import ignore as ign
32
33
  from whycode import risk_card as rc
33
34
  from whycode import signals as sig
34
35
  from whycode import suppressions as supp
@@ -563,6 +564,19 @@ def scan(
563
564
  "--sample",
564
565
  help="Cap on tracked files to evaluate (for very large repos).",
565
566
  ),
567
+ scan_depth: int = typer.Option(
568
+ 200,
569
+ "--scan-depth",
570
+ help=(
571
+ "Cap commits-per-file scanned (controls scan speed). "
572
+ "Use 0 for no cap (slow on large repos)."
573
+ ),
574
+ ),
575
+ no_ignore: bool = typer.Option(
576
+ False,
577
+ "--no-ignore",
578
+ help="Bypass the default-ignore list and scan everything (CHANGELOGs, lockfiles, vendored).",
579
+ ),
566
580
  repo: Path = typer.Option(
567
581
  Path("."), "--repo", help="Path inside the repo (defaults to cwd)."
568
582
  ),
@@ -575,16 +589,19 @@ def scan(
575
589
  raise typer.Exit(2) from exc
576
590
 
577
591
  raw = gf._run_git(repo_root, "ls-files")
578
- paths = [line for line in raw.splitlines() if line.strip()][:sample]
592
+ all_paths = [line for line in raw.splitlines() if line.strip()]
593
+ patterns = () if no_ignore else ign.effective_patterns(repo_root)
594
+ paths = [p for p in all_paths if not ign.is_ignored(p, patterns)][:sample]
579
595
  if not paths:
580
596
  console.print("[yellow]no tracked files found[/yellow]")
581
597
  raise typer.Exit(0)
582
598
 
599
+ depth_cap = scan_depth if scan_depth > 0 else None
583
600
  cards: list[rc.RiskCard] = []
584
601
  with console.status(f"Scanning {len(paths)} files…", spinner="dots"):
585
602
  for p in paths:
586
603
  try:
587
- card = rc.build(repo_root, p)
604
+ card = rc.build(repo_root, p, max_commits=depth_cap)
588
605
  except gf.GitError:
589
606
  continue
590
607
  # Skip files whose only signal is NEWBORN — that's "not enough
@@ -268,17 +268,40 @@ def co_changes(
268
268
  repo_root: Path,
269
269
  commits: Sequence[Commit],
270
270
  target_path: str,
271
+ *,
272
+ max_count: int | None = None,
271
273
  ) -> Counter[str]:
272
- """Count, across the given commits, how often other files changed alongside ``target_path``.
274
+ """Count, across the file's history, how often other files changed alongside ``target_path``.
273
275
 
274
- The target file is excluded from the result.
276
+ Implemented as a single ``git log --no-walk --numstat`` call over the
277
+ pre-fetched SHA list, rather than one ``git show`` per commit. On a
278
+ 200-commit file this drops the cost from 200 git invocations to 1 —
279
+ typically a 30-50x speedup for the coupling signal in ``scan``.
280
+
281
+ Note: we cannot just pass ``--follow -- <path>`` to a single log call,
282
+ because git limits the numstat output to the followed path itself in
283
+ that mode. So we depend on the caller having already resolved the
284
+ relevant SHAs (in ``commits``), then pass them via ``--no-walk``.
275
285
  """
286
+ del max_count # depth was already applied when ``commits`` was built
287
+ if not commits:
288
+ return Counter()
289
+ shas = [c.sha for c in commits]
290
+ args = ["log", "--no-walk", "--numstat", "--format=%x1eCOMMIT"]
291
+ args.extend(shas)
292
+ raw = _run_git(repo_root, *args)
276
293
  counter: Counter[str] = Counter()
277
- for commit in commits:
278
- for change in files_changed_in(repo_root, commit.sha):
279
- if change.path == target_path:
280
- continue
281
- counter[change.path] += 1
294
+ for line in raw.splitlines():
295
+ line = line.strip()
296
+ if not line or line.startswith(RECORD_SEP):
297
+ continue
298
+ parts = line.split("\t")
299
+ if len(parts) != 3:
300
+ continue
301
+ path = parts[2]
302
+ if path == target_path:
303
+ continue
304
+ counter[path] += 1
282
305
  return counter
283
306
 
284
307
 
@@ -443,7 +466,7 @@ def gather(
443
466
  repo_root=repo_root,
444
467
  path=path,
445
468
  commits=commits,
446
- co_changed_files=co_changes(repo_root, commits, path),
469
+ co_changed_files=co_changes(repo_root, commits, path, max_count=max_commits),
447
470
  revert_pairs=find_revert_pairs(commits),
448
471
  incident_commits=find_incidents(commits),
449
472
  invariant_quotes=extract_invariant_quotes(commits),
@@ -0,0 +1,114 @@
1
+ """Default ignore patterns for repo-wide scans.
2
+
3
+ These are paths/files that almost always pollute risk analysis without
4
+ adding signal: changelogs (touched on every release, so they look "tightly
5
+ coupled to everything"), lockfiles (regenerated on every dependency bump),
6
+ vendored third-party code, and machine-generated stubs.
7
+
8
+ Users can extend this list with a ``.whycodeignore`` file at repo root,
9
+ one ``fnmatch``-style pattern per line. Comments start with ``#``.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import fnmatch
15
+ from collections.abc import Iterable
16
+ from pathlib import Path
17
+
18
+ DEFAULT_IGNORE_PATTERNS: tuple[str, ...] = (
19
+ # Changelogs / release-notes — touched every release, never the source of risk.
20
+ "CHANGELOG*",
21
+ "CHANGES*",
22
+ "HISTORY*",
23
+ "NEWS*",
24
+ "RELEASE_NOTES*",
25
+ # Lockfiles — regenerated on every dependency bump.
26
+ "*.lock",
27
+ "package-lock.json",
28
+ "yarn.lock",
29
+ "pnpm-lock.yaml",
30
+ "Cargo.lock",
31
+ "poetry.lock",
32
+ "uv.lock",
33
+ "Pipfile.lock",
34
+ "Gemfile.lock",
35
+ "composer.lock",
36
+ "go.sum",
37
+ # Generated stubs.
38
+ "*.pb.go",
39
+ "*.pb.py",
40
+ "*_pb2.py",
41
+ "*_pb2_grpc.py",
42
+ "*.generated.go",
43
+ "*.generated.ts",
44
+ "*.generated.js",
45
+ # Minified / bundled web assets.
46
+ "*.min.js",
47
+ "*.min.css",
48
+ "*.bundle.js",
49
+ # Vendored third-party trees.
50
+ "vendor/**",
51
+ "_vendor/**",
52
+ "third_party/**",
53
+ "third-party/**",
54
+ "node_modules/**",
55
+ "bower_components/**",
56
+ # Built docs.
57
+ "_build/**",
58
+ "site/**",
59
+ "docs/_build/**",
60
+ "docs/build/**",
61
+ # Common binary / data formats that aren't code.
62
+ "*.png",
63
+ "*.jpg",
64
+ "*.jpeg",
65
+ "*.gif",
66
+ "*.ico",
67
+ "*.svg",
68
+ "*.pdf",
69
+ "*.woff",
70
+ "*.woff2",
71
+ "*.ttf",
72
+ "*.otf",
73
+ "*.eot",
74
+ )
75
+
76
+ _USER_IGNORE_FILE = ".whycodeignore"
77
+
78
+
79
+ def load_user_patterns(repo_root: Path) -> tuple[str, ...]:
80
+ """Read ``.whycodeignore`` if present. One pattern per line; ``#`` comments."""
81
+ target = repo_root / _USER_IGNORE_FILE
82
+ if not target.exists():
83
+ return ()
84
+ out: list[str] = []
85
+ for raw in target.read_text().splitlines():
86
+ line = raw.strip()
87
+ if not line or line.startswith("#"):
88
+ continue
89
+ out.append(line)
90
+ return tuple(out)
91
+
92
+
93
+ def is_ignored(path: str, patterns: Iterable[str]) -> bool:
94
+ """True if ``path`` matches any pattern (``fnmatch`` semantics)."""
95
+ for pat in patterns:
96
+ if fnmatch.fnmatch(path, pat):
97
+ return True
98
+ # Also match basename for non-recursive patterns like ``CHANGELOG*``.
99
+ if "/" not in pat and "/" in path and fnmatch.fnmatch(path.rsplit("/", 1)[-1], pat):
100
+ return True
101
+ return False
102
+
103
+
104
+ def effective_patterns(repo_root: Path) -> tuple[str, ...]:
105
+ """Combine the built-in defaults with the user's ``.whycodeignore``."""
106
+ return DEFAULT_IGNORE_PATTERNS + load_user_patterns(repo_root)
107
+
108
+
109
+ __all__ = [
110
+ "DEFAULT_IGNORE_PATTERNS",
111
+ "effective_patterns",
112
+ "is_ignored",
113
+ "load_user_patterns",
114
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whycode-cli
3
- Version: 0.2.2
3
+ Version: 0.2.3
4
4
  Summary: Tells you what to be afraid of before you touch a file.
5
5
  Author: Kevin
6
6
  License-Expression: MIT
@@ -5,6 +5,7 @@ src/whycode/__init__.py
5
5
  src/whycode/__main__.py
6
6
  src/whycode/cli.py
7
7
  src/whycode/git_facts.py
8
+ src/whycode/ignore.py
8
9
  src/whycode/mcp_server.py
9
10
  src/whycode/risk_card.py
10
11
  src/whycode/scorer.py
@@ -21,6 +22,7 @@ src/whycode_cli.egg-info/requires.txt
21
22
  src/whycode_cli.egg-info/top_level.txt
22
23
  tests/test_cli.py
23
24
  tests/test_git_facts.py
25
+ tests/test_ignore.py
24
26
  tests/test_scorer.py
25
27
  tests/test_signals.py
26
28
  tests/test_suppressions.py
@@ -465,6 +465,62 @@ def test_why_mute_unknown_kind_errors(repo) -> None: # type: ignore[no-untyped-
465
465
  assert "unknown signal kind" in result.output.lower()
466
466
 
467
467
 
468
+ def test_scan_skips_default_ignored_paths_by_default(repo, days_ago) -> None: # type: ignore[no-untyped-def]
469
+ """CHANGELOG and lockfiles must not appear in scan output by default."""
470
+ sha = repo.commit(
471
+ "init",
472
+ {"CHANGELOG.md": "v1", "package-lock.json": "{}", "src/app.py": "x"},
473
+ when=days_ago(60),
474
+ )
475
+ repo.revert(sha, when=days_ago(50))
476
+ repo.commit(
477
+ "release: 1.1",
478
+ {"CHANGELOG.md": "v2", "src/app.py": "y"},
479
+ when=days_ago(20),
480
+ )
481
+ result = _invoke(repo.root, "scan", "--top", "10")
482
+ assert result.exit_code == 0
483
+ out = result.output
484
+ # CHANGELOG and lockfile must not appear in the table.
485
+ assert "CHANGELOG" not in out
486
+ assert "package-lock.json" not in out
487
+
488
+
489
+ def test_scan_no_ignore_brings_them_back(repo, days_ago) -> None: # type: ignore[no-untyped-def]
490
+ repo.commit(
491
+ "init",
492
+ {"CHANGELOG.md": "v1", "src/app.py": "x"},
493
+ when=days_ago(60),
494
+ )
495
+ sha = repo.commit(
496
+ "feat: A",
497
+ {"CHANGELOG.md": "v2", "src/app.py": "y"},
498
+ when=days_ago(40),
499
+ )
500
+ repo.revert(sha, when=days_ago(20)) # safe to revert: files still exist after
501
+ default_run = _invoke(repo.root, "scan", "--top", "10")
502
+ permissive_run = _invoke(repo.root, "scan", "--top", "10", "--no-ignore")
503
+ assert default_run.exit_code == 0
504
+ assert permissive_run.exit_code == 0
505
+ # CHANGELOG was hidden from the default run by the ignore list…
506
+ assert "CHANGELOG" not in default_run.output
507
+ # …but is at least reachable when --no-ignore is on.
508
+ assert "CHANGELOG" in permissive_run.output or "src/app.py" in permissive_run.output
509
+
510
+
511
+ def test_scan_respects_user_whycodeignore(repo, days_ago) -> None: # type: ignore[no-untyped-def]
512
+ (repo.root / ".whycodeignore").write_text("internal/legacy.py\n")
513
+ sha = repo.commit(
514
+ "init",
515
+ {"internal/legacy.py": "1", "src/app.py": "x"},
516
+ when=days_ago(60),
517
+ )
518
+ repo.revert(sha, when=days_ago(50))
519
+ result = _invoke(repo.root, "scan", "--top", "10")
520
+ assert result.exit_code == 0
521
+ assert "internal/legacy.py" not in result.output
522
+
523
+
468
524
  def test_mcp_summary_field_present_in_json(repo, days_ago) -> None: # type: ignore[no-untyped-def]
469
525
  """Verify the MCP server includes a quotable summary string in get_risk_profile."""
470
526
  sha = repo.commit("feat: A", {"a.py": "1"}, when=days_ago(40))
@@ -0,0 +1,73 @@
1
+ """Tests for the ignore-pattern matcher."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from whycode.ignore import (
6
+ DEFAULT_IGNORE_PATTERNS,
7
+ effective_patterns,
8
+ is_ignored,
9
+ load_user_patterns,
10
+ )
11
+
12
+
13
+ def test_default_patterns_match_changelog() -> None:
14
+ assert is_ignored("CHANGELOG.md", DEFAULT_IGNORE_PATTERNS)
15
+ assert is_ignored("CHANGES.rst", DEFAULT_IGNORE_PATTERNS)
16
+ assert is_ignored("HISTORY.txt", DEFAULT_IGNORE_PATTERNS)
17
+ assert is_ignored("RELEASE_NOTES.md", DEFAULT_IGNORE_PATTERNS)
18
+
19
+
20
+ def test_default_patterns_match_lockfiles() -> None:
21
+ assert is_ignored("package-lock.json", DEFAULT_IGNORE_PATTERNS)
22
+ assert is_ignored("yarn.lock", DEFAULT_IGNORE_PATTERNS)
23
+ assert is_ignored("Cargo.lock", DEFAULT_IGNORE_PATTERNS)
24
+ assert is_ignored("uv.lock", DEFAULT_IGNORE_PATTERNS)
25
+
26
+
27
+ def test_default_patterns_match_vendored_dirs() -> None:
28
+ assert is_ignored("node_modules/foo/index.js", DEFAULT_IGNORE_PATTERNS)
29
+ assert is_ignored("vendor/github.com/foo/bar.go", DEFAULT_IGNORE_PATTERNS)
30
+ assert is_ignored("third_party/x/y.cc", DEFAULT_IGNORE_PATTERNS)
31
+
32
+
33
+ def test_default_patterns_match_generated_stubs() -> None:
34
+ assert is_ignored("api_pb2.py", DEFAULT_IGNORE_PATTERNS)
35
+ assert is_ignored("foo.pb.go", DEFAULT_IGNORE_PATTERNS)
36
+ assert is_ignored("schema.generated.ts", DEFAULT_IGNORE_PATTERNS)
37
+
38
+
39
+ def test_default_patterns_do_not_match_normal_code() -> None:
40
+ assert not is_ignored("src/whycode/cli.py", DEFAULT_IGNORE_PATTERNS)
41
+ assert not is_ignored("README.md", DEFAULT_IGNORE_PATTERNS)
42
+ assert not is_ignored("tests/test_cli.py", DEFAULT_IGNORE_PATTERNS)
43
+ assert not is_ignored("Makefile", DEFAULT_IGNORE_PATTERNS)
44
+
45
+
46
+ def test_basename_match_for_root_pattern_in_subdir() -> None:
47
+ # `CHANGELOG*` should match `docs/CHANGELOG.md` even though the pattern has no slash.
48
+ assert is_ignored("docs/CHANGELOG.md", DEFAULT_IGNORE_PATTERNS)
49
+ assert is_ignored("packages/foo/CHANGES.rst", DEFAULT_IGNORE_PATTERNS)
50
+
51
+
52
+ def test_user_patterns_loaded(tmp_path) -> None: # type: ignore[no-untyped-def]
53
+ (tmp_path / ".whycodeignore").write_text(
54
+ "# this is a comment\n"
55
+ "*.proto\n"
56
+ "scripts/\n"
57
+ "\n" # blank line
58
+ "internal/legacy.py\n"
59
+ )
60
+ patterns = load_user_patterns(tmp_path)
61
+ assert patterns == ("*.proto", "scripts/", "internal/legacy.py")
62
+
63
+
64
+ def test_user_patterns_empty_when_no_file(tmp_path) -> None: # type: ignore[no-untyped-def]
65
+ assert load_user_patterns(tmp_path) == ()
66
+
67
+
68
+ def test_effective_patterns_combines_defaults_and_user(tmp_path) -> None: # type: ignore[no-untyped-def]
69
+ (tmp_path / ".whycodeignore").write_text("internal/legacy.py\n")
70
+ eff = effective_patterns(tmp_path)
71
+ assert "internal/legacy.py" in eff
72
+ assert "*.lock" in eff # default still present
73
+ assert is_ignored("internal/legacy.py", eff)
File without changes
File without changes
File without changes