whycode-cli: whycode_cli-0.4.1-py3-none-any.whl → whycode_cli-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
whycode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """WhyCode — tells you what to be afraid of before touching a file."""
2
2
 
3
- __version__ = "0.4.1"
3
+ __version__ = "0.4.2"
whycode/cache.py CHANGED
@@ -112,10 +112,21 @@ class CacheStore:
112
112
  cache misses; this class never invokes ``git`` itself.
113
113
  """
114
114
 
115
- def __init__(self, db_path: Path) -> None:
115
+ def __init__(self, db_path: Path, *, in_memory: bool = False) -> None:
116
+ """Open (creating if needed) the SQLite cache at ``db_path``.
117
+
118
+ ``in_memory=True`` opens a transient ``:memory:`` connection
119
+ instead — the disk file is never created and is never read.
120
+ Used by ``--no-cache`` to retain in-session amortisation
121
+ (matches the cold-fill code path) without persisting anything.
122
+ """
116
123
  self.db_path = db_path
117
- self.db_path.parent.mkdir(parents=True, exist_ok=True)
118
- self._conn = sqlite3.connect(self.db_path)
124
+ self._in_memory = in_memory
125
+ if in_memory:
126
+ self._conn = sqlite3.connect(":memory:")
127
+ else:
128
+ self.db_path.parent.mkdir(parents=True, exist_ok=True)
129
+ self._conn = sqlite3.connect(self.db_path)
119
130
  # row_factory makes column access readable in tests / debug.
120
131
  self._conn.row_factory = sqlite3.Row
121
132
  self._conn.execute("PRAGMA foreign_keys = ON")
@@ -402,13 +413,18 @@ class CacheStore:
402
413
  file_row_count = int(
403
414
  self._conn.execute("SELECT COUNT(*) FROM commit_files").fetchone()[0]
404
415
  )
405
- try:
406
- size_bytes = self.db_path.stat().st_size
407
- except OSError:
416
+ if self._in_memory:
408
417
  size_bytes = 0
418
+ exists = False
419
+ else:
420
+ try:
421
+ size_bytes = self.db_path.stat().st_size
422
+ except OSError:
423
+ size_bytes = 0
424
+ exists = self.db_path.exists()
409
425
  return CacheStats(
410
426
  path=self.db_path,
411
- exists=self.db_path.exists(),
427
+ exists=exists,
412
428
  schema_version=self.schema_version,
413
429
  head_sha=self.head_sha,
414
430
  commit_count=commit_count,
@@ -430,6 +446,16 @@ def open_for(repo_root: Path) -> CacheStore:
430
446
  return CacheStore(cache_path_for(repo_root))
431
447
 
432
448
 
449
+ def open_in_memory(repo_root: Path) -> CacheStore:
450
+ """Open a transient in-memory cache for ``repo_root``.
451
+
452
+ Used by ``--no-cache`` to keep within-session amortisation (the same
453
+ cold-fill code path everything else uses) while never touching disk.
454
+ The store is destroyed on ``close()`` and has no after-effects.
455
+ """
456
+ return CacheStore(cache_path_for(repo_root), in_memory=True)
457
+
458
+
433
459
  def parse_authored_at(value: str) -> datetime:
434
460
  """Parse the ``authored_at`` string we stored from git.
435
461
 
whycode/cli.py CHANGED
@@ -50,18 +50,27 @@ err = Console(stderr=True)
50
50
 
51
51
 
52
52
  def _open_cache(repo_root: Path, no_cache: bool) -> ch.CacheStore | None:
53
- """Open the on-disk cache for ``repo_root`` unless suppressed.
54
-
55
- A None return means "do not pass a cache through git_facts" — every
56
- git-side helper falls back to its original network-free, cache-free
57
- implementation. This is the escape hatch behind ``--no-cache`` and
58
- is also the default when the cache cannot be initialised at all
59
- (read-only filesystem, etc.); we never want a cache failure to
60
- block the main read path.
53
+ """Open the cache for ``repo_root`` according to the no-cache flag.
54
+
55
+ Modes:
56
+ * ``no_cache=False`` (the default): persistent on-disk SQLite at
57
+ ``.whycode/cache.db``.
58
+ * ``no_cache=True``: a transient ``:memory:`` SQLite store. The
59
+ same git-walk code path runs as for the cold-fill, but the
60
+ database is destroyed on ``close()`` — nothing lands on disk
61
+ and the next run starts cold. Keeping per-run amortisation
62
+ (one ``git log`` walk shared across files) is what makes
63
+ ``--no-cache`` at most as slow as a cold persistent fill;
64
+ the previous ``cache=None`` short-circuit lost that and so
65
+ ``--no-cache`` re-issued per-file walks every iteration.
66
+
67
+ A ``None`` return means "do not pass a cache through git_facts".
68
+ Happens when opening the cache (on-disk or in-memory) raises ``OSError`` — rare, and
69
+ we never want a cache problem to block the main read path.
61
70
  """
62
- if no_cache:
63
- return None
64
71
  try:
72
+ if no_cache:
73
+ return ch.open_in_memory(repo_root)
65
74
  return ch.open_for(repo_root)
66
75
  except OSError:
67
76
  return None
@@ -425,7 +434,9 @@ def diff(
425
434
  cards.append(rc.build(repo_root, f, cache=cache))
426
435
  except gf.GitError:
427
436
  continue
428
- cards.sort(key=lambda c: -c.score.value)
437
+ # Stable tie-break: lex smallest path on identical scores so cache
438
+ # and --no-cache truncate the same files at --top N.
439
+ cards.sort(key=lambda c: (-c.score.value, c.path))
429
440
  cards = cards[:top]
430
441
  finally:
431
442
  if cache is not None:
@@ -565,16 +576,17 @@ def highlights(
565
576
 
566
577
  inv_pairs = gf.extract_invariant_quotes(commits)
567
578
  sha_to_commit = {c.sha: c for c in commits}
568
- seen_lines: dict[str, str] = {}
569
- for sha, line in inv_pairs:
570
- seen_lines.setdefault(line, sha)
579
+ deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
571
580
  inv_records: list[tuple[str, str, gf.Commit]] = []
572
- for line, sha in seen_lines.items():
581
+ for sha, line in deduped:
573
582
  commit = sha_to_commit.get(sha)
574
583
  if commit is None:
575
584
  continue
576
585
  inv_records.append((line, sha, commit))
577
- inv_records.sort(key=lambda t: t[2].authored_at, reverse=True)
586
+ # Sort newest first; on identical timestamps fall back to lexicographically
587
+ # smallest sha so cache and --no-cache emit byte-identical output.
588
+ inv_records.sort(key=lambda t: t[1]) # secondary: sha asc
589
+ inv_records.sort(key=lambda t: t[2].authored_at, reverse=True) # primary
578
590
  inv_records = inv_records[:invariants]
579
591
 
580
592
  incident_records = gf.find_incidents(commits)[:incidents]
@@ -827,7 +839,10 @@ def scan(
827
839
  if cache is not None:
828
840
  cache.close()
829
841
 
830
- cards.sort(key=lambda c: -c.score.value)
842
+ # Stable tie-break on identical scores: lexicographically smallest path
843
+ # so cache and --no-cache produce byte-identical text output for the
844
+ # same HEAD. Without this, the truncation at --top N is non-deterministic.
845
+ cards.sort(key=lambda c: (-c.score.value, c.path))
831
846
  top_cards = cards[:top]
832
847
  if not top_cards:
833
848
  # Be honest about what "no flagged files" actually means. A user who
@@ -949,7 +964,8 @@ def show(
949
964
  cards.append(rc.build(repo_root, change.path))
950
965
  except gf.GitError:
951
966
  continue
952
- cards.sort(key=lambda c: -c.score.value)
967
+ # Stable tie-break on identical scores: lex smallest path.
968
+ cards.sort(key=lambda c: (-c.score.value, c.path))
953
969
 
954
970
  if json_out:
955
971
  console.print_json(
@@ -1065,13 +1081,18 @@ def tour(
1065
1081
 
1066
1082
  inv_pairs = gf.extract_invariant_quotes(commits)
1067
1083
  sha_to_commit = {c.sha: c for c in commits}
1068
- seen_lines: dict[str, str] = {}
1069
- for sha, line in inv_pairs:
1070
- seen_lines.setdefault(line, sha)
1084
+ deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
1085
+ # Sort newest first with sha-asc tie-break so cache and --no-cache
1086
+ # surface the same three lines in the same order.
1087
+ deduped_sorted = sorted(
1088
+ (p for p in deduped if p[0] in sha_to_commit),
1089
+ key=lambda p: p[0],
1090
+ )
1091
+ deduped_sorted.sort(
1092
+ key=lambda p: sha_to_commit[p[0]].authored_at, reverse=True
1093
+ )
1071
1094
  invariants_top = [
1072
- (line, sha_to_commit[sha])
1073
- for line, sha in seen_lines.items()
1074
- if sha in sha_to_commit
1095
+ (line, sha_to_commit[sha]) for sha, line in deduped_sorted
1075
1096
  ][:3]
1076
1097
  incidents_top = gf.find_incidents(commits)[:3]
1077
1098
 
@@ -1135,7 +1156,8 @@ def tour(
1135
1156
  ]
1136
1157
  if useful:
1137
1158
  cards.append(card)
1138
- cards.sort(key=lambda c: -c.score.value)
1159
+ # Stable tie-break: lex smallest path on identical scores.
1160
+ cards.sort(key=lambda c: (-c.score.value, c.path))
1139
1161
 
1140
1162
  if cards:
1141
1163
  console.print("[bold red]Top 3 risky files[/bold red]")
whycode/git_facts.py CHANGED
@@ -967,6 +967,47 @@ def extract_invariant_quotes(commits: Sequence[Commit]) -> list[tuple[str, str]]
967
967
  return out
968
968
 
969
969
 
970
+ def dedupe_invariant_lines(
971
+ pairs: Sequence[tuple[str, str]],
972
+ sha_to_commit: dict[str, Commit],
973
+ ) -> list[tuple[str, str]]:
974
+ """Collapse identical invariant lines to one canonical (sha, line) pair.
975
+
976
+ When two commits state the same invariant line — typically a cherry-pick
977
+ onto a maintenance branch, or a rebase that duplicated the message — we
978
+ must pick exactly one to surface. Without a deterministic rule the cache
979
+ and ``--no-cache`` paths can disagree (their walk orders differ when
980
+ timestamps tie), and downstream JSON consumers see flaky output across
981
+ runs.
982
+
983
+ The rule:
984
+
985
+ 1. Earliest ``authored_at`` wins. The original statement is canonical;
986
+ cherry-picks and rebases are derivatives.
987
+ 2. Lexicographically smallest ``sha`` breaks ties on identical timestamps.
988
+
989
+ The returned list preserves first-encounter order of the (now-unique)
990
+ lines so downstream code that sorts by date sees a stable input.
991
+ Pairs whose ``sha`` is not in ``sha_to_commit`` keep their first-seen
992
+ record (no metadata to compare on).
993
+ """
994
+ canonical: dict[str, str] = {}
995
+ for sha, line in pairs:
996
+ existing = canonical.get(line)
997
+ if existing is None:
998
+ canonical[line] = sha
999
+ continue
1000
+ old_commit = sha_to_commit.get(existing)
1001
+ new_commit = sha_to_commit.get(sha)
1002
+ if old_commit is None or new_commit is None:
1003
+ continue
1004
+ old_key = (old_commit.authored_at, existing)
1005
+ new_key = (new_commit.authored_at, sha)
1006
+ if new_key < old_key:
1007
+ canonical[line] = sha
1008
+ return [(sha, line) for line, sha in canonical.items()]
1009
+
1010
+
970
1011
  def author_last_activity(repo_root: Path, email: str) -> datetime | None:
971
1012
  """Most recent commit timestamp by ``email`` anywhere in the repo, or None."""
972
1013
  raw = _run_git(
whycode_cli-0.4.1.dist-info/METADATA → whycode_cli-0.4.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: whycode-cli
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Tells you what to be afraid of before you touch a file.
5
5
  Author: Kevin
6
6
  License-Expression: MIT
whycode_cli-0.4.1.dist-info/RECORD → whycode_cli-0.4.2.dist-info/RECORD CHANGED
@@ -1,9 +1,9 @@
1
- whycode/__init__.py,sha256=dPQOppaGvPoPBoACrHwxqGykCdDMNZRROtDjOmyRuf8,96
1
+ whycode/__init__.py,sha256=YXMeIO9f86OJ3_EonP3wlcLW6Qv9sIHQQZqr-Ja4HV8,96
2
2
  whycode/__main__.py,sha256=dqAk6746YpuM-FTIH4TBOULegGc5WweojiZjce0VYgQ,105
3
- whycode/cache.py,sha256=v55KbSlTqmP_ot1FEFqxCNpAApj6vthpHl2l0lGLX3A,17477
4
- whycode/cli.py,sha256=OTYPhp8ItBXPRrQ1y6zGt0BwKyAYEuHAo3T0hMHqINk,47836
3
+ whycode/cache.py,sha256=0cEPZHdolQbSiBLAOnMu20tobIrc7G0MNycpldHRpkk,18536
4
+ whycode/cli.py,sha256=uRW5aysC2ufYvs_qPC1gzZcjQTFUZHdXxAmF25d4oY8,49328
5
5
  whycode/decisions.py,sha256=oCVhEF7QfHeci0LAWNtEjV2mUAEBJloL1rT3I4XXbkw,7570
6
- whycode/git_facts.py,sha256=vAeyhxZTrqa_6zmVuBV-06JhZ-TFBiRmcaISK1oOQjM,40162
6
+ whycode/git_facts.py,sha256=MLp8e4nGaam6lBGCHY5-sftHj71lyg_HmmBOBx3g-kg,41829
7
7
  whycode/ignore.py,sha256=O_8bHIt0d1U-sYrBajBa7oEqpnHWU3f6Zf-8PU8CpO0,4748
8
8
  whycode/llm.py,sha256=leB94pBg8kUCq_BujZq5ixny0urGtKskjdaKoum_eCA,4092
9
9
  whycode/mcp_server.py,sha256=ht1tStAkOwmQzNIRkm1eA8Tnc59fzDRSGkgyIprft-0,18503
@@ -14,9 +14,9 @@ whycode/suppressions.py,sha256=1lKSs-kCgpnJbcxozcgiSP8ZAfjEDMHXuM3sw4FaY78,3836
14
14
  whycode/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  whycode/templates/github-workflow.yml,sha256=LAfHMDG2TkAwi4vCNinHk-4zOt-mCWErBpmpaqlW5oA,2251
16
16
  whycode/templates/pre-commit,sha256=IhU11CvoDwqRAAsvHwUo-BwaNbdgy1cpXc54Z_phrmQ,316
17
- whycode_cli-0.4.1.dist-info/licenses/LICENSE,sha256=U6LN5qg5kJXSJf7KFPm9KJhmiGn3qK_GsTVWXdt1DFA,1062
18
- whycode_cli-0.4.1.dist-info/METADATA,sha256=M2XBAL02LMRZtW4Pj4L3Gcuifqh2lIAQa_1Hpt3xfPI,10218
19
- whycode_cli-0.4.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
- whycode_cli-0.4.1.dist-info/entry_points.txt,sha256=xrNWc4CQn3ZhQFJxsGIPiTqpN19K4pRpgaj6qGaEzSQ,44
21
- whycode_cli-0.4.1.dist-info/top_level.txt,sha256=6yIL5rxW-4DbARHQYrPlGQVqKddZ88sjvmNosDh1w3A,8
22
- whycode_cli-0.4.1.dist-info/RECORD,,
17
+ whycode_cli-0.4.2.dist-info/licenses/LICENSE,sha256=U6LN5qg5kJXSJf7KFPm9KJhmiGn3qK_GsTVWXdt1DFA,1062
18
+ whycode_cli-0.4.2.dist-info/METADATA,sha256=GD3cP18eEcHePHEXxroFuuZ-2pysLn51biNROQKDBXw,10218
19
+ whycode_cli-0.4.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
20
+ whycode_cli-0.4.2.dist-info/entry_points.txt,sha256=xrNWc4CQn3ZhQFJxsGIPiTqpN19K4pRpgaj6qGaEzSQ,44
21
+ whycode_cli-0.4.2.dist-info/top_level.txt,sha256=6yIL5rxW-4DbARHQYrPlGQVqKddZ88sjvmNosDh1w3A,8
22
+ whycode_cli-0.4.2.dist-info/RECORD,,