whycode-cli 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- whycode/__init__.py +1 -1
- whycode/cache.py +33 -7
- whycode/cli.py +47 -25
- whycode/git_facts.py +41 -0
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/METADATA +1 -1
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/RECORD +10 -10
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/WHEEL +0 -0
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/entry_points.txt +0 -0
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {whycode_cli-0.4.1.dist-info → whycode_cli-0.4.2.dist-info}/top_level.txt +0 -0
whycode/__init__.py
CHANGED
whycode/cache.py
CHANGED
|
@@ -112,10 +112,21 @@ class CacheStore:
|
|
|
112
112
|
cache misses; this class never invokes ``git`` itself.
|
|
113
113
|
"""
|
|
114
114
|
|
|
115
|
-
def __init__(self, db_path: Path) -> None:
|
|
115
|
+
def __init__(self, db_path: Path, *, in_memory: bool = False) -> None:
|
|
116
|
+
"""Open (creating if needed) the SQLite cache at ``db_path``.
|
|
117
|
+
|
|
118
|
+
``in_memory=True`` opens a transient ``:memory:`` connection
|
|
119
|
+
instead — the disk file is never created and is never read.
|
|
120
|
+
Used by ``--no-cache`` to retain in-session amortisation
|
|
121
|
+
(matches the cold-fill code path) without persisting anything.
|
|
122
|
+
"""
|
|
116
123
|
self.db_path = db_path
|
|
117
|
-
self.
|
|
118
|
-
|
|
124
|
+
self._in_memory = in_memory
|
|
125
|
+
if in_memory:
|
|
126
|
+
self._conn = sqlite3.connect(":memory:")
|
|
127
|
+
else:
|
|
128
|
+
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
129
|
+
self._conn = sqlite3.connect(self.db_path)
|
|
119
130
|
# row_factory makes column access readable in tests / debug.
|
|
120
131
|
self._conn.row_factory = sqlite3.Row
|
|
121
132
|
self._conn.execute("PRAGMA foreign_keys = ON")
|
|
@@ -402,13 +413,18 @@ class CacheStore:
|
|
|
402
413
|
file_row_count = int(
|
|
403
414
|
self._conn.execute("SELECT COUNT(*) FROM commit_files").fetchone()[0]
|
|
404
415
|
)
|
|
405
|
-
|
|
406
|
-
size_bytes = self.db_path.stat().st_size
|
|
407
|
-
except OSError:
|
|
416
|
+
if self._in_memory:
|
|
408
417
|
size_bytes = 0
|
|
418
|
+
exists = False
|
|
419
|
+
else:
|
|
420
|
+
try:
|
|
421
|
+
size_bytes = self.db_path.stat().st_size
|
|
422
|
+
except OSError:
|
|
423
|
+
size_bytes = 0
|
|
424
|
+
exists = self.db_path.exists()
|
|
409
425
|
return CacheStats(
|
|
410
426
|
path=self.db_path,
|
|
411
|
-
exists=
|
|
427
|
+
exists=exists,
|
|
412
428
|
schema_version=self.schema_version,
|
|
413
429
|
head_sha=self.head_sha,
|
|
414
430
|
commit_count=commit_count,
|
|
@@ -430,6 +446,16 @@ def open_for(repo_root: Path) -> CacheStore:
|
|
|
430
446
|
return CacheStore(cache_path_for(repo_root))
|
|
431
447
|
|
|
432
448
|
|
|
449
|
+
def open_in_memory(repo_root: Path) -> CacheStore:
|
|
450
|
+
"""Open a transient in-memory cache for ``repo_root``.
|
|
451
|
+
|
|
452
|
+
Used by ``--no-cache`` to keep within-session amortisation (the same
|
|
453
|
+
cold-fill code path everything else uses) while never touching disk.
|
|
454
|
+
The store is destroyed on ``close()`` and has no after-effects.
|
|
455
|
+
"""
|
|
456
|
+
return CacheStore(cache_path_for(repo_root), in_memory=True)
|
|
457
|
+
|
|
458
|
+
|
|
433
459
|
def parse_authored_at(value: str) -> datetime:
|
|
434
460
|
"""Parse the ``authored_at`` string we stored from git.
|
|
435
461
|
|
whycode/cli.py
CHANGED
|
@@ -50,18 +50,27 @@ err = Console(stderr=True)
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def _open_cache(repo_root: Path, no_cache: bool) -> ch.CacheStore | None:
|
|
53
|
-
"""Open the
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
53
|
+
"""Open the cache for ``repo_root`` according to the no-cache flag.
|
|
54
|
+
|
|
55
|
+
Modes:
|
|
56
|
+
* ``no_cache=False`` (the default): persistent on-disk SQLite at
|
|
57
|
+
``.whycode/cache.db``.
|
|
58
|
+
* ``no_cache=True``: a transient ``:memory:`` SQLite store. The
|
|
59
|
+
same git-walk code path runs as for the cold-fill, but the
|
|
60
|
+
database is destroyed on ``close()`` — nothing lands on disk
|
|
61
|
+
and the next run starts cold. Keeping per-run amortisation
|
|
62
|
+
(one ``git log`` walk shared across files) is what makes
|
|
63
|
+
``--no-cache`` at most as slow as a cold persistent fill;
|
|
64
|
+
the previous ``cache=None`` short-circuit lost that and so
|
|
65
|
+
``--no-cache`` re-issued per-file walks every iteration.
|
|
66
|
+
|
|
67
|
+
A ``None`` return means "do not pass a cache through git_facts".
|
|
68
|
+
Happens only when opening the store raises ``OSError`` — rare, and
|
|
69
|
+
we never want a cache problem to block the main read path.
|
|
61
70
|
"""
|
|
62
|
-
if no_cache:
|
|
63
|
-
return None
|
|
64
71
|
try:
|
|
72
|
+
if no_cache:
|
|
73
|
+
return ch.open_in_memory(repo_root)
|
|
65
74
|
return ch.open_for(repo_root)
|
|
66
75
|
except OSError:
|
|
67
76
|
return None
|
|
@@ -425,7 +434,9 @@ def diff(
|
|
|
425
434
|
cards.append(rc.build(repo_root, f, cache=cache))
|
|
426
435
|
except gf.GitError:
|
|
427
436
|
continue
|
|
428
|
-
|
|
437
|
+
# Stable tie-break: lex smallest path on identical scores so cache
|
|
438
|
+
# and --no-cache truncate the same files at --top N.
|
|
439
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
429
440
|
cards = cards[:top]
|
|
430
441
|
finally:
|
|
431
442
|
if cache is not None:
|
|
@@ -565,16 +576,17 @@ def highlights(
|
|
|
565
576
|
|
|
566
577
|
inv_pairs = gf.extract_invariant_quotes(commits)
|
|
567
578
|
sha_to_commit = {c.sha: c for c in commits}
|
|
568
|
-
|
|
569
|
-
for sha, line in inv_pairs:
|
|
570
|
-
seen_lines.setdefault(line, sha)
|
|
579
|
+
deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
|
|
571
580
|
inv_records: list[tuple[str, str, gf.Commit]] = []
|
|
572
|
-
for
|
|
581
|
+
for sha, line in deduped:
|
|
573
582
|
commit = sha_to_commit.get(sha)
|
|
574
583
|
if commit is None:
|
|
575
584
|
continue
|
|
576
585
|
inv_records.append((line, sha, commit))
|
|
577
|
-
|
|
586
|
+
# Sort newest first; on identical timestamps fall back to lexicographically
|
|
587
|
+
# smallest sha so cache and --no-cache emit byte-identical output.
|
|
588
|
+
inv_records.sort(key=lambda t: t[1]) # secondary: sha asc
|
|
589
|
+
inv_records.sort(key=lambda t: t[2].authored_at, reverse=True) # primary
|
|
578
590
|
inv_records = inv_records[:invariants]
|
|
579
591
|
|
|
580
592
|
incident_records = gf.find_incidents(commits)[:incidents]
|
|
@@ -827,7 +839,10 @@ def scan(
|
|
|
827
839
|
if cache is not None:
|
|
828
840
|
cache.close()
|
|
829
841
|
|
|
830
|
-
|
|
842
|
+
# Stable tie-break on identical scores: lexicographically smallest path
|
|
843
|
+
# so cache and --no-cache produce byte-identical text output for the
|
|
844
|
+
# same HEAD. Without this, the truncation at --top N is non-deterministic.
|
|
845
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
831
846
|
top_cards = cards[:top]
|
|
832
847
|
if not top_cards:
|
|
833
848
|
# Be honest about what "no flagged files" actually means. A user who
|
|
@@ -949,7 +964,8 @@ def show(
|
|
|
949
964
|
cards.append(rc.build(repo_root, change.path))
|
|
950
965
|
except gf.GitError:
|
|
951
966
|
continue
|
|
952
|
-
|
|
967
|
+
# Stable tie-break on identical scores: lex smallest path.
|
|
968
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
953
969
|
|
|
954
970
|
if json_out:
|
|
955
971
|
console.print_json(
|
|
@@ -1065,13 +1081,18 @@ def tour(
|
|
|
1065
1081
|
|
|
1066
1082
|
inv_pairs = gf.extract_invariant_quotes(commits)
|
|
1067
1083
|
sha_to_commit = {c.sha: c for c in commits}
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1084
|
+
deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
|
|
1085
|
+
# Sort newest first with sha-asc tie-break so cache and --no-cache
|
|
1086
|
+
# surface the same three lines in the same order.
|
|
1087
|
+
deduped_sorted = sorted(
|
|
1088
|
+
(p for p in deduped if p[0] in sha_to_commit),
|
|
1089
|
+
key=lambda p: p[0],
|
|
1090
|
+
)
|
|
1091
|
+
deduped_sorted.sort(
|
|
1092
|
+
key=lambda p: sha_to_commit[p[0]].authored_at, reverse=True
|
|
1093
|
+
)
|
|
1071
1094
|
invariants_top = [
|
|
1072
|
-
(line, sha_to_commit[sha])
|
|
1073
|
-
for line, sha in seen_lines.items()
|
|
1074
|
-
if sha in sha_to_commit
|
|
1095
|
+
(line, sha_to_commit[sha]) for sha, line in deduped_sorted
|
|
1075
1096
|
][:3]
|
|
1076
1097
|
incidents_top = gf.find_incidents(commits)[:3]
|
|
1077
1098
|
|
|
@@ -1135,7 +1156,8 @@ def tour(
|
|
|
1135
1156
|
]
|
|
1136
1157
|
if useful:
|
|
1137
1158
|
cards.append(card)
|
|
1138
|
-
|
|
1159
|
+
# Stable tie-break: lex smallest path on identical scores.
|
|
1160
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
1139
1161
|
|
|
1140
1162
|
if cards:
|
|
1141
1163
|
console.print("[bold red]Top 3 risky files[/bold red]")
|
whycode/git_facts.py
CHANGED
|
@@ -967,6 +967,47 @@ def extract_invariant_quotes(commits: Sequence[Commit]) -> list[tuple[str, str]]
|
|
|
967
967
|
return out
|
|
968
968
|
|
|
969
969
|
|
|
970
|
+
def dedupe_invariant_lines(
|
|
971
|
+
pairs: Sequence[tuple[str, str]],
|
|
972
|
+
sha_to_commit: dict[str, Commit],
|
|
973
|
+
) -> list[tuple[str, str]]:
|
|
974
|
+
"""Collapse identical invariant lines to one canonical (sha, line) pair.
|
|
975
|
+
|
|
976
|
+
When two commits state the same invariant line — typically a cherry-pick
|
|
977
|
+
onto a maintenance branch, or a rebase that duplicated the message — we
|
|
978
|
+
must pick exactly one to surface. Without a deterministic rule the cache
|
|
979
|
+
and ``--no-cache`` paths can disagree (their walk orders differ when
|
|
980
|
+
timestamps tie), and downstream JSON consumers see flaky output across
|
|
981
|
+
runs.
|
|
982
|
+
|
|
983
|
+
The rule:
|
|
984
|
+
|
|
985
|
+
1. Earliest ``authored_at`` wins. The original statement is canonical;
|
|
986
|
+
cherry-picks and rebases are derivatives.
|
|
987
|
+
2. Lexicographically smallest ``sha`` breaks ties on identical timestamps.
|
|
988
|
+
|
|
989
|
+
The returned list preserves first-encounter order of the (now-unique)
|
|
990
|
+
lines so downstream code that sorts by date sees a stable input.
|
|
991
|
+
Pairs whose ``sha`` is not in ``sha_to_commit`` keep their first-seen
|
|
992
|
+
record (no metadata to compare on).
|
|
993
|
+
"""
|
|
994
|
+
canonical: dict[str, str] = {}
|
|
995
|
+
for sha, line in pairs:
|
|
996
|
+
existing = canonical.get(line)
|
|
997
|
+
if existing is None:
|
|
998
|
+
canonical[line] = sha
|
|
999
|
+
continue
|
|
1000
|
+
old_commit = sha_to_commit.get(existing)
|
|
1001
|
+
new_commit = sha_to_commit.get(sha)
|
|
1002
|
+
if old_commit is None or new_commit is None:
|
|
1003
|
+
continue
|
|
1004
|
+
old_key = (old_commit.authored_at, existing)
|
|
1005
|
+
new_key = (new_commit.authored_at, sha)
|
|
1006
|
+
if new_key < old_key:
|
|
1007
|
+
canonical[line] = sha
|
|
1008
|
+
return [(sha, line) for line, sha in canonical.items()]
|
|
1009
|
+
|
|
1010
|
+
|
|
970
1011
|
def author_last_activity(repo_root: Path, email: str) -> datetime | None:
|
|
971
1012
|
"""Most recent commit timestamp by ``email`` anywhere in the repo, or None."""
|
|
972
1013
|
raw = _run_git(
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
whycode/__init__.py,sha256=
|
|
1
|
+
whycode/__init__.py,sha256=YXMeIO9f86OJ3_EonP3wlcLW6Qv9sIHQQZqr-Ja4HV8,96
|
|
2
2
|
whycode/__main__.py,sha256=dqAk6746YpuM-FTIH4TBOULegGc5WweojiZjce0VYgQ,105
|
|
3
|
-
whycode/cache.py,sha256=
|
|
4
|
-
whycode/cli.py,sha256=
|
|
3
|
+
whycode/cache.py,sha256=0cEPZHdolQbSiBLAOnMu20tobIrc7G0MNycpldHRpkk,18536
|
|
4
|
+
whycode/cli.py,sha256=uRW5aysC2ufYvs_qPC1gzZcjQTFUZHdXxAmF25d4oY8,49328
|
|
5
5
|
whycode/decisions.py,sha256=oCVhEF7QfHeci0LAWNtEjV2mUAEBJloL1rT3I4XXbkw,7570
|
|
6
|
-
whycode/git_facts.py,sha256=
|
|
6
|
+
whycode/git_facts.py,sha256=MLp8e4nGaam6lBGCHY5-sftHj71lyg_HmmBOBx3g-kg,41829
|
|
7
7
|
whycode/ignore.py,sha256=O_8bHIt0d1U-sYrBajBa7oEqpnHWU3f6Zf-8PU8CpO0,4748
|
|
8
8
|
whycode/llm.py,sha256=leB94pBg8kUCq_BujZq5ixny0urGtKskjdaKoum_eCA,4092
|
|
9
9
|
whycode/mcp_server.py,sha256=ht1tStAkOwmQzNIRkm1eA8Tnc59fzDRSGkgyIprft-0,18503
|
|
@@ -14,9 +14,9 @@ whycode/suppressions.py,sha256=1lKSs-kCgpnJbcxozcgiSP8ZAfjEDMHXuM3sw4FaY78,3836
|
|
|
14
14
|
whycode/templates/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
whycode/templates/github-workflow.yml,sha256=LAfHMDG2TkAwi4vCNinHk-4zOt-mCWErBpmpaqlW5oA,2251
|
|
16
16
|
whycode/templates/pre-commit,sha256=IhU11CvoDwqRAAsvHwUo-BwaNbdgy1cpXc54Z_phrmQ,316
|
|
17
|
-
whycode_cli-0.4.
|
|
18
|
-
whycode_cli-0.4.
|
|
19
|
-
whycode_cli-0.4.
|
|
20
|
-
whycode_cli-0.4.
|
|
21
|
-
whycode_cli-0.4.
|
|
22
|
-
whycode_cli-0.4.
|
|
17
|
+
whycode_cli-0.4.2.dist-info/licenses/LICENSE,sha256=U6LN5qg5kJXSJf7KFPm9KJhmiGn3qK_GsTVWXdt1DFA,1062
|
|
18
|
+
whycode_cli-0.4.2.dist-info/METADATA,sha256=GD3cP18eEcHePHEXxroFuuZ-2pysLn51biNROQKDBXw,10218
|
|
19
|
+
whycode_cli-0.4.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
20
|
+
whycode_cli-0.4.2.dist-info/entry_points.txt,sha256=xrNWc4CQn3ZhQFJxsGIPiTqpN19K4pRpgaj6qGaEzSQ,44
|
|
21
|
+
whycode_cli-0.4.2.dist-info/top_level.txt,sha256=6yIL5rxW-4DbARHQYrPlGQVqKddZ88sjvmNosDh1w3A,8
|
|
22
|
+
whycode_cli-0.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|