whycode-cli 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/PKG-INFO +1 -1
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/pyproject.toml +1 -1
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/__init__.py +1 -1
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/cache.py +33 -7
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/cli.py +47 -25
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/git_facts.py +41 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/PKG-INFO +1 -1
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_cache.py +51 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_cli.py +113 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/LICENSE +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/README.md +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/setup.cfg +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/__main__.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/decisions.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/ignore.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/llm.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/mcp_server.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/risk_card.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/scorer.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/signals.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/suppressions.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/templates/__init__.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/templates/github-workflow.yml +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode/templates/pre-commit +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/SOURCES.txt +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/dependency_links.txt +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/entry_points.txt +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/requires.txt +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/src/whycode_cli.egg-info/top_level.txt +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_decisions.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_git_facts.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_ignore.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_mcp_prompts.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_scorer.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_signals.py +0 -0
- {whycode_cli-0.4.1 → whycode_cli-0.4.2}/tests/test_suppressions.py +0 -0
|
@@ -112,10 +112,21 @@ class CacheStore:
|
|
|
112
112
|
cache misses; this class never invokes ``git`` itself.
|
|
113
113
|
"""
|
|
114
114
|
|
|
115
|
-
def __init__(self, db_path: Path) -> None:
|
|
115
|
+
def __init__(self, db_path: Path, *, in_memory: bool = False) -> None:
|
|
116
|
+
"""Open (creating if needed) the SQLite cache at ``db_path``.
|
|
117
|
+
|
|
118
|
+
``in_memory=True`` opens a transient ``:memory:`` connection
|
|
119
|
+
instead — the disk file is never created and is never read.
|
|
120
|
+
Used by ``--no-cache`` to retain in-session amortisation
|
|
121
|
+
(matches the cold-fill code path) without persisting anything.
|
|
122
|
+
"""
|
|
116
123
|
self.db_path = db_path
|
|
117
|
-
self.
|
|
118
|
-
|
|
124
|
+
self._in_memory = in_memory
|
|
125
|
+
if in_memory:
|
|
126
|
+
self._conn = sqlite3.connect(":memory:")
|
|
127
|
+
else:
|
|
128
|
+
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
129
|
+
self._conn = sqlite3.connect(self.db_path)
|
|
119
130
|
# row_factory makes column access readable in tests / debug.
|
|
120
131
|
self._conn.row_factory = sqlite3.Row
|
|
121
132
|
self._conn.execute("PRAGMA foreign_keys = ON")
|
|
@@ -402,13 +413,18 @@ class CacheStore:
|
|
|
402
413
|
file_row_count = int(
|
|
403
414
|
self._conn.execute("SELECT COUNT(*) FROM commit_files").fetchone()[0]
|
|
404
415
|
)
|
|
405
|
-
|
|
406
|
-
size_bytes = self.db_path.stat().st_size
|
|
407
|
-
except OSError:
|
|
416
|
+
if self._in_memory:
|
|
408
417
|
size_bytes = 0
|
|
418
|
+
exists = False
|
|
419
|
+
else:
|
|
420
|
+
try:
|
|
421
|
+
size_bytes = self.db_path.stat().st_size
|
|
422
|
+
except OSError:
|
|
423
|
+
size_bytes = 0
|
|
424
|
+
exists = self.db_path.exists()
|
|
409
425
|
return CacheStats(
|
|
410
426
|
path=self.db_path,
|
|
411
|
-
exists=
|
|
427
|
+
exists=exists,
|
|
412
428
|
schema_version=self.schema_version,
|
|
413
429
|
head_sha=self.head_sha,
|
|
414
430
|
commit_count=commit_count,
|
|
@@ -430,6 +446,16 @@ def open_for(repo_root: Path) -> CacheStore:
|
|
|
430
446
|
return CacheStore(cache_path_for(repo_root))
|
|
431
447
|
|
|
432
448
|
|
|
449
|
+
def open_in_memory(repo_root: Path) -> CacheStore:
    """Build a throwaway ``CacheStore`` for ``repo_root`` backed by ``:memory:``.

    The regular cache path for the repository is still computed and handed
    to the store, but ``in_memory=True`` is passed alongside it so the
    store connects to a transient in-memory SQLite database rather than
    to that file. This is the store ``--no-cache`` runs use: cached rows
    can be shared within one session, and nothing is written to disk.
    """
    nominal_db_path = cache_path_for(repo_root)
    return CacheStore(nominal_db_path, in_memory=True)
|
|
457
|
+
|
|
458
|
+
|
|
433
459
|
def parse_authored_at(value: str) -> datetime:
|
|
434
460
|
"""Parse the ``authored_at`` string we stored from git.
|
|
435
461
|
|
|
@@ -50,18 +50,27 @@ err = Console(stderr=True)
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
def _open_cache(repo_root: Path, no_cache: bool) -> ch.CacheStore | None:
    """Open the cache for ``repo_root`` according to the no-cache flag.

    Modes:
      * ``no_cache=False`` (the default): persistent on-disk SQLite at
        ``.whycode/cache.db``.
      * ``no_cache=True``: a transient ``:memory:`` SQLite store. The
        same git-walk code path runs as for the cold-fill, but the
        database is destroyed on ``close()`` — nothing lands on disk
        and the next run starts cold. Keeping per-run amortisation
        (one ``git log`` walk shared across files) is what makes
        ``--no-cache`` at most as slow as a cold persistent fill.

    Returns:
        The opened store, or ``None`` when opening it failed. ``None``
        means "do not pass a cache through git_facts": a cache problem
        must never block the main read path. This applies to both
        modes — a failed persistent open is not retried in memory.
    """
    # Function-scope import so the handler can name sqlite3.Error without
    # depending on what this module imports at the top of the file.
    import sqlite3

    opener = ch.open_in_memory if no_cache else ch.open_for
    try:
        return opener(repo_root)
    except (OSError, sqlite3.Error):
        # OSError covers filesystem failures (e.g. creating the cache
        # directory). sqlite3.Error covers failures raised by SQLite
        # itself (unopenable, locked or corrupt database); those do not
        # derive from OSError and previously escaped this handler.
        return None
|
|
@@ -425,7 +434,9 @@ def diff(
|
|
|
425
434
|
cards.append(rc.build(repo_root, f, cache=cache))
|
|
426
435
|
except gf.GitError:
|
|
427
436
|
continue
|
|
428
|
-
|
|
437
|
+
# Stable tie-break: lex smallest path on identical scores so cache
|
|
438
|
+
# and --no-cache truncate the same files at --top N.
|
|
439
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
429
440
|
cards = cards[:top]
|
|
430
441
|
finally:
|
|
431
442
|
if cache is not None:
|
|
@@ -565,16 +576,17 @@ def highlights(
|
|
|
565
576
|
|
|
566
577
|
inv_pairs = gf.extract_invariant_quotes(commits)
|
|
567
578
|
sha_to_commit = {c.sha: c for c in commits}
|
|
568
|
-
|
|
569
|
-
for sha, line in inv_pairs:
|
|
570
|
-
seen_lines.setdefault(line, sha)
|
|
579
|
+
deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
|
|
571
580
|
inv_records: list[tuple[str, str, gf.Commit]] = []
|
|
572
|
-
for
|
|
581
|
+
for sha, line in deduped:
|
|
573
582
|
commit = sha_to_commit.get(sha)
|
|
574
583
|
if commit is None:
|
|
575
584
|
continue
|
|
576
585
|
inv_records.append((line, sha, commit))
|
|
577
|
-
|
|
586
|
+
# Sort newest first; on identical timestamps fall back to lexicographically
|
|
587
|
+
# smallest sha so cache and --no-cache emit byte-identical output.
|
|
588
|
+
inv_records.sort(key=lambda t: t[1]) # secondary: sha asc
|
|
589
|
+
inv_records.sort(key=lambda t: t[2].authored_at, reverse=True) # primary
|
|
578
590
|
inv_records = inv_records[:invariants]
|
|
579
591
|
|
|
580
592
|
incident_records = gf.find_incidents(commits)[:incidents]
|
|
@@ -827,7 +839,10 @@ def scan(
|
|
|
827
839
|
if cache is not None:
|
|
828
840
|
cache.close()
|
|
829
841
|
|
|
830
|
-
|
|
842
|
+
# Stable tie-break on identical scores: lexicographically smallest path
|
|
843
|
+
# so cache and --no-cache produce byte-identical text output for the
|
|
844
|
+
# same HEAD. Without this, the truncation at --top N is non-deterministic.
|
|
845
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
831
846
|
top_cards = cards[:top]
|
|
832
847
|
if not top_cards:
|
|
833
848
|
# Be honest about what "no flagged files" actually means. A user who
|
|
@@ -949,7 +964,8 @@ def show(
|
|
|
949
964
|
cards.append(rc.build(repo_root, change.path))
|
|
950
965
|
except gf.GitError:
|
|
951
966
|
continue
|
|
952
|
-
|
|
967
|
+
# Stable tie-break on identical scores: lex smallest path.
|
|
968
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
953
969
|
|
|
954
970
|
if json_out:
|
|
955
971
|
console.print_json(
|
|
@@ -1065,13 +1081,18 @@ def tour(
|
|
|
1065
1081
|
|
|
1066
1082
|
inv_pairs = gf.extract_invariant_quotes(commits)
|
|
1067
1083
|
sha_to_commit = {c.sha: c for c in commits}
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1084
|
+
deduped = gf.dedupe_invariant_lines(inv_pairs, sha_to_commit)
|
|
1085
|
+
# Sort newest first with sha-asc tie-break so cache and --no-cache
|
|
1086
|
+
# surface the same three lines in the same order.
|
|
1087
|
+
deduped_sorted = sorted(
|
|
1088
|
+
(p for p in deduped if p[0] in sha_to_commit),
|
|
1089
|
+
key=lambda p: p[0],
|
|
1090
|
+
)
|
|
1091
|
+
deduped_sorted.sort(
|
|
1092
|
+
key=lambda p: sha_to_commit[p[0]].authored_at, reverse=True
|
|
1093
|
+
)
|
|
1071
1094
|
invariants_top = [
|
|
1072
|
-
(line, sha_to_commit[sha])
|
|
1073
|
-
for line, sha in seen_lines.items()
|
|
1074
|
-
if sha in sha_to_commit
|
|
1095
|
+
(line, sha_to_commit[sha]) for sha, line in deduped_sorted
|
|
1075
1096
|
][:3]
|
|
1076
1097
|
incidents_top = gf.find_incidents(commits)[:3]
|
|
1077
1098
|
|
|
@@ -1135,7 +1156,8 @@ def tour(
|
|
|
1135
1156
|
]
|
|
1136
1157
|
if useful:
|
|
1137
1158
|
cards.append(card)
|
|
1138
|
-
|
|
1159
|
+
# Stable tie-break: lex smallest path on identical scores.
|
|
1160
|
+
cards.sort(key=lambda c: (-c.score.value, c.path))
|
|
1139
1161
|
|
|
1140
1162
|
if cards:
|
|
1141
1163
|
console.print("[bold red]Top 3 risky files[/bold red]")
|
|
@@ -967,6 +967,47 @@ def extract_invariant_quotes(commits: Sequence[Commit]) -> list[tuple[str, str]]
|
|
|
967
967
|
return out
|
|
968
968
|
|
|
969
969
|
|
|
970
|
+
def dedupe_invariant_lines(
    pairs: Sequence[tuple[str, str]],
    sha_to_commit: dict[str, Commit],
) -> list[tuple[str, str]]:
    """Collapse identical invariant lines to one canonical (sha, line) pair.

    When two commits state the same invariant line — typically a cherry-pick
    onto a maintenance branch, or a rebase that duplicated the message — we
    must pick exactly one to surface. Without a deterministic rule the cache
    and ``--no-cache`` paths can disagree (their walk orders differ when
    timestamps tie), and downstream JSON consumers see flaky output across
    runs.

    The rule, applied per line:

    1. A sha that resolves in ``sha_to_commit`` beats one that does not,
       regardless of which was encountered first. An unresolvable sha has
       no metadata to rank on, and a caller that looks the winner up in
       the same mapping would otherwise lose the line entirely.
    2. Among resolvable shas, earliest ``authored_at`` wins. The original
       statement is canonical; cherry-picks and rebases are derivatives.
    3. Lexicographically smallest ``sha`` breaks ties on identical
       timestamps.
    4. If no sha for the line resolves, the first-seen sha is kept.

    Args:
        pairs: ``(sha, line)`` tuples, possibly repeating the same line.
        sha_to_commit: Lookup from sha to a commit object exposing
            ``authored_at``; used only to rank competing shas.

    Returns:
        One ``(sha, line)`` tuple per distinct line, in first-encounter
        order of the lines.
    """
    # dict preserves insertion order and re-assigning a key keeps its
    # position, so replacing a winner never reorders the output.
    canonical: dict[str, str] = {}
    for sha, line in pairs:
        if line not in canonical:
            canonical[line] = sha
            continue
        challenger = sha_to_commit.get(sha)
        if challenger is None:
            # Nothing to rank the newcomer on; the incumbent stays.
            continue
        incumbent_sha = canonical[line]
        incumbent = sha_to_commit.get(incumbent_sha)
        if incumbent is None:
            # Incumbent is unresolvable but the newcomer is not: upgrade.
            canonical[line] = sha
            continue
        if (challenger.authored_at, sha) < (incumbent.authored_at, incumbent_sha):
            canonical[line] = sha
    return [(sha, line) for line, sha in canonical.items()]
|
|
1009
|
+
|
|
1010
|
+
|
|
970
1011
|
def author_last_activity(repo_root: Path, email: str) -> datetime | None:
|
|
971
1012
|
"""Most recent commit timestamp by ``email`` anywhere in the repo, or None."""
|
|
972
1013
|
raw = _run_git(
|
|
@@ -306,6 +306,57 @@ def test_open_for_idempotent_open_close(tmp_path: Path) -> None:
|
|
|
306
306
|
store_b.close()
|
|
307
307
|
|
|
308
308
|
|
|
309
|
+
# ---- F7: in-memory cache for --no-cache amortisation ---------------------
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def test_open_in_memory_does_not_touch_disk(tmp_path: Path) -> None:
    """An in-memory store takes writes and serves reads without creating
    the cache directory, either while open or after ``close()``."""
    cache_dir = tmp_path / ch.CACHE_DIRNAME
    fake_sha = "a" * 40
    mem_store = ch.open_in_memory(tmp_path)
    try:
        # Populate every table the store exposes a writer for here.
        mem_store.upsert_commits([_commit(sha=fake_sha)])
        mem_store.upsert_commit_files([(fake_sha, "x.py", 1, 0)])
        mem_store.set_head_sha("deadbeef")
        # The writes must not have materialised anything on disk.
        assert not cache_dir.exists()
        # The data is nevertheless readable back from the same store.
        assert len(mem_store.fetch_all_commit_rows()) == 1
        assert mem_store.head_sha == "deadbeef"
    finally:
        mem_store.close()
    # Closing must not flush anything to disk either.
    assert not cache_dir.exists()
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def test_in_memory_cache_amortises_across_files(repo) -> None:  # type: ignore[no-untyped-def]
    """Diffstat rows cached while analysing one file are reused for the next.

    After the history of ``a.txt`` has been walked through an in-memory
    store, a later walk of ``b.txt`` should only be missing file rows for
    the commits that ``a.txt`` never touched. That shared work is the
    amortisation ``--no-cache`` relies on within a single process.
    """
    history = [
        ("init", {"a.txt": "1", "b.txt": "1"}),
        ("touch a and b", {"a.txt": "2", "b.txt": "2"}),
        ("touch only b", {"b.txt": "3"}),
    ]
    for subject, files in history:
        repo.commit(subject, files)

    with ch.open_in_memory(repo.root) as store:
        # Walking a.txt (commits + co-changes) fills the diffstat rows
        # for every sha in its history.
        commits_a = gf.commits_for_path(repo.root, "a.txt", cache=store)
        gf.co_changes(repo.root, commits_a, "a.txt", cache=store)
        shas_a = [commit.sha for commit in commits_a]
        assert store.shas_missing_files(shas_a) == []

        # For b.txt, only the shas outside a.txt's history may be missing.
        commits_b = gf.commits_for_path(repo.root, "b.txt", cache=store)
        shas_b = [commit.sha for commit in commits_b]
        still_missing = set(store.shas_missing_files(shas_b))
        assert still_missing == set(shas_b) - set(shas_a)
|
|
358
|
+
|
|
359
|
+
|
|
309
360
|
def test_fetch_co_changes_chunked_query_handles_many_shas(tmp_path: Path) -> None:
|
|
310
361
|
"""SQLite limits host parameters per statement; we chunk above 500."""
|
|
311
362
|
with ch.open_for(tmp_path) as store:
|
|
@@ -755,3 +755,116 @@ def test_repeat_scan_produces_identical_top_files(repo, days_ago) -> None: # ty
|
|
|
755
755
|
assert "refund.py" in cold
|
|
756
756
|
assert "refund.py" in warm_first
|
|
757
757
|
assert "refund.py" in warm_second
|
|
758
|
+
|
|
759
|
+
|
|
760
|
+
# ---- F4: highlights determinism across cache state ------------------------
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def test_highlights_json_is_byte_identical_across_cache_state(
    repo, days_ago
) -> None:  # type: ignore[no-untyped-def]
    """``highlights --json`` is identical with and without the cache when two
    commits share both body and timestamp.

    The scenario imitates a cherry-pick onto another branch: same message
    body, same authored time, different sha and touched file. Whichever
    sha the dedup settles on, it has to be the same one for a
    ``--no-cache`` run, the run that fills the cache, and a run that
    reads the filled cache.
    """
    shared_when = days_ago(30)
    shared_body = "Do not duplicate the contributing guide between branches."
    repo.commit("init", {"a.txt": "1", "b.txt": "1"}, when=days_ago(60))
    # Two commits that differ only in subject and touched file.
    twins = (
        ("use global contributing guide on master", {"a.txt": "2"}),
        ("use global contributing guide on stable", {"b.txt": "2"}),
    )
    for subject, files in twins:
        repo.commit(subject, files, body=shared_body, when=shared_when)

    uncached = _invoke(repo.root, "highlights", "--no-cache", "--json").output
    filling = _invoke(repo.root, "highlights", "--json").output
    cached = _invoke(repo.root, "highlights", "--json").output
    assert uncached == filling == cached

    # The duplicated statement must be reported once, not once per commit.
    invariants = json.loads(uncached)["invariants"]
    assert len(invariants) == 1
|
|
804
|
+
|
|
805
|
+
|
|
806
|
+
# ---- F5: scan determinism across cache state ------------------------------
|
|
807
|
+
|
|
808
|
+
|
|
809
|
+
def test_scan_text_is_byte_identical_across_cache_state(
    repo, days_ago
) -> None:  # type: ignore[no-untyped-def]
    """``scan`` text output is identical with and without the cache when two
    files tie on score, and the tie is resolved by path.

    ``zeta.py`` and ``alpha.py`` are only ever committed together, so
    they share one history. Their relative order in the report is then
    decided by the path tie-break alone, which must list ``alpha.py``
    first.
    """
    paired = ("zeta.py", "alpha.py")
    introduced = repo.commit(
        "feature: introduce zeta and alpha",
        {name: "1" for name in paired},
        when=days_ago(50),
    )
    repo.revert(introduced, when=days_ago(45))
    repo.commit(
        "hotfix: regression",
        {name: "2" for name in paired},
        body="incident #INC-1",
        when=days_ago(20),
    )

    scan_args = ("scan", "--top", "10")
    uncached = _invoke(repo.root, *scan_args, "--no-cache").output
    filling = _invoke(repo.root, *scan_args).output
    cached = _invoke(repo.root, *scan_args).output
    assert uncached == filling == cached

    # Both files are reported, and the lexicographically smaller path
    # comes first despite the equal scores.
    assert "alpha.py" in uncached
    assert "zeta.py" in uncached
    assert uncached.index("alpha.py") < uncached.index("zeta.py")
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
# ---- F7: --no-cache uses an in-memory cache for amortisation -------------
|
|
847
|
+
|
|
848
|
+
|
|
849
|
+
def test_no_cache_scan_matches_warm_scan_byte_for_byte(
    repo, days_ago
) -> None:  # type: ignore[no-untyped-def]
    """A ``--no-cache`` scan prints exactly what a cached scan prints for the
    same HEAD, including when the on-disk cache already exists.

    The cached scan runs first so the persistent cache is populated
    before the ``--no-cache`` run, then once more afterwards to confirm
    the ``--no-cache`` run left the cached output unchanged.
    """
    both = ("a.py", "b.py")
    feature_sha = repo.commit(
        "feature", {name: "1" for name in both}, when=days_ago(50)
    )
    repo.revert(feature_sha, when=days_ago(45))
    repo.commit(
        "hotfix: regression",
        {name: "2" for name in both},
        body="incident #INC-1",
        when=days_ago(10),
    )

    scan_args = ("scan", "--top", "5")
    cached_before = _invoke(repo.root, *scan_args).output
    uncached = _invoke(repo.root, *scan_args, "--no-cache").output
    cached_after = _invoke(repo.root, *scan_args).output
    assert cached_before == uncached
    assert cached_before == cached_after
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|