sourcecode 1.31.18__py3-none-any.whl → 1.31.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cache.py +193 -9
- sourcecode/cli.py +141 -33
- sourcecode/serializer.py +113 -0
- {sourcecode-1.31.18.dist-info → sourcecode-1.31.20.dist-info}/METADATA +3 -3
- {sourcecode-1.31.18.dist-info → sourcecode-1.31.20.dist-info}/RECORD +9 -9
- {sourcecode-1.31.18.dist-info → sourcecode-1.31.20.dist-info}/WHEEL +0 -0
- {sourcecode-1.31.18.dist-info → sourcecode-1.31.20.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.31.18.dist-info → sourcecode-1.31.20.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cache.py
CHANGED
|
@@ -69,6 +69,9 @@ from typing import Any, Optional
|
|
|
69
69
|
#: Bump this string to invalidate *all* existing cached snapshots.
|
|
70
70
|
SCHEMA_VERSION: str = "2"
|
|
71
71
|
|
|
72
|
+
#: Bump to invalidate all L1 core caches (independent of snapshot version).
|
|
73
|
+
CORE_SCHEMA_VERSION: str = "1"
|
|
74
|
+
|
|
72
75
|
#: Fields eligible for CAS deduplication (applied to top-level JSON dict keys).
|
|
73
76
|
_CAS_FIELDS: frozenset[str] = frozenset([
|
|
74
77
|
"file_paths",
|
|
@@ -93,11 +96,18 @@ _DEFAULT_KEEP_COMMITS: int = 5
|
|
|
93
96
|
# Matches "snapshot-<hex_commit>-<hex_flags>.json.gz"
|
|
94
97
|
_SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
|
|
95
98
|
|
|
99
|
+
# Matches "core-<hex_commit>-<hex_analysis>.json.gz"
|
|
100
|
+
_CORE_RE = re.compile(r"^core-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
|
|
101
|
+
|
|
102
|
+
# Matches "view-<hex_core_hash16>-<hex_view_flags>.json.gz"
|
|
103
|
+
_VIEW_RE = re.compile(r"^view-([0-9a-f]{16})-[0-9a-f]+\.json\.gz$")
|
|
104
|
+
|
|
96
105
|
|
|
97
106
|
# ---------------------------------------------------------------------------
|
|
98
107
|
# Public API — location helpers
|
|
99
108
|
# ---------------------------------------------------------------------------
|
|
100
109
|
|
|
110
|
+
|
|
101
111
|
def repo_id(repo_root: Path) -> str:
    """Return a stable 16-char hex identifier derived from the canonical repo path.

    The identifier is the first 16 hex digits of the SHA-256 of the resolved
    path string.

    Args:
        repo_root: Path to the repository; it is resolved before hashing.

    Returns:
        A 16-character lowercase hexadecimal string.
    """
    canonical = str(repo_root.resolve())
    # ``surrogateescape`` yields exactly the same bytes as a plain UTF-8
    # encode for every valid string, but also accepts the lone surrogates that
    # Python uses to represent undecodable bytes in file names, so such paths
    # get an id instead of raising UnicodeEncodeError.
    encoded = canonical.encode("utf-8", errors="surrogateescape")
    return hashlib.sha256(encoded).hexdigest()[:16]
|
|
@@ -190,6 +200,138 @@ def write(
|
|
|
190
200
|
_gc(cache_d)
|
|
191
201
|
|
|
192
202
|
|
|
203
|
+
# ---------------------------------------------------------------------------
|
|
204
|
+
# Layer 1 — Core Analysis cache
|
|
205
|
+
# ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
def read_core(repo_root: Path, core_key: str) -> Optional[tuple[dict[str, Any], str]]:
    """Read core analysis artifacts from the L1 cache.

    Args:
        repo_root: Repository whose cache directory is consulted.
        core_key: Key suffix of the cache file (``core-<core_key>.json.gz``).

    Returns:
        ``(core_dict, core_hash)`` on a hit, or ``None`` on a miss.
        ``core_hash`` is the value stored under the envelope's ``"hash"`` key
        (16 hex chars); callers use it as the L2 view-key prefix so that
        different views of the same core share a common ancestry.

    A file that exists but cannot be used — unreadable, not a JSON object,
    written with a different ``CORE_SCHEMA_VERSION``, or carrying malformed
    ``data`` / ``hash`` fields — is deleted and reported as a miss.
    """
    gz_path = cache_dir(repo_root) / f"core-{core_key}.json.gz"
    if not gz_path.exists():
        return None

    try:
        envelope = json.loads(gzip.decompress(gz_path.read_bytes()).decode("utf-8"))
    except Exception:
        envelope = None  # corrupt or unreadable entry: evicted below

    if isinstance(envelope, dict) and envelope.get("csv") == CORE_SCHEMA_VERSION:
        core_data = envelope.get("data")
        core_hash = envelope.get("hash")
        # The hash becomes part of a view file name; anything other than the
        # 16-hex-char form would yield names the view pattern never matches,
        # so treat such an entry as invalid rather than returning it.
        if (
            isinstance(core_data, dict)
            and isinstance(core_hash, str)
            and re.fullmatch(r"[0-9a-f]{16}", core_hash)
        ):
            return core_data, core_hash

    _safe_unlink(gz_path)  # single eviction path for every invalid entry
    return None
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str:
    """Persist a core analysis dict to the L1 cache.

    Args:
        repo_root: Repository whose cache directory receives the file.
        core_key: Key suffix used in the file name.
        core_data: JSON-serialisable core dict to store.

    Returns:
        The 16-char SHA-256 prefix of the core JSON (the L2 key prefix), or
        ``""`` when ``core_data`` cannot be serialised and therefore has no
        hash. The hash is still returned when only the disk write fails.

    Writes are always best-effort; failures are silently swallowed.

    File layout::

        ~/.sourcecode/cache/<repo_id>/core-<core_key>.json.gz

    Envelope schema::

        { "csv":  "1",          // CORE_SCHEMA_VERSION
          "key":  "...",        // core_key passed in
          "hash": "<h16>",      // SHA-256[:16] of core JSON — used as L2 prefix
          "ts":   "...",        // ISO-8601 UTC write time
          "data": {...} }       // core_view(sm) dict
    """
    try:
        core_json = json.dumps(core_data, ensure_ascii=False)
        core_hash = hashlib.sha256(core_json.encode("utf-8")).hexdigest()[:16]
    except (TypeError, ValueError):
        # Not JSON-serialisable (TypeError / circular ValueError) or not
        # encodable as UTF-8 (UnicodeEncodeError is a ValueError): nothing
        # can be cached, and best-effort means we must not raise.
        return ""

    cache_d = cache_dir(repo_root)
    dest = cache_d / f"core-{core_key}.json.gz"
    # The temp name matches none of the cache file globs (core-/view-/snapshot-),
    # so a leftover from a crashed process is never mistaken for a cache entry.
    tmp_path = cache_d / f".core-{core_key}.{os.getpid()}.tmp"
    try:
        cache_d.mkdir(parents=True, exist_ok=True)
        envelope: dict[str, Any] = {
            "csv": CORE_SCHEMA_VERSION,
            "key": core_key,
            "hash": core_hash,
            "ts": _now_iso(),
            "data": core_data,
        }
        payload = gzip.compress(
            json.dumps(envelope, ensure_ascii=False).encode("utf-8"),
            compresslevel=6,
        )
        # Write to a sibling temp file, then rename over the destination so a
        # reader never sees a half-written gzip stream.
        tmp_path.write_bytes(payload)
        os.replace(tmp_path, dest)
    except Exception:
        _safe_unlink(tmp_path)  # best-effort cleanup; the write failure is non-fatal

    return core_hash
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# ---------------------------------------------------------------------------
|
|
286
|
+
# Layer 2 — Derived View cache
|
|
287
|
+
# ---------------------------------------------------------------------------
|
|
288
|
+
|
|
289
|
+
def read_view(repo_root: Path, view_key: str) -> Optional[str]:
    """Look up a rendered view string in the L2 cache.

    The entry lives at ``view-{view_key}.json.gz`` inside the repository's
    cache directory and is decoded by ``_parse_envelope``.

    Returns:
        The decoded content string, or ``None`` on a miss. An entry that
        exists but fails to decode (exception or ``None`` result) is deleted
        and reported as a miss.
    """
    view_dir = cache_dir(repo_root)
    view_file = view_dir / f"view-{view_key}.json.gz"

    if view_file.exists():
        try:
            content = _parse_envelope(view_file.read_bytes(), view_dir)
        except Exception:
            content = None
        if content is not None:
            return content
        # Present but unusable: evict so later lookups miss cleanly.
        _safe_unlink(view_file)

    return None
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def write_view(
    repo_root: Path,
    view_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """Store a rendered view string in the L2 cache as ``view-{view_key}.json.gz``.

    The payload is produced by ``_build_envelope``; an empty dict is passed
    for ``layers`` when none is given.

    The write is best-effort: any exception raised while creating the
    directory, building the payload or writing the file is swallowed. No GC
    is triggered here — callers that want eviction invoke
    ``_gc(cache_dir(repo_root))`` themselves.
    """
    view_dir = cache_dir(repo_root)
    target_file = view_dir / f"view-{view_key}.json.gz"
    layer_map = layers if layers else {}

    try:
        view_dir.mkdir(parents=True, exist_ok=True)
        target_file.write_bytes(
            _build_envelope(view_key, content, fmt, layer_map, view_dir)
        )
    except Exception:
        return  # cache write failure is deliberately non-fatal
|
|
333
|
+
|
|
334
|
+
|
|
193
335
|
# ---------------------------------------------------------------------------
|
|
194
336
|
# Envelope (de)serialisation
|
|
195
337
|
# ---------------------------------------------------------------------------
|
|
@@ -384,31 +526,39 @@ def _cas_restore(
|
|
|
384
526
|
# ---------------------------------------------------------------------------
|
|
385
527
|
|
|
386
528
|
def _gc(cache_d: Path) -> None:
|
|
387
|
-
"""
|
|
388
|
-
Evict old snapshots and sweep orphaned CAS blobs.
|
|
529
|
+
"""Evict old snapshots/cores/views and sweep orphaned CAS blobs.
|
|
389
530
|
|
|
390
|
-
Keeps snapshots from the last ``SOURCECODE_CACHE_KEEP_COMMITS``
|
|
391
|
-
git commits (determined by mtime
|
|
531
|
+
Keeps snapshots and cores from the last ``SOURCECODE_CACHE_KEEP_COMMITS``
|
|
532
|
+
distinct git commits (determined by newest mtime within each commit group).
|
|
533
|
+
Views are then pruned: a view survives only when its core-hash prefix
|
|
534
|
+
matches a core file in the surviving set.
|
|
392
535
|
"""
|
|
393
536
|
keep = int(os.environ.get("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS))
|
|
394
537
|
|
|
395
538
|
try:
|
|
396
539
|
all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
|
|
397
|
-
|
|
540
|
+
all_cores = list(cache_d.glob("core-*.json.gz"))
|
|
541
|
+
all_views = list(cache_d.glob("view-*.json.gz"))
|
|
542
|
+
|
|
543
|
+
if not all_snapshots and not all_cores and not all_views:
|
|
398
544
|
return
|
|
399
545
|
|
|
400
|
-
# Group snapshot files by commit SHA
|
|
546
|
+
# Group snapshot + core files by commit SHA
|
|
401
547
|
groups: dict[str, list[Path]] = {}
|
|
402
548
|
for f in all_snapshots:
|
|
403
549
|
m = _SNAPSHOT_RE.match(f.name)
|
|
404
550
|
if m:
|
|
405
551
|
groups.setdefault(m.group(1), []).append(f)
|
|
552
|
+
for f in all_cores:
|
|
553
|
+
m = _CORE_RE.match(f.name)
|
|
554
|
+
if m:
|
|
555
|
+
groups.setdefault(m.group(1), []).append(f)
|
|
406
556
|
|
|
407
557
|
surviving: list[Path]
|
|
408
558
|
|
|
409
559
|
if keep <= 0 or len(groups) <= keep:
|
|
410
|
-
# No eviction needed — but still sweep CAS
|
|
411
|
-
surviving = all_snapshots
|
|
560
|
+
# No eviction needed — but still sweep views + CAS
|
|
561
|
+
surviving = all_snapshots + all_cores
|
|
412
562
|
else:
|
|
413
563
|
def _newest_mtime(commit: str) -> float:
|
|
414
564
|
return max(p.stat().st_mtime for p in groups[commit])
|
|
@@ -422,12 +572,46 @@ def _gc(cache_d: Path) -> None:
|
|
|
422
572
|
for f in groups[commit]:
|
|
423
573
|
_safe_unlink(f)
|
|
424
574
|
|
|
425
|
-
|
|
575
|
+
# Prune view files whose core hash is no longer in the surviving set
|
|
576
|
+
_gc_views(cache_d, surviving, all_views)
|
|
577
|
+
|
|
578
|
+
# Sweep orphaned CAS blobs (surviving snapshots + view files may ref them)
|
|
579
|
+
surviving_with_views = surviving + [v for v in all_views if v.exists()]
|
|
580
|
+
_gc_cas(cache_d, surviving_with_views)
|
|
426
581
|
|
|
427
582
|
except Exception:
|
|
428
583
|
pass # GC failure is non-fatal
|
|
429
584
|
|
|
430
585
|
|
|
586
|
+
def _gc_views(cache_d: Path, surviving: list[Path], all_views: list[Path]) -> None:
    """Delete view files not traceable to a surviving core.

    Collects the ``hash`` field from every surviving ``core-*`` envelope, then
    deletes view files whose filename core-hash prefix is absent from that
    set. View files with unrecognisable names are left untouched.

    If the hash of any surviving core cannot be determined (file unreadable,
    not a JSON object, or ``hash`` missing/not a string), pruning is skipped
    for this round: the views belonging to that core cannot be identified, so
    deleting by exclusion could remove views that are still valid.

    Args:
        cache_d: Cache directory (unused here; kept for a uniform GC signature).
        surviving: Cache files kept by commit-level GC; only ``core-*`` files
            are inspected.
        all_views: Candidate ``view-*`` files.
    """
    if not all_views:
        return

    # Collect live core hashes from surviving core-*.json.gz files
    live_hashes: set[str] = set()
    for path in surviving:
        if not path.name.startswith("core-"):
            continue
        try:
            env = json.loads(gzip.decompress(path.read_bytes()).decode("utf-8"))
            core_hash = env.get("hash", "") if isinstance(env, dict) else ""
        except Exception:
            core_hash = ""
        if not isinstance(core_hash, str) or not core_hash:
            # Unknown hash: be conservative and keep every view this round.
            return
        live_hashes.add(core_hash)

    for vp in all_views:
        m = _VIEW_RE.match(vp.name)
        if m and m.group(1) not in live_hashes:
            _safe_unlink(vp)
|
|
613
|
+
|
|
614
|
+
|
|
431
615
|
def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
|
|
432
616
|
"""
|
|
433
617
|
Delete CAS blobs not referenced by any snapshot in *surviving_snapshots*.
|
sourcecode/cli.py
CHANGED
|
@@ -876,16 +876,30 @@ def main(
|
|
|
876
876
|
architecture = True # agents need full architectural signal (M4)
|
|
877
877
|
graph_modules = True # IC-003: import graph needed for architecture confidence
|
|
878
878
|
|
|
879
|
-
# ──
|
|
880
|
-
#
|
|
881
|
-
#
|
|
882
|
-
#
|
|
879
|
+
# ── Two-layer cache ────────────────────────────────────────────────────────
|
|
880
|
+
# L1 (core): (repo, commit, analysis_flags) → pre-computed view data dict
|
|
881
|
+
# key = core-<git_sha>-<analysis_hash>.json.gz
|
|
882
|
+
# L2 (view): (core_hash, view_flags) → final rendered string
|
|
883
|
+
# key = view-<core_hash16>-<view_hash>.json.gz
|
|
884
|
+
#
|
|
885
|
+
# Lookup order: L2 exact hit → L1 hit + view rebuild → full analysis
|
|
886
|
+
# Write order: full analysis → write L1 core → write L2 view
|
|
887
|
+
#
|
|
888
|
+
# Flags split:
|
|
889
|
+
# core (analysis) — affect WHAT is analysed; same core for any view
|
|
890
|
+
# view — affect HOW it's presented; same view for any format variant
|
|
883
891
|
import hashlib as _hashlib
|
|
884
892
|
import subprocess as _sub
|
|
885
893
|
from sourcecode import cache as _cache_mod
|
|
894
|
+
|
|
886
895
|
_cache_hit_content: Optional[str] = None
|
|
887
896
|
_git_sha = ""
|
|
888
|
-
|
|
897
|
+
_core_key = ""
|
|
898
|
+
_view_key = ""
|
|
899
|
+
_core_hash = ""
|
|
900
|
+
_core_flags_str = ""
|
|
901
|
+
_view_flags_str = ""
|
|
902
|
+
|
|
889
903
|
if not no_cache:
|
|
890
904
|
try:
|
|
891
905
|
_sha_r = _sub.run(
|
|
@@ -896,37 +910,112 @@ def main(
|
|
|
896
910
|
# Only cache when target IS the git repo root (not a subdir of one),
|
|
897
911
|
# to avoid polluting sub-project directories used in tests.
|
|
898
912
|
if _git_sha and (target / ".git").exists():
|
|
899
|
-
# Include every output-affecting flag so different flag combos never collide
|
|
900
|
-
# Include version so cache is invalidated on sourcecode upgrades
|
|
901
913
|
from sourcecode import __version__ as _sc_version
|
|
902
|
-
# FIX-P0-1: cache key must include ALL analysis-affecting flags.
|
|
903
|
-
# Previously missing: exclude, depth, rank_by, symbol, entrypoints_only,
|
|
904
|
-
# no_redact, graph_detail, docs_depth, max_nodes, graph_edges,
|
|
905
|
-
# max_importers, emit_graph.
|
|
906
|
-
# Use effective_depth (not raw depth) so Java auto-adjustment is captured.
|
|
907
914
|
_excl_key = (
|
|
908
915
|
",".join(sorted(e.strip() for e in exclude.split(",") if e.strip()))
|
|
909
916
|
if exclude else ""
|
|
910
917
|
)
|
|
911
|
-
|
|
918
|
+
|
|
919
|
+
# ── Core (analysis) flags: affect which analyzers run + scan config ──
|
|
920
|
+
# Use effective_depth (not raw depth) so Java auto-adjustment is captured.
|
|
921
|
+
_core_flags_str = (
|
|
912
922
|
f"v={_sc_version},"
|
|
913
|
-
f"
|
|
914
|
-
f"co={changed_only},dep={dependencies},gm={graph_modules},"
|
|
923
|
+
f"dep={dependencies},gm={graph_modules},"
|
|
915
924
|
f"docs={docs},fm={full_metrics},sem={semantics},"
|
|
916
925
|
f"arch={architecture},gc={git_context},em={env_map},"
|
|
917
|
-
f"cn={code_notes},
|
|
918
|
-
f"ex={_excl_key},depth={effective_depth}
|
|
926
|
+
f"cn={code_notes},mode={mode},"
|
|
927
|
+
f"ex={_excl_key},depth={effective_depth}"
|
|
928
|
+
)
|
|
929
|
+
_core_h = _hashlib.md5(_core_flags_str.encode()).hexdigest()[:8]
|
|
930
|
+
_core_key = f"{_git_sha}-{_core_h}"
|
|
931
|
+
|
|
932
|
+
# ── View flags: output presentation only (no re-analysis needed) ──
|
|
933
|
+
_view_flags_str = (
|
|
934
|
+
f"c={compact},ag={agent},fmt={format},full={full},"
|
|
935
|
+
f"co={changed_only},tree={tree},nt={no_tree},"
|
|
919
936
|
f"rb={rank_by},sym={symbol},ep={entrypoints_only},"
|
|
920
937
|
f"nr={no_redact},gd={graph_detail},dd={docs_depth},"
|
|
921
938
|
f"mn={max_nodes},ge={graph_edges},mi={max_importers},"
|
|
922
939
|
f"eg={emit_graph}"
|
|
923
940
|
)
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
941
|
+
_view_h = _hashlib.md5(_view_flags_str.encode()).hexdigest()[:8]
|
|
942
|
+
|
|
943
|
+
# ── Lookup ──────────────────────────────────────────────────────
|
|
944
|
+
# Step 1: try L1 to obtain the core_hash needed for L2 key
|
|
945
|
+
_l1_result = _cache_mod.read_core(target, _core_key)
|
|
946
|
+
if _l1_result is not None:
|
|
947
|
+
_core_dict_l1, _core_hash = _l1_result
|
|
948
|
+
_view_key = f"{_core_hash}-{_view_h}"
|
|
949
|
+
|
|
950
|
+
# Step 2: try L2 (exact view match)
|
|
951
|
+
_cache_hit_content = _cache_mod.read_view(target, _view_key)
|
|
952
|
+
|
|
953
|
+
# Step 3: L1 hit but L2 miss → rebuild view from core dict
|
|
954
|
+
if _cache_hit_content is None:
|
|
955
|
+
try:
|
|
956
|
+
from sourcecode.serializer import build_view_from_core as _bvfc
|
|
957
|
+
_rebuilt = _bvfc(
|
|
958
|
+
_core_dict_l1,
|
|
959
|
+
compact=compact,
|
|
960
|
+
agent=agent,
|
|
961
|
+
full=full,
|
|
962
|
+
no_tree=no_tree,
|
|
963
|
+
tree=tree,
|
|
964
|
+
)
|
|
965
|
+
if _rebuilt is not None:
|
|
966
|
+
# Apply redaction
|
|
967
|
+
if not no_redact:
|
|
968
|
+
from sourcecode.redactor import redact_dict as _red_l1
|
|
969
|
+
_rebuilt = _red_l1(_rebuilt)
|
|
970
|
+
# Apply output budget
|
|
971
|
+
if agent:
|
|
972
|
+
from sourcecode.output_budget import (
|
|
973
|
+
trim_to_budget as _trim_l1,
|
|
974
|
+
BUDGET_AGENT,
|
|
975
|
+
)
|
|
976
|
+
_rebuilt = _trim_l1(_rebuilt, BUDGET_AGENT, label="agent")
|
|
977
|
+
elif compact:
|
|
978
|
+
from sourcecode.output_budget import (
|
|
979
|
+
trim_to_budget as _trim_l1c,
|
|
980
|
+
BUDGET_COMPACT,
|
|
981
|
+
)
|
|
982
|
+
_rebuilt = _trim_l1c(_rebuilt, BUDGET_COMPACT, label="compact")
|
|
983
|
+
# Serialize
|
|
984
|
+
if format == "yaml":
|
|
985
|
+
from io import StringIO as _SIO_L1
|
|
986
|
+
from ruamel.yaml import YAML as _YAML_L1
|
|
987
|
+
_yl1 = _YAML_L1()
|
|
988
|
+
_yl1.default_flow_style = False
|
|
989
|
+
_yl1.representer.add_representer(
|
|
990
|
+
type(None),
|
|
991
|
+
lambda d, v: d.represent_scalar(
|
|
992
|
+
"tag:yaml.org,2002:null", "null"
|
|
993
|
+
),
|
|
994
|
+
)
|
|
995
|
+
_sl1 = _SIO_L1()
|
|
996
|
+
_yl1.dump(_rebuilt, _sl1)
|
|
997
|
+
_cache_hit_content = _sl1.getvalue()
|
|
998
|
+
else:
|
|
999
|
+
import json as _json_l1
|
|
1000
|
+
_cache_hit_content = _json_l1.dumps(
|
|
1001
|
+
_rebuilt, indent=2, ensure_ascii=False
|
|
1002
|
+
)
|
|
1003
|
+
# Cache rebuilt view in L2
|
|
1004
|
+
if _cache_hit_content:
|
|
1005
|
+
_cache_mod.write_view(
|
|
1006
|
+
target,
|
|
1007
|
+
_view_key,
|
|
1008
|
+
_cache_hit_content,
|
|
1009
|
+
fmt=format,
|
|
1010
|
+
)
|
|
1011
|
+
except Exception:
|
|
1012
|
+
_cache_hit_content = None # rebuild failed → full analysis
|
|
1013
|
+
|
|
927
1014
|
except Exception:
|
|
928
1015
|
_git_sha = ""
|
|
929
|
-
|
|
1016
|
+
_core_key = ""
|
|
1017
|
+
_view_key = ""
|
|
1018
|
+
_core_hash = ""
|
|
930
1019
|
|
|
931
1020
|
if _cache_hit_content is not None:
|
|
932
1021
|
from sourcecode.serializer import write_output
|
|
@@ -1760,18 +1849,37 @@ def main(
|
|
|
1760
1849
|
_progress.finish()
|
|
1761
1850
|
write_output(content, output=output)
|
|
1762
1851
|
|
|
1763
|
-
#
|
|
1764
|
-
#
|
|
1765
|
-
#
|
|
1766
|
-
#
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1852
|
+
# Persist to two-layer cache (git SHA unchanged → re-use on next run).
|
|
1853
|
+
#
|
|
1854
|
+
# L1 (core): stores pre-computed compact+agent+standard views at max
|
|
1855
|
+
# fidelity so any subsequent view can be derived without re-analysis.
|
|
1856
|
+
# L2 (view): stores the exact rendered string for this flag combination.
|
|
1857
|
+
#
|
|
1858
|
+
# GC runs after L2 write to evict old commits and orphaned blobs/views.
|
|
1859
|
+
if not no_cache and _core_key and not _pipeline_error:
|
|
1860
|
+
try:
|
|
1861
|
+
from sourcecode.serializer import core_view as _core_view_fn
|
|
1862
|
+
_core_dict_write = _core_view_fn(sm)
|
|
1863
|
+
_written_core_hash = _cache_mod.write_core(target, _core_key, _core_dict_write)
|
|
1864
|
+
|
|
1865
|
+
# Compute view key using the just-written core hash
|
|
1866
|
+
if _written_core_hash:
|
|
1867
|
+
if not _view_key:
|
|
1868
|
+
# _view_key not set (L1 was also a miss); compute it now
|
|
1869
|
+
_wvh = _hashlib.md5(_view_flags_str.encode()).hexdigest()[:8]
|
|
1870
|
+
_view_key = f"{_written_core_hash}-{_wvh}"
|
|
1871
|
+
_cache_mod.write_view(
|
|
1872
|
+
target,
|
|
1873
|
+
_view_key,
|
|
1874
|
+
content,
|
|
1875
|
+
fmt=format,
|
|
1876
|
+
layers=_compute_analyzer_fingerprints(),
|
|
1877
|
+
)
|
|
1878
|
+
# Trigger GC (evict old commits + orphaned views + CAS blobs)
|
|
1879
|
+
from sourcecode.cache import cache_dir as _cdir, _gc as _run_gc
|
|
1880
|
+
_run_gc(_cdir(target))
|
|
1881
|
+
except Exception:
|
|
1882
|
+
pass # non-fatal: cache write failure
|
|
1775
1883
|
|
|
1776
1884
|
if _pipeline_error:
|
|
1777
1885
|
raise typer.Exit(code=2)
|
sourcecode/serializer.py
CHANGED
|
@@ -2290,6 +2290,119 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
|
|
|
2290
2290
|
return result
|
|
2291
2291
|
|
|
2292
2292
|
|
|
2293
|
+
# ---------------------------------------------------------------------------
|
|
2294
|
+
# Two-layer cache: core_view + build_view_from_core
|
|
2295
|
+
# ---------------------------------------------------------------------------
|
|
2296
|
+
|
|
2297
|
+
#: Bump to invalidate all L1 core caches when the core format changes.
CORE_VIEW_VERSION: str = "1"

#: Top-level view keys that carry the file listing; build_view_from_core
#: drops them whenever the requested view should not include the tree.
_TREE_FIELDS: frozenset[str] = frozenset({"file_tree", "file_paths"})

#: Maximum number of transactional_boundaries classes kept in a compact view
#: requested without ``full``.
_TXN_COMPACT_CAP = 10


def core_view(sm: "SourceMap") -> dict[str, Any]:
    """Pre-compute all view variants for the L1 (core) cache.

    Stores the compact, agent and standard views at maximum fidelity
    (``full=True``, tree included). View-specific flags are applied later by
    :func:`build_view_from_core`; they never affect the core content.

    Schema::

        {
          "_cv":       "<CORE_VIEW_VERSION>",
          "_compact":  compact_view(sm, no_tree=False, full=True),
          "_agent":    agent_view(sm, full=True),
          "_standard": standard_view(sm, include_tree=True),
        }
    """
    return {
        "_cv": CORE_VIEW_VERSION,
        "_compact": compact_view(sm, no_tree=False, full=True),
        "_agent": agent_view(sm, full=True),
        "_standard": standard_view(sm, include_tree=True),
    }


def build_view_from_core(
    core: dict[str, Any],
    *,
    compact: bool = False,
    agent: bool = False,
    full: bool = False,
    no_tree: bool = False,
    tree: bool = False,
) -> Optional[dict[str, Any]]:
    """Derive a view dict from an L1 core dict (skip full re-analysis).

    Returns the view dict (before redaction / budget / serialisation) or
    ``None`` when the core format is unrecognised or its data is missing or
    malformed — the caller must then fall back to a full analysis run.

    Parameters
    ----------
    core:
        Dict returned by :func:`core_view` (stored in L1 cache).
    compact / agent:
        Which view mode to reconstruct; ``agent`` takes precedence when both
        are set, and both False selects the standard view.
    full:
        When *False* and *compact* is True, truncate
        ``transactional_boundaries["classes"]`` to ``_TXN_COMPACT_CAP``
        entries if the recorded count exceeds the cap and the entry is not
        already marked truncated.
    no_tree / tree:
        Compact view: ``no_tree`` drops ``file_tree`` / ``file_paths``.
        Standard view: those keys are kept only when ``tree`` is set and
        ``no_tree`` is not.

    The input ``core`` is never mutated; the agent view and an unfiltered
    compact view are returned by reference.
    """
    if not isinstance(core, dict) or core.get("_cv") != CORE_VIEW_VERSION:
        return None  # stale or unknown core format → full re-analysis

    if agent:
        data = core.get("_agent")
        return data if isinstance(data, dict) else None

    if compact:
        data = core.get("_compact")
        if not isinstance(data, dict):
            return None
        # The core stores max-fidelity data; apply the flag filters here.
        if no_tree:
            data = {k: v for k, v in data.items() if k not in _TREE_FIELDS}
        if not full:
            txn = data.get("transactional_boundaries")
            if isinstance(txn, dict):
                classes = txn.get("classes") or []
                if not isinstance(classes, list):
                    return None  # malformed cache entry → full re-analysis
                count = txn.get("count", len(classes))
                if not isinstance(count, (int, float)):
                    return None  # malformed cache entry → full re-analysis
                if count > _TXN_COMPACT_CAP and not txn.get("truncated"):
                    # Build a new outer dict so the cached core stays intact.
                    data = {
                        **data,
                        "transactional_boundaries": {
                            **txn,
                            "classes": classes[:_TXN_COMPACT_CAP],
                            "truncated": True,
                            "note": (
                                f"showing {_TXN_COMPACT_CAP} of {count}; "
                                f"use --full to see all {count}"
                            ),
                        },
                    }
        return data

    # Standard view
    data = core.get("_standard")
    if not isinstance(data, dict):
        return None
    want_tree = tree and not no_tree
    if not want_tree:
        data = {k: v for k, v in data.items() if k not in _TREE_FIELDS}
    return data
|
|
2404
|
+
|
|
2405
|
+
|
|
2293
2406
|
def contract_view(
|
|
2294
2407
|
sm: SourceMap,
|
|
2295
2408
|
*,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.31.18
|
|
3
|
+
Version: 1.31.20
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
|
|
|
225
225
|
|
|
226
226
|
**AI-ready change intelligence for Java/Spring enterprise monoliths.**
|
|
227
227
|
|
|
228
|
-

|
|
229
229
|

|
|
230
230
|
|
|
231
231
|
---
|
|
@@ -263,7 +263,7 @@ pipx install sourcecode
|
|
|
263
263
|
|
|
264
264
|
```bash
|
|
265
265
|
sourcecode version
|
|
266
|
-
# sourcecode 1.31.18
|
|
266
|
+
# sourcecode 1.31.20
|
|
267
267
|
```
|
|
268
268
|
|
|
269
269
|
---
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=SO-Vu2UFaZzh6UldfBANt4sXszun2WaihkKiyt-cYX4,104
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
|
|
6
|
-
sourcecode/cache.py,sha256=
|
|
6
|
+
sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
|
|
7
7
|
sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
|
|
8
8
|
sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
|
|
9
|
-
sourcecode/cli.py,sha256=
|
|
9
|
+
sourcecode/cli.py,sha256=Ng9hOoMbpCXvB186etvSqV1q3I4mnndPxr3vMzcZQjc,145199
|
|
10
10
|
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
11
11
|
sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
|
|
12
12
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
@@ -37,7 +37,7 @@ sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_X
|
|
|
37
37
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
38
38
|
sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
|
|
39
39
|
sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
|
|
40
|
-
sourcecode/serializer.py,sha256=
|
|
40
|
+
sourcecode/serializer.py,sha256=V8ZV3Y1j4T6rkpO09-PvpVORioWWWbSnOvDjZ2hmQ2U,122144
|
|
41
41
|
sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
|
|
42
42
|
sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
|
|
43
43
|
sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
|
|
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
76
76
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
77
77
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
78
78
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
79
|
-
sourcecode-1.31.
|
|
80
|
-
sourcecode-1.31.
|
|
81
|
-
sourcecode-1.31.
|
|
82
|
-
sourcecode-1.31.
|
|
83
|
-
sourcecode-1.31.
|
|
79
|
+
sourcecode-1.31.20.dist-info/METADATA,sha256=jAgvJ3ggn8x9HGWRoANJO5pu8_bQ74T1ftvcjAvDosE,31103
|
|
80
|
+
sourcecode-1.31.20.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
81
|
+
sourcecode-1.31.20.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
82
|
+
sourcecode-1.31.20.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
83
|
+
sourcecode-1.31.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|