sourcecode 1.31.18__py3-none-any.whl → 1.31.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.18"
3
+ __version__ = "1.31.20"
sourcecode/cache.py CHANGED
@@ -69,6 +69,9 @@ from typing import Any, Optional
69
69
  #: Bump this string to invalidate *all* existing cached snapshots.
70
70
  SCHEMA_VERSION: str = "2"
71
71
 
72
+ #: Bump to invalidate all L1 core caches (independent of snapshot version).
73
+ CORE_SCHEMA_VERSION: str = "1"
74
+
72
75
  #: Fields eligible for CAS deduplication (applied to top-level JSON dict keys).
73
76
  _CAS_FIELDS: frozenset[str] = frozenset([
74
77
  "file_paths",
@@ -93,11 +96,18 @@ _DEFAULT_KEEP_COMMITS: int = 5
93
96
  # Matches "snapshot-<hex_commit>-<hex_flags>.json.gz"
94
97
  _SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
95
98
 
99
+ # Matches "core-<hex_commit>-<hex_analysis>.json.gz"
100
+ _CORE_RE = re.compile(r"^core-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
101
+
102
+ # Matches "view-<hex_core_hash16>-<hex_view_flags>.json.gz"
103
+ _VIEW_RE = re.compile(r"^view-([0-9a-f]{16})-[0-9a-f]+\.json\.gz$")
104
+
96
105
 
97
106
  # ---------------------------------------------------------------------------
98
107
  # Public API — location helpers
99
108
  # ---------------------------------------------------------------------------
100
109
 
110
+
101
111
def repo_id(repo_root: Path) -> str:
    """Return a stable 16-character hex identifier for *repo_root*.

    The identifier is the first 16 hex digits of the SHA-256 digest of the
    resolved repository path rendered as a string.
    """
    canonical_path = str(repo_root.resolve())
    digest = hashlib.sha256(canonical_path.encode())
    return digest.hexdigest()[:16]
@@ -190,6 +200,138 @@ def write(
190
200
  _gc(cache_d)
191
201
 
192
202
 
203
+ # ---------------------------------------------------------------------------
204
+ # Layer 1 — Core Analysis cache
205
+ # ---------------------------------------------------------------------------
206
+
207
def read_core(repo_root: Path, core_key: str) -> Optional[tuple[dict[str, Any], str]]:
    """Read core analysis artifacts from the L1 cache.

    Looks for ``core-<core_key>.json.gz`` inside ``cache_dir(repo_root)``.

    Returns ``(core_dict, core_hash)`` on a hit, or ``None`` on a miss.
    ``core_hash`` is the hash string stored in the envelope; callers use it
    as the prefix of L2 view keys.

    A file that exists but is unusable is deleted and reported as a miss.
    "Unusable" means any of: it cannot be decompressed/decoded/parsed, the
    payload is not a JSON object, its ``csv`` field differs from
    ``CORE_SCHEMA_VERSION``, ``data`` is not a dict, or ``hash`` is not a
    non-empty string.
    """
    gz_path = cache_dir(repo_root) / f"core-{core_key}.json.gz"
    if not gz_path.exists():
        return None

    try:
        raw_bytes = gzip.decompress(gz_path.read_bytes())
        envelope = json.loads(raw_bytes.decode("utf-8"))
    except Exception:
        envelope = None  # corrupt or unreadable — evicted below

    if isinstance(envelope, dict) and envelope.get("csv") == CORE_SCHEMA_VERSION:
        core_data = envelope.get("data")
        core_hash = envelope.get("hash")
        # The hash is spliced into L2 file names, so it must be a real,
        # non-empty string; any other JSON value means a damaged envelope.
        if isinstance(core_data, dict) and isinstance(core_hash, str) and core_hash:
            return core_data, core_hash

    # Every rejection path ends here: evict the unusable file.
    _safe_unlink(gz_path)
    return None
240
+
241
+
242
def write_core(repo_root: Path, core_key: str, core_data: dict[str, Any]) -> str:
    """Persist a core analysis dict to the L1 cache.

    Returns the 16-char SHA-256 hex prefix of the core JSON (the L2 key
    prefix). Writes are best-effort: a failure to create the directory or
    write the file is swallowed and the hash is still returned. If
    *core_data* cannot be serialised to JSON at all, nothing is written and
    an empty string is returned.

    File layout::

        <cache_dir(repo_root)>/core-<core_key>.json.gz

    Envelope schema::

        { "csv":  "1",       // CORE_SCHEMA_VERSION
          "key":  "...",     // core_key passed in
          "hash": "<h16>",   // SHA-256[:16] of core JSON — used as L2 prefix
          "ts":   "...",     // value of _now_iso() at write time
          "data": {...} }    // the core_data dict
    """
    try:
        core_json = json.dumps(core_data, ensure_ascii=False)
        core_hash = hashlib.sha256(core_json.encode("utf-8")).hexdigest()[:16]
    except (TypeError, ValueError, RecursionError):
        # Unserialisable, circular, or un-encodable data: honour the
        # best-effort contract instead of raising into the caller.
        return ""

    try:
        cache_d = cache_dir(repo_root)
        cache_d.mkdir(parents=True, exist_ok=True)
        envelope: dict[str, Any] = {
            "csv": CORE_SCHEMA_VERSION,
            "key": core_key,
            "hash": core_hash,
            "ts": _now_iso(),
            "data": core_data,
        }
        payload = gzip.compress(
            json.dumps(envelope, ensure_ascii=False).encode("utf-8"),
            compresslevel=6,
        )
        (cache_d / f"core-{core_key}.json.gz").write_bytes(payload)
    except Exception:
        pass  # cache persistence is optional; the hash is still valid

    return core_hash
283
+
284
+
285
+ # ---------------------------------------------------------------------------
286
+ # Layer 2 — Derived View cache
287
+ # ---------------------------------------------------------------------------
288
+
289
def read_view(repo_root: Path, view_key: str) -> Optional[str]:
    """Fetch a rendered view from the L2 cache.

    The view lives in ``view-{view_key}.json.gz`` under
    ``cache_dir(repo_root)`` and is decoded by ``_parse_envelope``.

    Returns whatever ``_parse_envelope`` yields on success, or ``None`` when
    the file is absent. A file that exists but fails to parse (an exception
    or a ``None`` result) is deleted and treated as a miss.
    """
    view_dir = cache_dir(repo_root)
    view_file = view_dir / f"view-{view_key}.json.gz"
    if not view_file.exists():
        return None

    try:
        rendered = _parse_envelope(view_file.read_bytes(), view_dir)
    except Exception:
        rendered = None

    if rendered is None:
        # Unusable entry: drop it so the next run rewrites it.
        _safe_unlink(view_file)
    return rendered
308
+
309
+
310
def write_view(
    repo_root: Path,
    view_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """Store a rendered view string in the L2 cache.

    The payload is produced by ``_build_envelope`` and written to
    ``view-{view_key}.json.gz`` under ``cache_dir(repo_root)``. A missing
    *layers* mapping is passed on as an empty dict.

    The write is best-effort: any exception raised while creating the
    directory, building the envelope, or writing the file is swallowed.
    No garbage collection is triggered here; callers that want eviction
    must invoke ``_gc(cache_dir(repo_root))`` themselves.
    """
    view_dir = cache_dir(repo_root)
    target_file = view_dir / f"view-{view_key}.json.gz"
    try:
        view_dir.mkdir(parents=True, exist_ok=True)
        layer_map = layers or {}
        target_file.write_bytes(
            _build_envelope(view_key, content, fmt, layer_map, view_dir)
        )
    except Exception:
        pass
333
+
334
+
193
335
  # ---------------------------------------------------------------------------
194
336
  # Envelope (de)serialisation
195
337
  # ---------------------------------------------------------------------------
@@ -384,31 +526,39 @@ def _cas_restore(
384
526
  # ---------------------------------------------------------------------------
385
527
 
386
528
  def _gc(cache_d: Path) -> None:
387
- """
388
- Evict old snapshots and sweep orphaned CAS blobs.
529
+ """Evict old snapshots/cores/views and sweep orphaned CAS blobs.
389
530
 
390
- Keeps snapshots from the last ``SOURCECODE_CACHE_KEEP_COMMITS`` distinct
391
- git commits (determined by mtime of files in each commit group).
531
+ Keeps snapshots and cores from the last ``SOURCECODE_CACHE_KEEP_COMMITS``
532
+ distinct git commits (determined by newest mtime within each commit group).
533
+ Views are then pruned: a view survives only when its core-hash prefix
534
+ matches a core file in the surviving set.
392
535
  """
393
536
  keep = int(os.environ.get("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS))
394
537
 
395
538
  try:
396
539
  all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
397
- if not all_snapshots:
540
+ all_cores = list(cache_d.glob("core-*.json.gz"))
541
+ all_views = list(cache_d.glob("view-*.json.gz"))
542
+
543
+ if not all_snapshots and not all_cores and not all_views:
398
544
  return
399
545
 
400
- # Group snapshot files by commit SHA
546
+ # Group snapshot + core files by commit SHA
401
547
  groups: dict[str, list[Path]] = {}
402
548
  for f in all_snapshots:
403
549
  m = _SNAPSHOT_RE.match(f.name)
404
550
  if m:
405
551
  groups.setdefault(m.group(1), []).append(f)
552
+ for f in all_cores:
553
+ m = _CORE_RE.match(f.name)
554
+ if m:
555
+ groups.setdefault(m.group(1), []).append(f)
406
556
 
407
557
  surviving: list[Path]
408
558
 
409
559
  if keep <= 0 or len(groups) <= keep:
410
- # No eviction needed — but still sweep CAS
411
- surviving = all_snapshots
560
+ # No eviction needed — but still sweep views + CAS
561
+ surviving = all_snapshots + all_cores
412
562
  else:
413
563
  def _newest_mtime(commit: str) -> float:
414
564
  return max(p.stat().st_mtime for p in groups[commit])
@@ -422,12 +572,46 @@ def _gc(cache_d: Path) -> None:
422
572
  for f in groups[commit]:
423
573
  _safe_unlink(f)
424
574
 
425
- _gc_cas(cache_d, surviving)
575
+ # Prune view files whose core hash is no longer in the surviving set
576
+ _gc_views(cache_d, surviving, all_views)
577
+
578
+ # Sweep orphaned CAS blobs (surviving snapshots + view files may ref them)
579
+ surviving_with_views = surviving + [v for v in all_views if v.exists()]
580
+ _gc_cas(cache_d, surviving_with_views)
426
581
 
427
582
  except Exception:
428
583
  pass # GC failure is non-fatal
429
584
 
430
585
 
586
+ def _gc_views(cache_d: Path, surviving: list[Path], all_views: list[Path]) -> None:
587
+ """Delete view files not traceable to a surviving core.
588
+
589
+ Collects the ``hash`` field from every surviving core envelope, then
590
+ deletes view files whose filename core-hash prefix is absent from that
591
+ set. View files with unrecognisable names are left untouched.
592
+ """
593
+ if not all_views:
594
+ return
595
+
596
+ # Collect live core hashes from surviving core-*.json.gz files
597
+ live_hashes: set[str] = set()
598
+ for path in surviving:
599
+ if not path.name.startswith("core-"):
600
+ continue
601
+ try:
602
+ env = json.loads(gzip.decompress(path.read_bytes()).decode("utf-8"))
603
+ h = env.get("hash", "")
604
+ if h:
605
+ live_hashes.add(h)
606
+ except Exception:
607
+ pass # unreadable core — conservatively keep its views unknown
608
+
609
+ for vp in all_views:
610
+ m = _VIEW_RE.match(vp.name)
611
+ if m and m.group(1) not in live_hashes:
612
+ _safe_unlink(vp)
613
+
614
+
431
615
  def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
432
616
  """
433
617
  Delete CAS blobs not referenced by any snapshot in *surviving_snapshots*.
sourcecode/cli.py CHANGED
@@ -876,16 +876,30 @@ def main(
876
876
  architecture = True # agents need full architectural signal (M4)
877
877
  graph_modules = True # IC-003: import graph needed for architecture confidence
878
878
 
879
- # ── GAP-9: Cache check — serve from global cache when git SHA unchanged ──
880
- # Cache is stored in ~/.sourcecode/cache/<repo_id>/ (outside the repo).
881
- # Snapshots are gzip-compressed (.json.gz) — ~85 % smaller than plain JSON.
882
- # Eviction keeps the last SOURCECODE_CACHE_KEEP_COMMITS commits (default 5).
879
+ # ── Two-layer cache ────────────────────────────────────────────────────────
880
+ # L1 (core): (repo, commit, analysis_flags) → pre-computed view data dict
881
+ # key = core-<git_sha>-<analysis_hash>.json.gz
882
+ # L2 (view): (core_hash, view_flags) → final rendered string
883
+ # key = view-<core_hash16>-<view_hash>.json.gz
884
+ #
885
+ # Lookup order: L2 exact hit → L1 hit + view rebuild → full analysis
886
+ # Write order: full analysis → write L1 core → write L2 view
887
+ #
888
+ # Flags split:
889
+ # core (analysis) — affect WHAT is analysed; same core for any view
890
+ # view — affect HOW it's presented; same view for any format variant
883
891
  import hashlib as _hashlib
884
892
  import subprocess as _sub
885
893
  from sourcecode import cache as _cache_mod
894
+
886
895
  _cache_hit_content: Optional[str] = None
887
896
  _git_sha = ""
888
- _cache_key = ""
897
+ _core_key = ""
898
+ _view_key = ""
899
+ _core_hash = ""
900
+ _core_flags_str = ""
901
+ _view_flags_str = ""
902
+
889
903
  if not no_cache:
890
904
  try:
891
905
  _sha_r = _sub.run(
@@ -896,37 +910,112 @@ def main(
896
910
  # Only cache when target IS the git repo root (not a subdir of one),
897
911
  # to avoid polluting sub-project directories used in tests.
898
912
  if _git_sha and (target / ".git").exists():
899
- # Include every output-affecting flag so different flag combos never collide
900
- # Include version so cache is invalidated on sourcecode upgrades
901
913
  from sourcecode import __version__ as _sc_version
902
- # FIX-P0-1: cache key must include ALL analysis-affecting flags.
903
- # Previously missing: exclude, depth, rank_by, symbol, entrypoints_only,
904
- # no_redact, graph_detail, docs_depth, max_nodes, graph_edges,
905
- # max_importers, emit_graph.
906
- # Use effective_depth (not raw depth) so Java auto-adjustment is captured.
907
914
  _excl_key = (
908
915
  ",".join(sorted(e.strip() for e in exclude.split(",") if e.strip()))
909
916
  if exclude else ""
910
917
  )
911
- _flags_str = (
918
+
919
+ # ── Core (analysis) flags: affect which analyzers run + scan config ──
920
+ # Use effective_depth (not raw depth) so Java auto-adjustment is captured.
921
+ _core_flags_str = (
912
922
  f"v={_sc_version},"
913
- f"c={compact},ag={agent},fmt={format},full={full},"
914
- f"co={changed_only},dep={dependencies},gm={graph_modules},"
923
+ f"dep={dependencies},gm={graph_modules},"
915
924
  f"docs={docs},fm={full_metrics},sem={semantics},"
916
925
  f"arch={architecture},gc={git_context},em={env_map},"
917
- f"cn={code_notes},tree={tree},mode={mode},"
918
- f"ex={_excl_key},depth={effective_depth},"
926
+ f"cn={code_notes},mode={mode},"
927
+ f"ex={_excl_key},depth={effective_depth}"
928
+ )
929
+ _core_h = _hashlib.md5(_core_flags_str.encode()).hexdigest()[:8]
930
+ _core_key = f"{_git_sha}-{_core_h}"
931
+
932
+ # ── View flags: output presentation only (no re-analysis needed) ──
933
+ _view_flags_str = (
934
+ f"c={compact},ag={agent},fmt={format},full={full},"
935
+ f"co={changed_only},tree={tree},nt={no_tree},"
919
936
  f"rb={rank_by},sym={symbol},ep={entrypoints_only},"
920
937
  f"nr={no_redact},gd={graph_detail},dd={docs_depth},"
921
938
  f"mn={max_nodes},ge={graph_edges},mi={max_importers},"
922
939
  f"eg={emit_graph}"
923
940
  )
924
- _flags_h = _hashlib.md5(_flags_str.encode()).hexdigest()[:8]
925
- _cache_key = f"{_git_sha}-{_flags_h}"
926
- _cache_hit_content = _cache_mod.read(target, _cache_key)
941
+ _view_h = _hashlib.md5(_view_flags_str.encode()).hexdigest()[:8]
942
+
943
+ # ── Lookup ──────────────────────────────────────────────────────
944
+ # Step 1: try L1 to obtain the core_hash needed for L2 key
945
+ _l1_result = _cache_mod.read_core(target, _core_key)
946
+ if _l1_result is not None:
947
+ _core_dict_l1, _core_hash = _l1_result
948
+ _view_key = f"{_core_hash}-{_view_h}"
949
+
950
+ # Step 2: try L2 (exact view match)
951
+ _cache_hit_content = _cache_mod.read_view(target, _view_key)
952
+
953
+ # Step 3: L1 hit but L2 miss → rebuild view from core dict
954
+ if _cache_hit_content is None:
955
+ try:
956
+ from sourcecode.serializer import build_view_from_core as _bvfc
957
+ _rebuilt = _bvfc(
958
+ _core_dict_l1,
959
+ compact=compact,
960
+ agent=agent,
961
+ full=full,
962
+ no_tree=no_tree,
963
+ tree=tree,
964
+ )
965
+ if _rebuilt is not None:
966
+ # Apply redaction
967
+ if not no_redact:
968
+ from sourcecode.redactor import redact_dict as _red_l1
969
+ _rebuilt = _red_l1(_rebuilt)
970
+ # Apply output budget
971
+ if agent:
972
+ from sourcecode.output_budget import (
973
+ trim_to_budget as _trim_l1,
974
+ BUDGET_AGENT,
975
+ )
976
+ _rebuilt = _trim_l1(_rebuilt, BUDGET_AGENT, label="agent")
977
+ elif compact:
978
+ from sourcecode.output_budget import (
979
+ trim_to_budget as _trim_l1c,
980
+ BUDGET_COMPACT,
981
+ )
982
+ _rebuilt = _trim_l1c(_rebuilt, BUDGET_COMPACT, label="compact")
983
+ # Serialize
984
+ if format == "yaml":
985
+ from io import StringIO as _SIO_L1
986
+ from ruamel.yaml import YAML as _YAML_L1
987
+ _yl1 = _YAML_L1()
988
+ _yl1.default_flow_style = False
989
+ _yl1.representer.add_representer(
990
+ type(None),
991
+ lambda d, v: d.represent_scalar(
992
+ "tag:yaml.org,2002:null", "null"
993
+ ),
994
+ )
995
+ _sl1 = _SIO_L1()
996
+ _yl1.dump(_rebuilt, _sl1)
997
+ _cache_hit_content = _sl1.getvalue()
998
+ else:
999
+ import json as _json_l1
1000
+ _cache_hit_content = _json_l1.dumps(
1001
+ _rebuilt, indent=2, ensure_ascii=False
1002
+ )
1003
+ # Cache rebuilt view in L2
1004
+ if _cache_hit_content:
1005
+ _cache_mod.write_view(
1006
+ target,
1007
+ _view_key,
1008
+ _cache_hit_content,
1009
+ fmt=format,
1010
+ )
1011
+ except Exception:
1012
+ _cache_hit_content = None # rebuild failed → full analysis
1013
+
927
1014
  except Exception:
928
1015
  _git_sha = ""
929
- _cache_key = ""
1016
+ _core_key = ""
1017
+ _view_key = ""
1018
+ _core_hash = ""
930
1019
 
931
1020
  if _cache_hit_content is not None:
932
1021
  from sourcecode.serializer import write_output
@@ -1760,18 +1849,37 @@ def main(
1760
1849
  _progress.finish()
1761
1850
  write_output(content, output=output)
1762
1851
 
1763
- # GAP-9: Persist to cache for future identical runs (git SHA unchanged)
1764
- # Writes versioned envelope to ~/.sourcecode/cache/<repo_id>/<key>.json.gz.
1765
- # Large JSON fields are extracted into shared CAS blobs (deduplication).
1766
- # GC runs inline after each write (keep last N commits + CAS sweep).
1767
- if not no_cache and _cache_key and not _pipeline_error:
1768
- _cache_mod.write(
1769
- target,
1770
- _cache_key,
1771
- content,
1772
- fmt=format,
1773
- layers=_compute_analyzer_fingerprints(),
1774
- )
1852
+ # Persist to two-layer cache (git SHA unchanged re-use on next run).
1853
+ #
1854
+ # L1 (core): stores pre-computed compact+agent+standard views at max
1855
+ # fidelity so any subsequent view can be derived without re-analysis.
1856
+ # L2 (view): stores the exact rendered string for this flag combination.
1857
+ #
1858
+ # GC runs after L2 write to evict old commits and orphaned blobs/views.
1859
+ if not no_cache and _core_key and not _pipeline_error:
1860
+ try:
1861
+ from sourcecode.serializer import core_view as _core_view_fn
1862
+ _core_dict_write = _core_view_fn(sm)
1863
+ _written_core_hash = _cache_mod.write_core(target, _core_key, _core_dict_write)
1864
+
1865
+ # Compute view key using the just-written core hash
1866
+ if _written_core_hash:
1867
+ if not _view_key:
1868
+ # _view_key not set (L1 was also a miss); compute it now
1869
+ _wvh = _hashlib.md5(_view_flags_str.encode()).hexdigest()[:8]
1870
+ _view_key = f"{_written_core_hash}-{_wvh}"
1871
+ _cache_mod.write_view(
1872
+ target,
1873
+ _view_key,
1874
+ content,
1875
+ fmt=format,
1876
+ layers=_compute_analyzer_fingerprints(),
1877
+ )
1878
+ # Trigger GC (evict old commits + orphaned views + CAS blobs)
1879
+ from sourcecode.cache import cache_dir as _cdir, _gc as _run_gc
1880
+ _run_gc(_cdir(target))
1881
+ except Exception:
1882
+ pass # non-fatal: cache write failure
1775
1883
 
1776
1884
  if _pipeline_error:
1777
1885
  raise typer.Exit(code=2)
sourcecode/serializer.py CHANGED
@@ -2290,6 +2290,119 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
2290
2290
  return result
2291
2291
 
2292
2292
 
2293
+ # ---------------------------------------------------------------------------
2294
+ # Two-layer cache: core_view + build_view_from_core
2295
+ # ---------------------------------------------------------------------------
2296
+
2297
+ #: Bump to invalidate all L1 core caches when the core format changes.
2298
+ CORE_VIEW_VERSION: str = "1"
2299
+
2300
+ #: Top-level view keys that carry the file tree; dropped from a view when the tree is not wanted.
2301
+ _TREE_FIELDS: frozenset[str] = frozenset({"file_tree", "file_paths"})
2302
+
2303
+ #: transactional_boundaries truncation threshold for full=False compact view.
2304
+ _TXN_COMPACT_CAP = 10
2305
+
2306
+
2307
def core_view(sm: SourceMap) -> dict[str, Any]:
    """Build the dict stored in the L1 (core) cache.

    The compact, agent and standard views are all generated up front, with
    ``full=True`` where that flag is accepted and with the tree included
    (``no_tree=False`` / ``include_tree=True``). This lets a later request
    be served from the stored copy instead of a fresh analysis.

    Schema::

        {
          "_cv":       "<CORE_VIEW_VERSION>",
          "_compact":  compact_view(sm, no_tree=False, full=True),
          "_agent":    agent_view(sm, full=True),
          "_standard": standard_view(sm, include_tree=True),
        }
    """
    core: dict[str, Any] = {"_cv": CORE_VIEW_VERSION}
    core["_compact"] = compact_view(sm, no_tree=False, full=True)
    core["_agent"] = agent_view(sm, full=True)
    core["_standard"] = standard_view(sm, include_tree=True)
    return core
2330
+
2331
+
2332
def build_view_from_core(
    core: dict[str, Any],
    *,
    compact: bool = False,
    agent: bool = False,
    full: bool = False,
    no_tree: bool = False,
    tree: bool = False,
) -> Optional[dict[str, Any]]:
    """Reconstruct a view dict from an L1 core dict without re-analysing.

    Returns the view dict (redaction, budget trimming and serialisation are
    left to the caller), or ``None`` when *core* is not a dict, its ``_cv``
    differs from ``CORE_VIEW_VERSION``, or the requested section is not a
    dict. On ``None`` the caller is expected to run a full analysis.

    Parameters
    ----------
    core:
        Dict in the shape produced by :func:`core_view`.
    compact / agent:
        Select the ``_compact`` or ``_agent`` section; *agent* wins when
        both are set, and neither selects ``_standard``.
    full:
        Only consulted for the compact view. When false,
        ``transactional_boundaries`` is cut down to ``_TXN_COMPACT_CAP``
        classes if its count exceeds the cap and it is not already
        marked ``truncated``.
    no_tree / tree:
        Compact view: the keys in ``_TREE_FIELDS`` are dropped when
        *no_tree* is set. Standard view: they are kept only when *tree* is
        set and *no_tree* is not.
    """
    if not isinstance(core, dict):
        return None
    if core.get("_cv") != CORE_VIEW_VERSION:
        return None  # stale or unknown core format

    if agent:
        section = "_agent"
    elif compact:
        section = "_compact"
    else:
        section = "_standard"

    view = core.get(section)
    if not isinstance(view, dict):
        return None

    if agent:
        # The agent section is returned exactly as stored.
        return view

    if compact:
        drop_tree = no_tree
    else:
        drop_tree = not (tree and not no_tree)
    if drop_tree:
        view = {key: value for key, value in view.items() if key not in _TREE_FIELDS}

    if compact and not full:
        boundaries = view.get("transactional_boundaries")
        if isinstance(boundaries, dict):
            listed = boundaries.get("classes") or []
            total = boundaries.get("count", len(listed))
            if total > _TXN_COMPACT_CAP and not boundaries.get("truncated"):
                # Build a fresh dict so the stored core is never mutated.
                view = {
                    **view,
                    "transactional_boundaries": {
                        **boundaries,
                        "classes": listed[:_TXN_COMPACT_CAP],
                        "truncated": True,
                        "note": (
                            f"showing {_TXN_COMPACT_CAP} of {total}; "
                            f"use --full to see all {total}"
                        ),
                    },
                }

    return view
2404
+
2405
+
2293
2406
  def contract_view(
2294
2407
  sm: SourceMap,
2295
2408
  *,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.18
3
+ Version: 1.31.20
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **AI-ready change intelligence for Java/Spring enterprise monoliths.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.18-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.20-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -263,7 +263,7 @@ pipx install sourcecode
263
263
 
264
264
  ```bash
265
265
  sourcecode version
266
- # sourcecode 1.31.18
266
+ # sourcecode 1.31.20
267
267
  ```
268
268
 
269
269
  ---
@@ -1,12 +1,12 @@
1
- sourcecode/__init__.py,sha256=RKBkTCXd0nPibD6uZj_CLNSWfxJYQOS-gsplP4C8K_g,104
1
+ sourcecode/__init__.py,sha256=SO-Vu2UFaZzh6UldfBANt4sXszun2WaihkKiyt-cYX4,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
- sourcecode/cache.py,sha256=HDkUZqXOovBc1PjTg-JpOQlyKhUMmEhiG789R7L4Wms,16348
6
+ sourcecode/cache.py,sha256=TiYa3ECjBKtvlfCk7GvQ9v6gZkAITpH3ow9PubA7sUo,22946
7
7
  sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
8
8
  sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
9
- sourcecode/cli.py,sha256=zBJZqoOntf3m4UWqvixrNdSDdytevuYJF4rDvxXTM8k,139621
9
+ sourcecode/cli.py,sha256=Ng9hOoMbpCXvB186etvSqV1q3I4mnndPxr3vMzcZQjc,145199
10
10
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
11
11
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
12
12
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -37,7 +37,7 @@ sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_X
37
37
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
38
38
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
39
39
  sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
40
- sourcecode/serializer.py,sha256=OU-t78jpeTZS6yAMbYHXNA8QpxzzIDwfhLXtv_Uk_G0,117930
40
+ sourcecode/serializer.py,sha256=V8ZV3Y1j4T6rkpO09-PvpVORioWWWbSnOvDjZ2hmQ2U,122144
41
41
  sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
42
42
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
43
43
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -76,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
76
76
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
77
77
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
78
78
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
79
- sourcecode-1.31.18.dist-info/METADATA,sha256=paObgQ32RFOKlwHD7oyNK6tRtbEBRStsmeXXSg4RaPw,31103
80
- sourcecode-1.31.18.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
- sourcecode-1.31.18.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
- sourcecode-1.31.18.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
- sourcecode-1.31.18.dist-info/RECORD,,
79
+ sourcecode-1.31.20.dist-info/METADATA,sha256=jAgvJ3ggn8x9HGWRoANJO5pu8_bQ74T1ftvcjAvDosE,31103
80
+ sourcecode-1.31.20.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
+ sourcecode-1.31.20.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
+ sourcecode-1.31.20.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
+ sourcecode-1.31.20.dist-info/RECORD,,