sourcecode 1.31.16__py3-none-any.whl → 1.31.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.16"
3
+ __version__ = "1.31.18"
sourcecode/cache.py ADDED
@@ -0,0 +1,470 @@
1
+ """
2
+ Snapshot cache manager for sourcecode — v2.
3
+
4
+ Cache layout
5
+ ------------
6
+ ~/.sourcecode/cache/<repo_id>/
7
+ snapshot-<git_sha>-<flags_hash>.json.gz ← versioned envelope
8
+ cas/
9
+ <blob_hash16>.gz ← content-addressed blobs
10
+
11
+ Schema
12
+ ------
13
+ Every snapshot file is a gzip-compressed JSON *envelope*:
14
+
15
+ {
16
+ "sv": "2", // schema version — bump to invalidate all
17
+ "key": "abc1234-aabbccdd", // cache key (git_sha + flags_hash)
18
+ "ts": "2026-05-24T22:00:00Z", // write timestamp (ISO-8601 UTC)
19
+ "fmt": "json", // output format: "json" | "yaml"
20
+ "layers": {"heuristic": "...", ...}, // analyzer fingerprints at write time
21
+ // ── content (one of two forms) ──────────────────────────────────────
22
+ "snap": {...}, // inline fields (small) — JSON mode
23
+ "cas": {"file_paths": "<h16>",…} // large fields deduped into CAS store
24
+ // — OR —
25
+ "raw": "<content string>" // YAML or unparseable JSON stored as-is
26
+ }
27
+
28
+ Content-addressed store (CAS)
29
+ -----------------------------
30
+ Large top-level JSON fields (> _CAS_THRESHOLD bytes) are extracted into the
31
+ ``cas/`` directory as individual gzip-compressed blobs identified by a 16-char
32
+ SHA-256 hash of their uncompressed bytes. Two snapshots that share an
33
+ identical ``file_paths`` array reference the *same* blob — zero duplication.
34
+
35
+ Eviction / GC
36
+ -------------
37
+ After each write, ``_gc()`` keeps snapshots from the last
38
+ ``SOURCECODE_CACHE_KEEP_COMMITS`` distinct git commits (default 5, override via
39
+ env var). A CAS sweep runs in the same pass: blobs unreferenced by any surviving
40
+ snapshot are deleted.
41
+
42
+ Backward compatibility
43
+ ----------------------
44
+ v1 files (raw gzip'd content, no envelope) are detected by the absence of an
45
+ ``sv`` key in the decompressed JSON, and served transparently. Legacy files
46
+ in ``<repo>/.sourcecode-cache/`` are also checked as a final fallback.
47
+
48
+ Env vars
49
+ --------
50
+ SOURCECODE_CACHE_DIR Override global cache base (default: ~/.sourcecode/cache)
51
+ SOURCECODE_CACHE_KEEP_COMMITS How many git commits to retain (default: 5; 0 = unlimited)
52
+ """
53
+ from __future__ import annotations
54
+
55
+ import gzip
56
+ import hashlib
57
+ import json
58
+ import os
59
+ import re
60
+ from datetime import datetime, timezone
61
+ from pathlib import Path
62
+ from typing import Any, Optional
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Version / constants
67
+ # ---------------------------------------------------------------------------
68
+
69
#: Envelope schema version. Changing this string makes every snapshot that
#: was written under another version read as a cache miss.
SCHEMA_VERSION: str = "2"

#: Top-level JSON keys that may be moved out of the envelope into the CAS.
_CAS_FIELDS: frozenset[str] = frozenset({
    "file_paths",
    "entry_points",
    "docs",
    "dependencies",
    "graph",
    "semantic_calls",
    "semantic_symbols",
    "architecture",
    "metrics",
    "git_history",
    "env_map",
    "code_notes",
})

#: A field is moved to the CAS only when its serialised form is larger than
#: this many bytes.
_CAS_THRESHOLD: int = 4 * 1024

#: Number of distinct commits retained when the env override is absent.
_DEFAULT_KEEP_COMMITS: int = 5

# Snapshot file name: "snapshot-<hex_commit>-<hex_flags>.json.gz".
# Group 1 captures the commit part.
_SNAPSHOT_RE = re.compile(r"^snapshot-([0-9a-f]+)-[0-9a-f]+\.json\.gz$")
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Public API — location helpers
99
+ # ---------------------------------------------------------------------------
100
+
101
def repo_id(repo_root: Path) -> str:
    """
    Return a stable 16-character hex identifier for *repo_root*.

    The identifier is the first 16 hex digits of the SHA-256 digest of the
    resolved path string, so the same directory always maps to the same id.
    """
    canonical = str(repo_root.resolve())
    digest = hashlib.sha256(canonical.encode())
    return digest.hexdigest()[:16]
104
+
105
+
106
def cache_dir(repo_root: Path) -> Path:
    """
    Return the cache directory that belongs to *repo_root*.

    The base directory is taken from ``SOURCECODE_CACHE_DIR`` when that
    variable is set to a non-empty value, otherwise ``~/.sourcecode/cache``.
    The per-repo directory is ``<base>/<repo_id>``.
    """
    override = os.environ.get("SOURCECODE_CACHE_DIR", "")
    if override:
        root = Path(override)
    else:
        root = Path.home() / ".sourcecode" / "cache"
    return root / repo_id(repo_root)
115
+
116
+
117
+ # ---------------------------------------------------------------------------
118
+ # Public API — read / write
119
+ # ---------------------------------------------------------------------------
120
+
121
def read(repo_root: Path, cache_key: str) -> Optional[str]:
    """
    Look up the snapshot stored under *cache_key*.

    Lookup order:
      1. ``<cache_dir>/snapshot-<cache_key>.json.gz`` (v2 envelope or v1 raw).
      2. ``<repo_root>/.sourcecode-cache/snapshot-<cache_key>.json`` (legacy).

    Returns the snapshot content, or ``None`` on a miss. When the global file
    exists but cannot be served (corrupt, schema mismatch, missing CAS blob),
    it is deleted and ``None`` is returned without consulting the legacy
    location.
    """
    store = cache_dir(repo_root)
    snapshot = store / f"snapshot-{cache_key}.json.gz"

    if snapshot.exists():
        content: Optional[str]
        try:
            content = _parse_envelope(snapshot.read_bytes(), store)
        except Exception:
            content = None
        if content is None:
            # Unusable file: evict it so the next write starts clean.
            _safe_unlink(snapshot)
        return content

    legacy_file = repo_root / ".sourcecode-cache" / f"snapshot-{cache_key}.json"
    if not legacy_file.exists():
        return None
    try:
        return legacy_file.read_text(encoding="utf-8")
    except Exception:
        return None
152
+
153
+
154
def write(
    repo_root: Path,
    cache_key: str,
    content: str,
    *,
    fmt: str = "json",
    layers: Optional[dict[str, str]] = None,
) -> None:
    """
    Store *content* as a versioned snapshot, then run garbage collection.

    Parameters
    ----------
    repo_root : Path
        Root directory of the analysed repository.
    cache_key : str
        ``"{git_sha}-{flags_hash}"`` identifying this analysis.
    content : str
        Final rendered output (JSON or YAML string).
    fmt : str
        ``"json"`` or ``"yaml"``; CAS extraction is only attempted for JSON.
    layers : dict[str, str], optional
        Analyzer fingerprints recorded in the envelope. ``None`` is stored
        as an empty mapping.

    Failures while creating the directory, building the envelope or writing
    the file are swallowed; in that case garbage collection is skipped.
    """
    store = cache_dir(repo_root)
    snapshot = store / f"snapshot-{cache_key}.json.gz"
    fingerprints = layers or {}

    try:
        store.mkdir(parents=True, exist_ok=True)
        snapshot.write_bytes(
            _build_envelope(cache_key, content, fmt, fingerprints, store)
        )
    except Exception:
        # Caching is optional; a failed write must not affect the caller.
        return
    else:
        _gc(store)
191
+
192
+
193
+ # ---------------------------------------------------------------------------
194
+ # Envelope (de)serialisation
195
+ # ---------------------------------------------------------------------------
196
+
197
def _now_iso() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.strftime("%Y-%m-%dT%H:%M:%SZ")
199
+
200
+
201
def _build_envelope(
    cache_key: str,
    content: str,
    fmt: str,
    layers: dict[str, str],
    cache_d: Path,
) -> bytes:
    """
    Wrap *content* in a versioned envelope and return it gzip-compressed.

    For ``fmt == "json"`` content that parses to a JSON object, the object is
    passed through ``_cas_extract`` and stored under ``"snap"`` (plus
    ``"cas"`` when any field was moved out). Everything else (non-JSON
    formats, JSON that is not an object, or any failure while parsing or
    extracting) is stored verbatim under ``"raw"``.
    """
    envelope: dict[str, Any] = {
        "sv": SCHEMA_VERSION,
        "key": cache_key,
        "ts": _now_iso(),
        "fmt": fmt,
        "layers": layers,
    }

    # ``extracted`` stays None whenever the content has to be stored raw.
    extracted: Any = None
    if fmt == "json":
        try:
            candidate = json.loads(content)
            if isinstance(candidate, dict):
                extracted = _cas_extract(candidate, cache_d)
        except Exception:
            extracted = None

    if extracted is None:
        envelope["raw"] = content
    else:
        inline, cas_refs = extracted
        envelope["snap"] = inline
        if cas_refs:
            envelope["cas"] = cas_refs

    encoded = json.dumps(envelope, ensure_ascii=False).encode("utf-8")
    return gzip.compress(encoded, compresslevel=6)
239
+
240
+
241
def _parse_envelope(data: bytes, cache_d: Path) -> Optional[str]:
    """
    Turn the bytes of a snapshot file back into the content string.

    Handles three on-disk shapes:

    * v1 file: the gzip'd content itself. Recognised when the payload is
      not JSON, is JSON but not an object, or is an object without an
      ``"sv"`` key. The decoded text is returned unchanged.
    * v2 envelope with ``"raw"``: the stored string is returned.
    * v2 envelope with ``"snap"``: CAS references are resolved and the
      object is re-serialised with ``indent=2, ensure_ascii=False``.

    Returns ``None`` when the data cannot be decompressed or decoded, when
    ``"sv"`` is present but differs from ``SCHEMA_VERSION``, when a CAS blob
    cannot be restored, or when the envelope has neither ``"raw"`` nor
    ``"snap"``.
    """
    try:
        text = gzip.decompress(data).decode("utf-8")
    except Exception:
        return None

    try:
        envelope = json.loads(text)
    except Exception:
        # Not JSON at all (e.g. a v1 YAML snapshot): serve as-is.
        return text

    if not isinstance(envelope, dict) or "sv" not in envelope:
        # No schema marker: v1 snapshot stored as raw content.
        return text
    if envelope["sv"] != SCHEMA_VERSION:
        return None  # envelope written under a different schema version

    if "raw" in envelope:
        return envelope["raw"]

    if "snap" not in envelope:
        return None  # malformed envelope

    inline: dict[str, Any] = envelope["snap"]
    cas_refs: dict[str, str] = envelope.get("cas", {})
    if not cas_refs:
        restored: Optional[dict[str, Any]] = dict(inline)
    else:
        restored = _cas_restore(inline, cas_refs, cache_d)
        if restored is None:
            return None  # CAS miss (blob missing or unreadable)

    # NOTE(review): indent=2 / ensure_ascii=False are said to match the
    # serialiser used by the pipeline — confirm against the CLI output code.
    return json.dumps(restored, indent=2, ensure_ascii=False)
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # CAS store
297
+ # ---------------------------------------------------------------------------
298
+
299
def _cas_dir(cache_d: Path) -> Path:
    """Return the ``cas`` sub-directory of the per-repo cache directory."""
    return cache_d.joinpath("cas")
301
+
302
+
303
def _cas_path(cache_d: Path, blob_hash: str) -> Path:
    """Return the file path of the CAS blob identified by *blob_hash*."""
    blob_name = blob_hash + ".gz"
    return _cas_dir(cache_d) / blob_name
305
+
306
+
307
def _cas_store_blob(cache_d: Path, serialised: str) -> str:
    """
    Store *serialised* (a JSON string) in the CAS. Idempotent.

    The blob is identified by the first 16 hex digits of the SHA-256 of its
    UTF-8 bytes. If a blob with that hash already exists nothing is written.

    New blobs are written to a temporary sibling file and then moved into
    place with ``os.replace``. An interrupted write therefore never leaves a
    truncated file under the final name; since an existing path is never
    rewritten, such a file would make every snapshot referencing it a
    permanent cache miss.

    Returns the 16-char hash. Raises whatever the filesystem raises if the
    blob cannot be written (callers treat that as "do not use CAS").
    """
    raw = serialised.encode("utf-8")
    blob_hash = hashlib.sha256(raw).hexdigest()[:16]
    path = _cas_path(cache_d, blob_hash)
    if path.exists():
        return blob_hash

    path.parent.mkdir(parents=True, exist_ok=True)
    # The temp name ends in ".tmp", so the "*.gz" glob used by the CAS sweep
    # never matches a half-written file.
    tmp_path = path.with_name(f"{path.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_bytes(gzip.compress(raw, compresslevel=6))
        os.replace(tmp_path, path)
    except BaseException:
        # Do not leave the temporary file behind on failure.
        try:
            tmp_path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
    return blob_hash
320
+
321
+
322
def _cas_load_blob(cache_d: Path, blob_hash: str) -> Optional[str]:
    """
    Load the JSON string stored under *blob_hash*.

    Returns ``None`` when the blob file does not exist or cannot be read,
    decompressed or decoded as UTF-8.
    """
    blob_file = _cas_path(cache_d, blob_hash)
    if blob_file.exists():
        try:
            compressed = blob_file.read_bytes()
            return gzip.decompress(compressed).decode("utf-8")
        except Exception:
            pass
    return None
331
+
332
+
333
def _cas_extract(
    snap_dict: dict[str, Any],
    cache_d: Path,
) -> tuple[dict[str, Any], dict[str, str]]:
    """
    Split *snap_dict* into inline fields and CAS references.

    A top-level field is moved to the CAS when it:
      - is named in ``_CAS_FIELDS``,
      - is not ``None``, and
      - serialises to more than ``_CAS_THRESHOLD`` UTF-8 bytes.

    Returns ``(inline, cas_refs)``. ``cas_refs`` maps each moved field name
    to its blob hash. ``inline`` holds every key of *snap_dict* in its
    original order; moved fields keep their slot with a ``None`` placeholder.

    The placeholder is what keeps key order stable across a cache round
    trip. ``_cas_restore`` assigns the loaded value to the existing key, and
    assigning to an existing dict key does not change its position. Without
    it, moved fields would be appended at the end and the re-serialised
    snapshot would differ from the original output.
    """
    inline: dict[str, Any] = {}
    cas_refs: dict[str, str] = {}

    for key, value in snap_dict.items():
        if key in _CAS_FIELDS and value is not None:
            serialised = json.dumps(value, ensure_ascii=False)
            if len(serialised.encode("utf-8")) > _CAS_THRESHOLD:
                cas_refs[key] = _cas_store_blob(cache_d, serialised)
                # Reserve the original position; the real value lives in CAS.
                inline[key] = None
                continue
        inline[key] = value

    return inline, cas_refs
358
+
359
+
360
def _cas_restore(
    inline: dict[str, Any],
    cas_refs: dict[str, str],
    cache_d: Path,
) -> Optional[dict[str, Any]]:
    """
    Rebuild the full snapshot dict from *inline* plus the referenced blobs.

    Starts from a shallow copy of *inline* and, for every ``field -> hash``
    pair in *cas_refs*, sets that field to the JSON-decoded blob content.

    Returns ``None`` as soon as one blob is missing, unreadable or not valid
    JSON; the caller treats that as a cache miss.
    """
    restored: dict[str, Any] = dict(inline)
    for name, digest in cas_refs.items():
        payload = _cas_load_blob(cache_d, digest)
        if payload is None:
            return None  # blob missing or unreadable → full miss
        try:
            value = json.loads(payload)
        except Exception:
            return None
        restored[name] = value
    return restored
380
+
381
+
382
+ # ---------------------------------------------------------------------------
383
+ # Eviction / GC
384
+ # ---------------------------------------------------------------------------
385
+
386
def _gc(cache_d: Path) -> None:
    """
    Evict old snapshots and sweep orphaned CAS blobs.

    Snapshots are grouped by the commit part of their file name. The groups
    are ranked by the newest mtime of any file in the group, and only the
    ``SOURCECODE_CACHE_KEEP_COMMITS`` most recent groups are kept (default
    ``_DEFAULT_KEEP_COMMITS``; a value <= 0 disables eviction). A value that
    is not an integer falls back to the default instead of raising, because
    the caller runs this function outside its own error handling.

    Snapshot files whose names do not match ``_SNAPSHOT_RE`` are never
    deleted here, so they are always passed to the CAS sweep as survivors and
    the blobs they reference are not removed.

    Never raises: any failure aborts the collection silently. When the
    directory contains no snapshot files at all, nothing is done (the CAS is
    not swept either).
    """
    try:
        keep = int(
            os.environ.get("SOURCECODE_CACHE_KEEP_COMMITS", _DEFAULT_KEEP_COMMITS)
        )
    except (TypeError, ValueError):
        keep = _DEFAULT_KEEP_COMMITS

    try:
        all_snapshots = list(cache_d.glob("snapshot-*.json.gz"))
        if not all_snapshots:
            return

        # Group snapshot files by commit SHA; remember the ones we cannot
        # attribute to a commit so their blobs are kept alive.
        groups: dict[str, list[Path]] = {}
        unmatched: list[Path] = []
        for f in all_snapshots:
            m = _SNAPSHOT_RE.match(f.name)
            if m:
                groups.setdefault(m.group(1), []).append(f)
            else:
                unmatched.append(f)

        surviving: list[Path]

        if keep <= 0 or len(groups) <= keep:
            # No eviction needed — but still sweep CAS.
            surviving = all_snapshots
        else:
            newest_mtime = {
                commit: max(p.stat().st_mtime for p in files)
                for commit, files in groups.items()
            }
            ranked = sorted(groups, key=newest_mtime.__getitem__, reverse=True)

            surviving = list(unmatched)
            for commit in ranked[:keep]:
                surviving.extend(groups[commit])
            for commit in ranked[keep:]:
                for f in groups[commit]:
                    _safe_unlink(f)

        _gc_cas(cache_d, surviving)

    except Exception:
        pass  # GC failure is non-fatal
429
+
430
+
431
def _gc_cas(cache_d: Path, surviving_snapshots: list[Path]) -> None:
    """
    Remove every CAS blob that no snapshot in *surviving_snapshots* references.

    Each surviving snapshot is decompressed and parsed; the values of its
    ``"cas"`` mapping are the live hashes. Any ``*.gz`` file in the CAS
    directory whose stem is not a live hash is deleted.

    A snapshot that cannot be read or parsed contributes no live hashes, so
    blobs referenced only by such a snapshot are deleted. Does nothing when
    the CAS directory does not exist. Never raises.
    """
    blob_dir = _cas_dir(cache_d)
    if not blob_dir.exists():
        return

    try:
        live: set[str] = set()
        for snapshot in surviving_snapshots:
            try:
                payload = gzip.decompress(snapshot.read_bytes())
                envelope = json.loads(payload.decode("utf-8"))
                if isinstance(envelope, dict) and "cas" in envelope:
                    live.update(envelope["cas"].values())
            except Exception:
                # Unreadable snapshot: its references are simply unknown.
                continue

        for candidate in blob_dir.glob("*.gz"):
            if candidate.stem in live:
                continue
            _safe_unlink(candidate)

    except Exception:
        pass  # CAS sweep failure is non-fatal
460
+
461
+
462
+ # ---------------------------------------------------------------------------
463
+ # Utilities
464
+ # ---------------------------------------------------------------------------
465
+
466
def _safe_unlink(path: Path) -> None:
    """Delete *path*; a missing file or any other error is ignored."""
    try:
        path.unlink(missing_ok=True)
    except Exception:
        return
sourcecode/cli.py CHANGED
@@ -876,14 +876,16 @@ def main(
876
876
  architecture = True # agents need full architectural signal (M4)
877
877
  graph_modules = True # IC-003: import graph needed for architecture confidence
878
878
 
879
- # ── GAP-9: Cache check — serve from .sourcecode-cache when git SHA unchanged ──
879
+ # ── GAP-9: Cache check — serve from global cache when git SHA unchanged ──
880
+ # Cache is stored in ~/.sourcecode/cache/<repo_id>/ (outside the repo).
881
+ # Snapshots are gzip-compressed (.json.gz) — ~85 % smaller than plain JSON.
882
+ # Eviction keeps the last SOURCECODE_CACHE_KEEP_COMMITS commits (default 5).
880
883
  import hashlib as _hashlib
881
884
  import subprocess as _sub
882
- _cache_dir = target / ".sourcecode-cache"
885
+ from sourcecode import cache as _cache_mod
883
886
  _cache_hit_content: Optional[str] = None
884
887
  _git_sha = ""
885
888
  _cache_key = ""
886
- _cache_file: Optional[Path] = None
887
889
  if not no_cache:
888
890
  try:
889
891
  _sha_r = _sub.run(
@@ -921,13 +923,10 @@ def main(
921
923
  )
922
924
  _flags_h = _hashlib.md5(_flags_str.encode()).hexdigest()[:8]
923
925
  _cache_key = f"{_git_sha}-{_flags_h}"
924
- _cache_file = _cache_dir / f"snapshot-{_cache_key}.json"
925
- if _cache_file.exists():
926
- _cache_hit_content = _cache_file.read_text(encoding="utf-8")
926
+ _cache_hit_content = _cache_mod.read(target, _cache_key)
927
927
  except Exception:
928
928
  _git_sha = ""
929
929
  _cache_key = ""
930
- _cache_file = None
931
930
 
932
931
  if _cache_hit_content is not None:
933
932
  from sourcecode.serializer import write_output
@@ -1762,12 +1761,17 @@ def main(
1762
1761
  write_output(content, output=output)
1763
1762
 
1764
1763
  # GAP-9: Persist to cache for future identical runs (git SHA unchanged)
1765
- if not no_cache and _cache_key and _cache_file is not None and not _pipeline_error:
1766
- try:
1767
- _cache_dir.mkdir(parents=True, exist_ok=True)
1768
- _cache_file.write_text(content, encoding="utf-8")
1769
- except Exception:
1770
- pass
1764
+ # Writes versioned envelope to ~/.sourcecode/cache/<repo_id>/<key>.json.gz.
1765
+ # Large JSON fields are extracted into shared CAS blobs (deduplication).
1766
+ # GC runs inline after each write (keep last N commits + CAS sweep).
1767
+ if not no_cache and _cache_key and not _pipeline_error:
1768
+ _cache_mod.write(
1769
+ target,
1770
+ _cache_key,
1771
+ content,
1772
+ fmt=format,
1773
+ layers=_compute_analyzer_fingerprints(),
1774
+ )
1771
1775
 
1772
1776
  if _pipeline_error:
1773
1777
  raise typer.Exit(code=2)
@@ -2691,6 +2691,60 @@ def apply_ir_size_limits(
2691
2691
  "global_score": (ir.get("impact") or {}).get("global_score", 0),
2692
2692
  "ranked_nodes": ranked,
2693
2693
  }
2694
+
2695
+ # ── Trim reverse_graph to match node/edge limits ──────────────────────────
2696
+ # BUG-P0-02: reverse_graph was never bounded by --max-nodes/--max-edges.
2697
+ # A 26K-node repo (Broadleaf) emits ~3MB of reverse_graph even when
2698
+ # --max-nodes 200 --max-edges 500 is requested.
2699
+ full_rg: dict = ir.get("reverse_graph") or {}
2700
+ if full_rg:
2701
+ # Inner caller-list cap: prevents individual entries from dominating budget.
2702
+ # Formula: max(20, max_nodes // 4) when max_nodes given; 50 otherwise.
2703
+ def _cap_rg_lists(entry: dict, cap: int) -> dict:
2704
+ return {k: (v[:cap] if isinstance(v, list) and len(v) > cap else v)
2705
+ for k, v in entry.items()}
2706
+
2707
+ if kept_fqns is not None:
2708
+ # max_nodes was applied — restrict reverse_graph to kept nodes only.
2709
+ # Cap inner caller lists proportionally: large max_nodes → more callers shown.
2710
+ _inner_cap = max(20, max_nodes // 4) if max_nodes else 50
2711
+ trimmed_rg: dict = {
2712
+ k: _cap_rg_lists(v, _inner_cap)
2713
+ for k, v in full_rg.items()
2714
+ if k in kept_fqns
2715
+ }
2716
+ out["reverse_graph"] = trimmed_rg
2717
+ _rg_trimmed_count = len(full_rg) - len(trimmed_rg)
2718
+ if _rg_trimmed_count:
2719
+ out["reverse_graph_note"] = (
2720
+ f"reverse_graph trimmed: {len(trimmed_rg)}/{len(full_rg)} entries "
2721
+ f"kept (matching --max-nodes {max_nodes} kept nodes), "
2722
+ f"caller lists capped at {_inner_cap}. "
2723
+ "Use --output for full reverse_graph."
2724
+ )
2725
+ elif max_edges is not None:
2726
+ # Only max_edges given (no max_nodes): cap reverse_graph keys
2727
+ # proportionally. Target: at most max_edges keys, sorted by in-degree
2728
+ # (most-connected hubs first) so the most useful entries survive.
2729
+ _rg_limit = max(1, min(max_edges, len(full_rg)))
2730
+ _rg_sorted_keys = sorted(
2731
+ full_rg.keys(),
2732
+ key=lambda k: sum(len(v) for v in full_rg[k].values() if isinstance(v, list)),
2733
+ reverse=True,
2734
+ )
2735
+ _inner_cap = 50
2736
+ out["reverse_graph"] = {
2737
+ k: _cap_rg_lists(full_rg[k], _inner_cap)
2738
+ for k in _rg_sorted_keys[:_rg_limit]
2739
+ }
2740
+ if len(full_rg) > _rg_limit:
2741
+ out["reverse_graph_note"] = (
2742
+ f"reverse_graph trimmed: {_rg_limit}/{len(full_rg)} entries "
2743
+ f"kept (top by in-degree, bounded by --max-edges {max_edges}), "
2744
+ f"caller lists capped at {_inner_cap}. "
2745
+ "Use --output for full reverse_graph."
2746
+ )
2747
+
2694
2748
  return out
2695
2749
 
2696
2750
 
@@ -2849,7 +2903,9 @@ def compute_blast_radius(
2849
2903
  """
2850
2904
  reverse_graph: dict[str, dict[str, list[str]]] = ir.get("reverse_graph") or {}
2851
2905
  route_surface: list[dict] = ir.get("route_surface") or []
2852
- graph_nodes: list[dict] = (ir.get("graph") or {}).get("nodes") or []
2906
+ _graph: dict = ir.get("graph") or {}
2907
+ graph_nodes: list[dict] = _graph.get("nodes") or []
2908
+ graph_edges: list[dict] = _graph.get("edges") or []
2853
2909
  subsystems: list[dict] = ir.get("subsystems") or []
2854
2910
 
2855
2911
  # ── 1. Resolve target → one or more FQNs ─────────────────────────────────
@@ -2914,6 +2970,76 @@ def compute_blast_radius(
2914
2970
  if _effective_depth > 1:
2915
2971
  queue.append((c, 1))
2916
2972
 
2973
+ # ── 2a. Interface bridging: Spring DI / CDI / IoC pattern ────────────────
2974
+ # In DI frameworks (Spring, CDI, Guice), callers inject the INTERFACE, not
2975
+ # the Impl. e.g. `impact OrderServiceImpl` → 0 direct callers, because every
2976
+ # caller wires against OrderService.
2977
+ #
2978
+ # Root cause: implements edges in graph.edges often carry unresolved short-name
2979
+ # `to` values (e.g. "OrderService" not FQN), so _build_reverse_adjacency drops
2980
+ # them (to_symbol ∉ all_fqns). The reverse_graph["...OrderService"] therefore
2981
+ # has no "implements" key — we cannot scan it from the reverse side.
2982
+ #
2983
+ # Fix: scan FORWARD graph edges for type=implements FROM our matched classes.
2984
+ # Resolve the `to` value (short or FQN) against reverse_graph keys via suffix
2985
+ # matching. Gather non-structural callers of those interface keys and merge
2986
+ # them into direct_callers.
2987
+ _iface_bridging: list[dict] = [] # [{interface, caller_count}] for output metadata
2988
+
2989
+ _target_is_interface = any(
2990
+ n.get("symbol_kind") == "interface" or n.get("type") == "interface"
2991
+ for n in graph_nodes
2992
+ if n.get("fqn") in matched_fqns
2993
+ )
2994
+
2995
+ if not _target_is_interface and graph_edges:
2996
+ # Build suffix→FQN lookup for reverse_graph keys (one-time, O(n))
2997
+ _rg_suffix_map: dict[str, list[str]] = {}
2998
+ for _rg_key in reverse_graph:
2999
+ _sfx = _simple_name(_rg_key)
3000
+ _rg_suffix_map.setdefault(_sfx, []).append(_rg_key)
3001
+
3002
+ _BRIDGE_SKIP = frozenset({
3003
+ "implements", "extends", "contained_in", "annotated_with"
3004
+ })
3005
+
3006
+ for _edge in graph_edges:
3007
+ if _edge.get("type") != "implements":
3008
+ continue
3009
+ _from = _edge.get("from") or ""
3010
+ if _from not in matched_fqns:
3011
+ continue
3012
+ # Resolve `to` (may be short name like "OrderService" or full FQN)
3013
+ _to_raw = _edge.get("to") or ""
3014
+ _to_simple = _simple_name(_to_raw)
3015
+ _candidate_iface_keys: list[str] = []
3016
+ if _to_raw in reverse_graph:
3017
+ _candidate_iface_keys = [_to_raw]
3018
+ else:
3019
+ _candidate_iface_keys = _rg_suffix_map.get(_to_simple, [])
3020
+
3021
+ for _iface_fqn in _candidate_iface_keys:
3022
+ _rg_entry = reverse_graph[_iface_fqn]
3023
+ _iface_callers = [
3024
+ c
3025
+ for _etype, _clist in _rg_entry.items()
3026
+ if _etype not in _BRIDGE_SKIP
3027
+ for c in _clist
3028
+ if c not in matched_fqns
3029
+ ]
3030
+ if not _iface_callers:
3031
+ continue
3032
+ _iface_bridging.append({
3033
+ "interface": _iface_fqn,
3034
+ "caller_count": len(_iface_callers),
3035
+ })
3036
+ for c in _iface_callers:
3037
+ if c not in all_affected:
3038
+ all_affected[c] = 1
3039
+ direct_callers.append(c)
3040
+ if _effective_depth > 1:
3041
+ queue.append((c, 1))
3042
+
2917
3043
  # BFS for indirect callers
2918
3044
  indirect_callers: list[str] = []
2919
3045
  visited: set[str] = set(matched_fqns) | set(direct_callers)
@@ -3142,6 +3268,13 @@ def compute_blast_radius(
3142
3268
  if n_modules > 1:
3143
3269
  _parts.append(f"impact crosses {n_modules} modules")
3144
3270
 
3271
+ if _iface_bridging:
3272
+ _iface_names = [b["interface"].split(".")[-1] for b in _iface_bridging]
3273
+ _parts.append(
3274
+ f"callers resolved via interface{'s' if len(_iface_names) > 1 else ''} "
3275
+ f"({', '.join(_iface_names)}) — Spring/CDI DI pattern"
3276
+ )
3277
+
3145
3278
  if not _parts:
3146
3279
  explanation = f"No callers or dependents found for {target!r}. Low-risk isolated change."
3147
3280
  else:
@@ -3181,6 +3314,13 @@ def compute_blast_radius(
3181
3314
  }
3182
3315
  if _candidates_out:
3183
3316
  out["candidates"] = _candidates_out
3317
+ if _iface_bridging:
3318
+ out["via_interface_resolution"] = _iface_bridging
3319
+ out["via_interface_note"] = (
3320
+ "Target is a concrete class injected via interface(s) in DI frameworks "
3321
+ "(Spring/CDI/Guice). direct_callers includes callers of the implemented "
3322
+ "interface(s) — these are the real production dependents."
3323
+ )
3184
3324
  if len(direct_callers) > 30:
3185
3325
  out["direct_callers_note"] = (
3186
3326
  f"Showing 30/{n_direct} direct callers. Use --output to inspect full IR."
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.16
3
+ Version: 1.31.18
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **AI-ready change intelligence for Java/Spring enterprise monoliths.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.16-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.18-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -263,7 +263,7 @@ pipx install sourcecode
263
263
 
264
264
  ```bash
265
265
  sourcecode version
266
- # sourcecode 1.31.16
266
+ # sourcecode 1.31.18
267
267
  ```
268
268
 
269
269
  ---
@@ -302,15 +302,24 @@ sourcecode fix-bug /path/to/repo --symptom "NullPointerException in checkout"
302
302
 
303
303
  Measured against open-source enterprise Java repos:
304
304
 
305
- | Repo | Classes | Cold scan (`--compact`) | Cache hit | Endpoints found |
306
- |------|---------|------------------------|-----------|----------------|
307
- | BroadleafCommerce | ~2970 | 2.6s | 0.26s | 130 |
308
- | Keycloak | ~6363 | 8.4s | 0.27s | 693 |
305
+ | Repo | Java files | Cold scan (`--compact`) | Cache hit | Cache speedup | Endpoints found |
306
+ |------|-----------|------------------------|-----------|---------------|----------------|
307
+ | BroadleafCommerce | 2,985 | 2.9s | 0.20s | ~13x | 130 |
308
+ | Keycloak | 7,885 | 9.0s | 0.27s | ~33x | 693 |
309
309
 
310
- Cache speedup: **30x**. The cache is keyed on file content hashes — invalidated only when source changes.
310
+ The cache is keyed on file content hashes — invalidated only when source changes. Speedup varies by repo size and OS I/O.
311
311
 
312
- **`impact` on a high-fan-in class:**
313
- For hub interfaces (2000+ direct dependents), use `--depth 1` — it gives you the direct endpoints in 12s. Default depth=4 can take 90+ seconds on very large repos.
312
+ **Token sizes (measured):**
313
+
314
+ | Mode | BroadleafCommerce | Keycloak |
315
+ |------|------------------|---------|
316
+ | `--compact` | ~2,900 | ~4,000 |
317
+ | `--agent` | ~4,800 | ~5,500 |
318
+ | `onboard` | ~2,600 | n/a |
319
+ | `fix-bug` (trimmed) | ~27,000 | ~4,600 |
320
+
321
+ **`impact` on high-fan-in classes:**
322
+ For hub interfaces (1000+ direct dependents), use `--depth 1` — direct endpoints are already the most actionable signal. Depth=4 on very large repos may take 90+ seconds.
314
323
 
315
324
  ---
316
325
 
@@ -318,7 +327,7 @@ For hub interfaces (2000+ direct dependents), use `--depth 1` — it gives you t
318
327
 
319
328
  | Flag | Alias | Default | Description |
320
329
  |------|-------|---------|-------------|
321
- | `--compact` | | off | High-signal summary (typically 2000–4000 tokens): stacks, entry points, dependencies, confidence, gaps. Includes `mybatis` and `transactional_boundaries` for Java projects. |
330
+ | `--compact` | | off | High-signal summary (typically 2,500–4,000 tokens for mid-to-large Java repos): stacks, entry points, dependencies, confidence, gaps. Includes `transactional_boundaries` for Spring projects. |
322
331
  | `--agent` | | off | Structured JSON for AI agents: project identity, entry points, architecture, dependencies, confidence. More detail than `--compact`. ~4500–5500 tokens. |
323
332
  | `--full` | | off | Remove truncation limits on `transactional_boundaries`, `mybatis.dto_mappers`, and other capped lists. |
324
333
  | `--git-context` | `-g` | off | Include git activity: recent commits, change hotspots, and uncommitted file count. |
@@ -360,9 +369,10 @@ sourcecode impact OrderService . --depth 2 # limit BFS depth
360
369
  | `candidates` | On partial match: up to 10 FQNs ranked by relevance |
361
370
 
362
371
  **Best practices:**
363
- - Target **interfaces**, not implementations: `impact OrderService` > `impact OrderServiceImpl`. Callers depend on the interface contract, not the Impl.
372
+ - Target **interfaces**, not implementations: `impact OrderService` > `impact OrderServiceImpl`. In Spring projects, callers inject the interface via `@Autowired` — the impl has zero direct callers in the graph even though it runs all the code. Querying the impl returns `direct_callers: []` with no error; querying the interface returns the real blast radius.
364
373
  - Use `--depth 1` when the target has 200+ callers — direct endpoints are already the most actionable signal.
365
374
  - The cache applies to the underlying IR scan — second `impact` run on the same repo is significantly faster.
375
+ - When you get `direct_callers: 0` for a `@Service` or `@Repository` class, that is almost certainly the interface-injection pattern. Re-run with the interface name.
366
376
 
367
377
  **Supported targets:**
368
378
  - Simple class name: `OrderService`
@@ -389,14 +399,16 @@ Extracts all Spring MVC (`@GetMapping`, `@PostMapping`, `@RequestMapping`, etc.)
389
399
  ## `repo-ir` — Symbol-level IR
390
400
 
391
401
  ```bash
392
- sourcecode repo-ir /path/to/repo
393
- sourcecode repo-ir /path/to/repo --summary-only # compact: analysis + impact, no full graph
402
+ sourcecode repo-ir /path/to/repo --summary-only # recommended: analysis + impact, no full graph (~20K tokens)
394
403
  sourcecode repo-ir /path/to/repo --since HEAD~1 # symbol-level diff
395
- sourcecode repo-ir /path/to/repo --max-nodes 200 --max-edges 500
404
+ sourcecode repo-ir /path/to/repo --files src/.../OrderService.java # single-file IR
405
+ sourcecode repo-ir /path/to/repo --max-nodes 200 --max-edges 500 # limits forward graph only — see note below
396
406
  ```
397
407
 
398
408
  Builds a deterministic symbol graph: classes, methods, import/injection edges, Spring roles, subsystems. Output is JSON with `graph`, `reverse_graph`, `impact`, `subsystems`, and `route_surface`.
399
409
 
410
+ **Size warning:** Without `--summary-only`, output can exceed 1MB for mid-size repos. `--max-nodes`/`--max-edges` limit the forward `graph` section only — the `reverse_graph` section is not bounded by these flags and is the largest component. Always use `--summary-only` unless you need the full graph for downstream tooling.
411
+
400
412
  ---
401
413
 
402
414
  ## `onboard` — [OSS Core] Codebase orientation
@@ -475,47 +487,103 @@ Note: `sourcecode onboard`, `sourcecode fix-bug`, `sourcecode review-pr`, and `s
475
487
 
476
488
  ## How to use sourcecode effectively
477
489
 
478
- ### With AI agents (Claude, GPT-4, etc.)
490
+ ### Onboarding — new repo, new agent session
479
491
 
480
492
  ```bash
481
- # Inject bounded context at session start:
482
- sourcecode /repo --agent | paste-to-agent
493
+ # Bounded context at session start (~2,500–5,500 tokens)
494
+ sourcecode /repo --compact # fast overview
495
+ sourcecode /repo --agent # more detail: file relevance, architecture, event flows
496
+ sourcecode onboard /repo # task-structured: entry points, key files, gaps
497
+ ```
483
498
 
484
- # For a change task:
485
- sourcecode impact PaymentService /repo --depth 1 | ask-agent "What are the risks?"
499
+ Use `--compact` or `--agent` as first-prompt injection for AI coding agents. Both are bounded and deterministic.
486
500
 
487
- # For PR review:
488
- sourcecode review-pr /repo --since main | ask-agent "Summarize architectural risks"
501
+ ### Impact analysis — before touching a class
502
+
503
+ ```bash
504
+ # Always target the INTERFACE in Spring projects:
505
+ sourcecode impact OrderService /repo # ✓ correct: 30 callers, 11 endpoints
506
+ sourcecode impact OrderServiceImpl /repo # ✗ wrong: 0 callers (Spring DI blindness)
507
+
508
+ # Large hub interfaces — depth=1 is faster and still actionable:
509
+ sourcecode impact KeycloakSession /repo --depth 1
510
+
511
+ # If you get direct_callers:[] for a @Service class, re-query the interface.
489
512
  ```
490
513
 
491
- ### In CI/CD pipelines
514
+ ### Bug triage — symptom-driven
492
515
 
493
516
  ```bash
494
- # Always-fresh bounded JSON — deterministic, cacheable by content hash
495
- sourcecode /repo --compact --no-cache --format json --output context.json
517
+ # Specific symptoms produce the best signal:
518
+ sourcecode fix-bug /repo --symptom "OIDC token refresh fails after realm update"
519
+ sourcecode fix-bug /repo --symptom "NullPointerException in OrderService during checkout"
496
520
 
497
- # PR gate — parse ci_decision field
498
- sourcecode review-pr /repo --since $BASE_REF --output review.json
499
- jq '.ci_decision' review.json # "analysis_success" | "git_ref_error" | etc.
521
+ # Generic symptoms produce noisy output (100s of files) — be specific.
522
+ # Use --output to capture full output without budget truncation.
523
+ sourcecode fix-bug /repo --symptom "payment timeout" --output triage.json
500
524
  ```
501
525
 
502
- ### For debugging production issues
526
+ ### PR review
503
527
 
504
528
  ```bash
505
- # Correlate symptom to files
506
- sourcecode fix-bug /repo --symptom "NullPointerException in PaymentProcessor when cart is empty"
507
- # ranked list of files, suspected areas, git commits touching relevant code
529
+ # JSON for programmatic use:
530
+ sourcecode review-pr /repo --since main --output review.json
531
+ jq '.ci_decision' review.json # "analysis_success" | "git_ref_error"
532
+
533
+ # Markdown for GitHub comment:
534
+ sourcecode review-pr /repo --since main --format github-comment
535
+
536
+ # CI/CD gate — parse risk and test coverage fields:
537
+ jq '{ci_decision, test_coverage_risk, impact_summary}' review.json
508
538
  ```
509
539
 
510
- ### For understanding blast radius
540
+ ### Modernization planning
511
541
 
512
542
  ```bash
513
- # Interface targets give the most complete signal
514
- sourcecode impact OrderService /repo --depth 2
515
- # shows all endpoints, transaction boundaries, and modules affected
543
+ sourcecode modernize /repo
544
+ # high_coupling_nodes: classes most risky to change (by fan-in degree)
545
+ # dead_zone_candidates: classes with zero callers — safe to remove or refactor
546
+ # Note: hotspot_candidates may be empty in annotation-heavy codebases —
547
+ # check high_coupling_nodes directly for coupling signal.
548
+ ```
516
549
 
517
- # On very large repos or hub classes, depth=1 is faster and still actionable
518
- sourcecode impact KeycloakSession /repo --depth 1
550
+ ### Symbol IR for downstream tooling
551
+
552
+ ```bash
553
+ # Always use --summary-only unless you need the full graph:
554
+ sourcecode repo-ir /repo --summary-only --output ir.json # ~20K tokens
555
+ sourcecode repo-ir /repo --since HEAD~3 --summary-only # changed symbols only
556
+
557
+ # Full graph warning: output can exceed 1MB for mid-size repos.
558
+ # --max-nodes/--max-edges only limit the forward graph, not reverse_graph.
559
+ ```
560
+
561
+ ### With AI agents (Claude, GPT-4, etc.)
562
+
563
+ ```bash
564
+ # Start agent session with bounded context:
565
+ sourcecode /repo --agent --output context.json && cat context.json | agent-cli
566
+
567
+ # For a specific change task, combine context + impact:
568
+ sourcecode /repo --compact > context.json
569
+ sourcecode impact PaymentService /repo --depth 1 > impact.json
570
+ # Feed both to agent: "Given this context and impact, what are the risks of changing PaymentService?"
571
+
572
+ # For PR review:
573
+ sourcecode review-pr /repo --since main --format github-comment
574
+ # Paste directly into GitHub PR description or feed to agent
575
+ ```
576
+
577
+ ### In CI/CD pipelines
578
+
579
+ ```bash
580
+ # Deterministic and safe to run on every commit — --no-cache forces an always-fresh scan
581
+ sourcecode /repo --compact --no-cache --output context.json
582
+
583
+ # PR gate
584
+ sourcecode review-pr /repo --since $BASE_REF --output review.json
585
+ DECISION=$(jq -r '.ci_decision' review.json)
586
+ if [ "$DECISION" != "analysis_success" ]; then echo "Review failed: $DECISION"; fi
519
587
  ```
520
588
 
521
589
  ---
@@ -525,8 +593,11 @@ sourcecode impact KeycloakSession /repo --depth 1
525
593
  - No runtime analysis — all signals are static (annotation, import graph, file structure)
526
594
  - No semantic code understanding — it reads structure, not logic
527
595
  - Architecture pattern detection works best for Spring MVC layered apps; SPI/plugin architectures (e.g. Quarkus extension model) are classified as "layered" which may be inaccurate
528
- - Endpoint recall for JAX-RS subresource locator pattern is ~65% — endpoints mounted dynamically via factory methods are not individually counted
529
- - `impact` on implementation classes (e.g. `OrderServiceImpl`) reflects callers of the implementation specifically, which is often zero if callers use the interface — prefer targeting the interface
596
+ - Endpoint recall for JAX-RS subresource locator pattern is ~65% — endpoints mounted dynamically via factory methods are not individually counted. JAX-RS sub-resource paths (method-level `@Path` inside a `@Path`-annotated class) are extracted as relative paths, not the fully composed URL.
597
+ - `impact` on implementation classes (e.g. `OrderServiceImpl`) reflects callers of the implementation specifically — **in Spring Boot projects this is almost always zero**, because callers inject the interface via `@Autowired`. Always target the interface (`OrderService`) to get the real blast radius. The tool does not auto-resolve impl → interface. When `direct_callers: []` is returned with `confidence_level: high` for a `@Service` class, treat it as a prompt to re-query the interface.
598
+ - `no_security_signal` on endpoints means no method-level security annotations (`@PreAuthorize`, `@Secured`) were found — it does **not** mean the endpoint is unsecured. Projects using Spring Security filter chains, XML security config, or custom filters will show 100% `no_security_signal` even when fully secured.
599
+ - `hotspot_candidates` in `modernize` output reflects graph coupling, not git churn — in annotation-heavy codebases it is often empty even though real hotspots exist. Check `high_coupling_nodes` directly for the coupling picture.
600
+ - `project_summary` is extracted from the repository README — it may reflect marketing language rather than architectural description
530
601
 
531
602
  ---
532
603
 
@@ -1,11 +1,12 @@
1
- sourcecode/__init__.py,sha256=WRDjwdcctiM18O9La_WTmoTUfMIZt9RXjtQC1kObj58,104
1
+ sourcecode/__init__.py,sha256=RKBkTCXd0nPibD6uZj_CLNSWfxJYQOS-gsplP4C8K_g,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=4R13Yb02OrPeB4IH3z6V_g7HWhmGcRHbI8CobCVnRrc,39111
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
+ sourcecode/cache.py,sha256=HDkUZqXOovBc1PjTg-JpOQlyKhUMmEhiG789R7L4Wms,16348
6
7
  sourcecode/canonical_ir.py,sha256=NZu0XICv__hkQGKzW2LNQLRqb1L28K2p_WQCQKS5Zlk,23141
7
8
  sourcecode/classifier.py,sha256=yWeq6agTjkFa3zuNa-gdVIHtjoBoPoVlJnX-b7tdVJs,7851
8
- sourcecode/cli.py,sha256=2AMxV0HWV8y89JVKWHLCdgLHZVzQ4yjZaK9dm-KGJck,139374
9
+ sourcecode/cli.py,sha256=zBJZqoOntf3m4UWqvixrNdSDdytevuYJF4rDvxXTM8k,139621
9
10
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
10
11
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
11
12
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -31,7 +32,7 @@ sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,
31
32
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
32
33
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
33
34
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
34
- sourcecode/repository_ir.py,sha256=qUuuYAyXvFW_fqfD8AU1X2Z1Odq2lodu8Urj51AOpPk,133501
35
+ sourcecode/repository_ir.py,sha256=NooCrMJYqycKSYTEroVWTYR8X83hHaAYKTsgYxvlz-I,140221
35
36
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
36
37
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
37
38
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
@@ -75,8 +76,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
75
76
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
76
77
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
77
78
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
78
- sourcecode-1.31.16.dist-info/METADATA,sha256=gumbkAKXOwaszw6emN9Vzqer20NAkWkXVdJOCsSzCWc,26562
79
- sourcecode-1.31.16.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
80
- sourcecode-1.31.16.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
81
- sourcecode-1.31.16.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
82
- sourcecode-1.31.16.dist-info/RECORD,,
79
+ sourcecode-1.31.18.dist-info/METADATA,sha256=paObgQ32RFOKlwHD7oyNK6tRtbEBRStsmeXXSg4RaPw,31103
80
+ sourcecode-1.31.18.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
81
+ sourcecode-1.31.18.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
82
+ sourcecode-1.31.18.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
83
+ sourcecode-1.31.18.dist-info/RECORD,,