PyPI - sourcecode - Versions diffs - 1.33.12.tar.gz → 1.33.14.tar.gz - Mend

sourcecode 1.33.12.tar.gz → 1.33.14.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

{sourcecode-1.33.12 → sourcecode-1.33.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.33.12
+Version: 1.33.14
 Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
 License-File: LICENSE
 Keywords: agents,ai,codebase,context,developer-tools,llm

{sourcecode-1.33.12 → sourcecode-1.33.14}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "1.33.12"
+version = "1.33.14"
 description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.33.12"
+__version__ = "1.33.14"

{sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/cli.py RENAMED Viewed

@@ -1102,6 +1102,9 @@ def main(
             obj = _jm.loads(raw)
             if isinstance(obj, dict):
                 obj["_cache"] = meta
+                # Top-level cache_source for one release — backward compat alias
+                if "cache_source" in meta:
+                    obj["cache_source"] = meta["cache_source"]
                 return _jm.dumps(obj, indent=2, ensure_ascii=False)
         except Exception:
             pass
@@ -2273,6 +2276,9 @@ def _make_explanation(reason: str, why: str) -> str:
 def _serialize_relevant_file(f: Any) -> dict:
     from dataclasses import asdict as _asdict
     d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
+    # Emit 'file' as backward-compat alias for 'path' for one release
+    if "path" in d:
+        d["file"] = d["path"]
     reason = d.pop("reason", "") or ""
     why = d.pop("why", "") or ""
     # Expose score as a rounded float so agents can rank/filter files deterministically.

{sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/output_budget.py RENAMED Viewed

@@ -67,7 +67,7 @@ _TRIM_SCHEDULE: list[tuple[str, str | None, int]] = [
     ("execution_paths",          None,                    0),
     ("dependency_graph_summary", None,                    0),
     # Step 6 — last resort
-    ("relevant_files",           None,                    3),
+    ("relevant_files",           None,                   10),
     ("suspected_areas",          None,                    0),
     ("key_dependencies",         None,                    0),
 ]
@@ -148,7 +148,7 @@ def trim_to_budget(data: dict, budget_bytes: int, *, label: str = "") -> dict:
 # Budget constants (bytes) — used by CLI callers
 BUDGET_COMPACT    = 30_000   # compact/agent main cmd
 BUDGET_AGENT      = 40_000   # agent main cmd (slightly more headroom)
-BUDGET_FIX_BUG   = 100_000  # fix-bug (with or without --symptom)
+BUDGET_FIX_BUG   = 200_000  # fix-bug (with or without --symptom)
 BUDGET_REVIEW_PR  = 100_000  # review-pr
 BUDGET_ONBOARD    = 30_000   # onboard
 BUDGET_EXPLAIN    = 30_000   # explain

{sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -1725,6 +1725,7 @@ class TaskContextBuilder:
                 _sx_commits: list[dict] = []
                 _sx_synonyms: list[str] = []
                 _sx_boosts: list[dict] = []
+                _sx_graph_expanded: list[str] = []
                 # Pass 1: surface code notes whose text contains any keyword
                 _note_matched_paths: dict[str, int] = {}  # path → count of matching notes
@@ -1780,7 +1781,12 @@ class TaskContextBuilder:
                 # Pass 4: inject files whose path matches symptom keywords.
                 # CamelCase-expand the filename stem so "OfflineSessionLoader" matches
                 # the keyword "offline" even without an explicit directory separator.
+                # Large repos: cap per-keyword injections so a common term like
+                # "authentication" (50+ path matches in an IAM repo) cannot flood the
+                # candidate list and push specific terms like "ldap" out of the budget.
                 _p4_dirs_of_injected: set[str] = set()  # directories of high-score injects
+                _P4_KW_CAP = 15  # max path-injections per keyword in large repos
+                _p4_kw_counts: dict[str, int] = {}
                 for _p in all_paths:
                     if _p in _existing_paths:
                         continue
@@ -1797,6 +1803,16 @@ class TaskContextBuilder:
                     _matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
                     if not _matching_kws:
                         continue
+                    # In large repos, skip keywords already at cap; keep file only if at
+                    # least one keyword still has quota (multi-kw matches exhaust each
+                    # keyword's quota independently so specific terms survive longer).
+                    if _is_large_repo:
+                        _matching_kws = [
+                            kw for kw in _matching_kws
+                            if _p4_kw_counts.get(kw, 0) < _P4_KW_CAP
+                        ]
+                        if not _matching_kws:
+                            continue
                     _boost = 0.2 * len(_matching_kws)
                     _injected_score = round(min(0.5 + _boost, 1.0), 2)
                     _first_kw = _matching_kws[0]
@@ -1809,6 +1825,9 @@ class TaskContextBuilder:
                     ))
                     _existing_paths.add(_p)
                     _sx_direct_path.append(_p)
+                    if _is_large_repo:
+                        for _kw in _matching_kws:
+                            _p4_kw_counts[_kw] = _p4_kw_counts.get(_kw, 0) + 1
                     if _injected_score >= 0.7:
                         _p4_dirs_of_injected.add(str(Path(_p).parent))
@@ -1863,9 +1882,15 @@ class TaskContextBuilder:
                 # architecturally adjacent classes that don't mention symptom keywords
                 # in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
                 # siblings in the same infinispan/ package).
+                # Large repos: cap total co-location injections so that a keyword
+                # matching many directories doesn't flood the candidate list.
                 if _is_large_repo and _p4_dirs_of_injected:
                     _coloc_existing = {rf.path for rf in relevant_files}
+                    _P4C_CAP = 30
+                    _coloc_count = 0
                     for _cp in all_paths:
+                        if _coloc_count >= _P4C_CAP:
+                            break
                         if _cp in _coloc_existing:
                             continue
                         if Path(_cp).suffix.lower() not in _src_exts:
@@ -1879,6 +1904,7 @@ class TaskContextBuilder:
                                 why="directory proximity injection",
                             ))
                             _coloc_existing.add(_cp)
+                            _coloc_count += 1
                 # Sort before content scan so top candidates get read first.
                 # In large repos: prioritise symptom_match files within each score band
@@ -1897,6 +1923,7 @@ class TaskContextBuilder:
                 _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
                 _boosted: list[RelevantFile] = []
+                _scanned_body: dict[str, str] = {}  # cache for graph expansion (Pass 5)
                 for _rf in _scan_candidates:
                     _extra = 0.0
                     _extra_syn = 0.0
@@ -1931,9 +1958,11 @@ class TaskContextBuilder:
                     _body_lower = ""
                     if Path(_rf.path).suffix.lower() in _src_exts:
                         try:
-                            _body_lower = (self.root / _rf.path).read_text(
+                            _raw_body = (self.root / _rf.path).read_text(
                                 encoding="utf-8", errors="replace"
-                            )[:12000].lower()  # ~300 lines avg
+                            )[:12000]  # ~300 lines avg
+                            _scanned_body[_rf.path] = _raw_body  # cache for Pass 5
+                            _body_lower = _raw_body.lower()
                         except OSError:
                             pass
@@ -1993,6 +2022,105 @@ class TaskContextBuilder:
                     key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
                 )
+                # Pass 5: reverse graph expansion from high-score seed nodes.
+                # Identifies which source files in the repo REFERENCE the seed
+                # classes (imports, implements, extends, field declarations).
+                # This is a reverse-import lookup: for seed class "UserProvider",
+                # it finds JpaUserProvider / DefaultUserSessionProvider which import
+                # UserProvider — even though those files don't contain symptom
+                # keywords in their own path.
+                # Seeds include any high-score file (not just symptom_match role)
+                # so that files found by _rank_files class-name matching also expand.
+                if not fast:
+                    import re as _re_gx
+                    _GX_SEED_THRESH = 0.5
+                    _GX_EXPAND_CAP = 30
+                    _GX_HOP_DECAY = 0.6
+                    # Collect seed class names from high-score results
+                    _gx_seed_stems: dict[str, float] = {}  # stem → score
+                    for _gx_rf in relevant_files:
+                        if _gx_rf.score < _GX_SEED_THRESH:
+                            continue
+                        if Path(_gx_rf.path).suffix.lower() not in _src_exts:
+                            continue
+                        _gx_stem = Path(_gx_rf.path).stem
+                        _gx_seed_stems[_gx_stem] = max(
+                            _gx_seed_stems.get(_gx_stem, 0.0), _gx_rf.score
+                        )
+                    if _gx_seed_stems:
+                        # Compile per-stem word-boundary patterns for fast matching
+                        import re as _re_gx2
+                        _gx_patterns: dict[str, Any] = {
+                            stem: _re_gx2.compile(rf'\b{_re_gx2.escape(stem)}\b')
+                            for stem in _gx_seed_stems
+                        }
+                        _gx_existing = {rf.path for rf in relevant_files}
+                        _gx_new: list[RelevantFile] = []
+                        _gx_added: set[str] = set()
+                        # Candidates: non-test source files not yet in results.
+                        # Small repos: scan all; large repos: use pre-scanned content only.
+                        # Test files are excluded (fix-bug focuses on production code).
+                        if _is_large_repo:
+                            _gx_candidates = [
+                                p for p in _scanned_body
+                                if p not in _gx_existing and not self._is_test(p)
+                            ]
+                        else:
+                            _gx_candidates = [
+                                p for p in all_paths
+                                if p not in _gx_existing
+                                and Path(p).suffix.lower() in _src_exts
+                                and not self._is_test(p)
+                            ]
+                        for _gx_cand in _gx_candidates:
+                            if len(_gx_new) >= _GX_EXPAND_CAP:
+                                break
+                            if _gx_cand in _gx_added:
+                                continue
+                            # Use cached content or read fresh (small repos only)
+                            _gx_body = _scanned_body.get(_gx_cand)
+                            if _gx_body is None:
+                                if _is_large_repo:
+                                    continue  # never do fresh reads on large repos in Pass 5
+                                try:
+                                    _gx_body = (self.root / _gx_cand).read_text(
+                                        encoding="utf-8", errors="replace"
+                                    )[:8000]
+                                except OSError:
+                                    continue
+                            # Reverse lookup: does this file reference any seed class?
+                            for _gx_stem, _gx_seed_score in _gx_seed_stems.items():
+                                if _gx_patterns[_gx_stem].search(_gx_body):
+                                    _hop1_score = round(
+                                        min(_gx_seed_score * _GX_HOP_DECAY, 0.85), 2
+                                    )
+                                    _gx_new.append(RelevantFile(
+                                        path=_gx_cand,
+                                        role="symptom_match",
+                                        score=_hop1_score,
+                                        reason=(
+                                            f"graph_expansion: references {_gx_stem} "
+                                            f"(1-hop reverse import)"
+                                        ),
+                                        why=f"graph_expansion: 1 hop from {_gx_stem}",
+                                    ))
+                                    _gx_added.add(_gx_cand)
+                                    _sx_graph_expanded.append(_gx_cand)
+                                    break  # one match per candidate is enough
+                        if _gx_new:
+                            relevant_files = sorted(
+                                relevant_files + _gx_new,
+                                key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
+                            )
                 # Synonym note (only when synonyms actually fired)
                 if _frontend_kws and _sx_synonyms:
                     symptom_note = (
@@ -2016,6 +2144,7 @@ class TaskContextBuilder:
                     "content_matches": _sx_content[:10],
                     "commit_matches": _sx_commits[:10],
                     "synonym_matches": _sx_synonyms[:10],
+                    "graph_expansion": _sx_graph_expanded[:10],
                     "boosts": _sx_boosts[:30],
                     "final_boost": round(
                         sum(b["value"] for b in _sx_boosts), 3