PyPI - sourcecode - Versions diffs - 1.33.14__tar.gz → 1.33.15__tar.gz - Mend

sourcecode 1.33.14.tar.gz → 1.33.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

{sourcecode-1.33.14 → sourcecode-1.33.15}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.33.14
+Version: 1.33.15
 Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
 License-File: LICENSE
 Keywords: agents,ai,codebase,context,developer-tools,llm

{sourcecode-1.33.14 → sourcecode-1.33.15}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "1.33.14"
+version = "1.33.15"
 description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-1.33.14 → sourcecode-1.33.15}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.33.14"
+__version__ = "1.33.15"

{sourcecode-1.33.14 → sourcecode-1.33.15}/src/sourcecode/file_classifier.py RENAMED Viewed

@@ -187,16 +187,19 @@ class FileClassifier:
             if java_class is not None:
                 return java_class
-        if self._has_any_import(imports, _API_IMPORTS):
-            evidence = self._matched_imports(imports, _API_IMPORTS)
+        # Fix 4: call _matched_imports once per category instead of twice
+        # (_has_any_import was calling _matched_imports and discarding the result,
+        # then the caller invoked it again to get the evidence — halving throughput).
+        evidence = self._matched_imports(imports, _API_IMPORTS)
+        if evidence:
             return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
-        if self._has_any_import(imports, _DB_IMPORTS):
-            evidence = self._matched_imports(imports, _DB_IMPORTS)
+        evidence = self._matched_imports(imports, _DB_IMPORTS)
+        if evidence:
             return FileClassification(norm, "database_layer", "high", 0.78, "imports database/persistence dependency", evidence)
-        if self._has_any_import(imports, _INFRA_IMPORTS):
-            evidence = self._matched_imports(imports, _INFRA_IMPORTS)
+        evidence = self._matched_imports(imports, _INFRA_IMPORTS)
+        if evidence:
             return FileClassification(norm, "infrastructure", "high", 0.72, "imports infrastructure dependency", evidence)
         role = self._package_role(norm)

{sourcecode-1.33.14 → sourcecode-1.33.15}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -1836,6 +1836,15 @@ class TaskContextBuilder:
                 # the candidate pool (e.g. AkitaBaseService containing setLoading).
                 _src_exts = frozenset({".java", ".py", ".ts", ".js", ".kt", ".go"})
                 _frontend_kws = [kw for kw in symptom_keywords if kw in _FRONTEND_SYMPTOM_MAP]
+                # Fix 5: In large repos, skip frontend→backend synonym grep for keywords
+                # that already have direct path matches — those are backend terms (e.g.
+                # "login" in an IAM repo) that don't need UI→service-layer translation.
+                # Prevents "authentication" grep flooding keycloak with SAML adapter files.
+                if _is_large_repo and _frontend_kws:
+                    _frontend_kws = [
+                        kw for kw in _frontend_kws
+                        if not any(kw in p.lower() for p in _sx_direct_path)
+                    ]
                 _backend_terms_set: list[str] = []
                 if _frontend_kws:
                     _bt: list[str] = []
@@ -1923,6 +1932,7 @@ class TaskContextBuilder:
                 _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
                 _boosted: list[RelevantFile] = []
+                _raw_signals: dict[str, float] = {}  # uncapped accumulated signal per file
                 _scanned_body: dict[str, str] = {}  # cache for graph expansion (Pass 5)
                 for _rf in _scan_candidates:
                     _extra = 0.0
@@ -1996,7 +2006,9 @@ class TaskContextBuilder:
                     elif _extra_syn > 0:
                         _new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
-                    _final_score = round(min(_rf.score + _total_extra, 1.0), 2)
+                    _raw_signal = _rf.score + _total_extra  # uncapped for ranking
+                    _raw_signals[_rf.path] = _raw_signal
+                    _final_score = round(min(_raw_signal, 1.0), 2)
                     _boosted.append(RelevantFile(
                         path=_rf.path,
                         role=_rf.role,
@@ -2005,21 +2017,14 @@ class TaskContextBuilder:
                         why=_rf.why,
                     ))
-                # Use total boost as a secondary sort key so symptom-matched files
-                # that were boosted from a lower base score rank above structural
-                # files that coincidentally reach the same capped score of 1.0.
-                # This prevents budget-trimming from discarding the most relevant files.
-                _boost_totals: dict[str, float] = {}
-                for _rf in _scan_candidates:
-                    pass  # populated below
-                _boost_totals = {}
-                for _idx, _rf in enumerate(_scan_candidates):
-                    _b_rf = _boosted[_idx]
-                    _boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
+                # Sort by uncapped raw signal so files with more accumulated evidence
+                # (path matches + content hits + commit matches) rank above files that
+                # merely cap at the same display score of 1.0.
+                # _raw_signals holds each file's full sum before the display cap.
+                # Files not content-scanned (_no_scan_candidates) use their base score.
                 relevant_files = sorted(
                     _boosted + _no_scan_candidates,
-                    key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
+                    key=lambda rf: -_raw_signals.get(rf.path, rf.score),
                 )
                 # Pass 5: reverse graph expansion from high-score seed nodes.
@@ -2118,9 +2123,14 @@ class TaskContextBuilder:
                         if _gx_new:
                             relevant_files = sorted(
                                 relevant_files + _gx_new,
-                                key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
+                                key=lambda rf: -_raw_signals.get(rf.path, rf.score),
                             )
+                # Fix 2: Cap output for large repos to stay within agent context budgets.
+                # Raw signal sort above ensures highest-signal files survive the cut.
+                if _is_large_repo and len(relevant_files) > 40:
+                    relevant_files = relevant_files[:40]
                 # Synonym note (only when synonyms actually fired)
                 if _frontend_kws and _sx_synonyms:
                     symptom_note = (

{sourcecode-1.33.14 → sourcecode-1.33.15}/src/sourcecode/ris.py RENAMED Viewed

@@ -437,6 +437,18 @@ def get_cold_start_context(repo_root: Path) -> dict:
             "endpoints": endpoints,
             "hotspots": ris.git_context_snapshot.get("hotspots", []),
             "validation": _validation,
+            # Fix 3: _cache wrapper for backward compat with CLI schema consumers.
+            # CLI outputs inject _cache via _inject_cache_meta; MCP cold-start path
+            # skips that step, leaving agents that read _cache.cache_source with None.
+            "_cache": {
+                "cache_source": "RIS",
+                "git_head_at_generation": ris.git_head or "",
+                "current_git_head": current_head or "",
+                "is_stale": stale,
+                "has_uncommitted_changes": uncommitted,
+                "generated_at": ris.last_updated_at,
+                "data_scope": "RIS_BOOTSTRAP",
+            },
         }
         if not endpoints and _is_java:
             result["endpoints_hint"] = (