sourcecode 1.33.12__tar.gz → 1.33.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-1.33.12 → sourcecode-1.33.14}/PKG-INFO +1 -1
- {sourcecode-1.33.12 → sourcecode-1.33.14}/pyproject.toml +1 -1
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/__init__.py +1 -1
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/cli.py +6 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/output_budget.py +2 -2
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/prepare_context.py +131 -2
- {sourcecode-1.33.12 → sourcecode-1.33.14}/.github/workflows/build-windows.yml +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/.gitignore +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/.ruff.toml +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/CHANGELOG.md +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/CONTRIBUTING.md +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/LICENSE +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/README.md +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/SECURITY.md +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/raw +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/cache.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/canonical_ir.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/classifier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/context_scorer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/error_schema.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/flow_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/license.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/__init__.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/onboarding/applier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/onboarding/backup.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/onboarding/detector.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/onboarding/planner.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/orchestrator.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/registry.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/runner.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp/server.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/mcp_nudge.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/path_filters.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/pr_comment_renderer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/progress.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/ranking_engine.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/redactor.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/repository_ir.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/ris.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/scanner.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/schema.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/serializer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-1.33.12 → sourcecode-1.33.14}/src/sourcecode/workspace.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sourcecode"
|
|
7
|
-
version = "1.33.12"
|
|
7
|
+
version = "1.33.14"
|
|
8
8
|
description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -1102,6 +1102,9 @@ def main(
|
|
|
1102
1102
|
obj = _jm.loads(raw)
|
|
1103
1103
|
if isinstance(obj, dict):
|
|
1104
1104
|
obj["_cache"] = meta
|
|
1105
|
+
# Top-level cache_source for one release — backward compat alias
|
|
1106
|
+
if "cache_source" in meta:
|
|
1107
|
+
obj["cache_source"] = meta["cache_source"]
|
|
1105
1108
|
return _jm.dumps(obj, indent=2, ensure_ascii=False)
|
|
1106
1109
|
except Exception:
|
|
1107
1110
|
pass
|
|
@@ -2273,6 +2276,9 @@ def _make_explanation(reason: str, why: str) -> str:
|
|
|
2273
2276
|
def _serialize_relevant_file(f: Any) -> dict:
|
|
2274
2277
|
from dataclasses import asdict as _asdict
|
|
2275
2278
|
d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
|
|
2279
|
+
# Emit 'file' as backward-compat alias for 'path' for one release
|
|
2280
|
+
if "path" in d:
|
|
2281
|
+
d["file"] = d["path"]
|
|
2276
2282
|
reason = d.pop("reason", "") or ""
|
|
2277
2283
|
why = d.pop("why", "") or ""
|
|
2278
2284
|
# Expose score as a rounded float so agents can rank/filter files deterministically.
|
|
@@ -67,7 +67,7 @@ _TRIM_SCHEDULE: list[tuple[str, str | None, int]] = [
|
|
|
67
67
|
("execution_paths", None, 0),
|
|
68
68
|
("dependency_graph_summary", None, 0),
|
|
69
69
|
# Step 6 — last resort
|
|
70
|
-
("relevant_files", None,
|
|
70
|
+
("relevant_files", None, 10),
|
|
71
71
|
("suspected_areas", None, 0),
|
|
72
72
|
("key_dependencies", None, 0),
|
|
73
73
|
]
|
|
@@ -148,7 +148,7 @@ def trim_to_budget(data: dict, budget_bytes: int, *, label: str = "") -> dict:
|
|
|
148
148
|
# Budget constants (bytes) — used by CLI callers
|
|
149
149
|
BUDGET_COMPACT = 30_000 # compact/agent main cmd
|
|
150
150
|
BUDGET_AGENT = 40_000 # agent main cmd (slightly more headroom)
|
|
151
|
-
BUDGET_FIX_BUG =
|
|
151
|
+
BUDGET_FIX_BUG = 200_000 # fix-bug (with or without --symptom)
|
|
152
152
|
BUDGET_REVIEW_PR = 100_000 # review-pr
|
|
153
153
|
BUDGET_ONBOARD = 30_000 # onboard
|
|
154
154
|
BUDGET_EXPLAIN = 30_000 # explain
|
|
@@ -1725,6 +1725,7 @@ class TaskContextBuilder:
|
|
|
1725
1725
|
_sx_commits: list[dict] = []
|
|
1726
1726
|
_sx_synonyms: list[str] = []
|
|
1727
1727
|
_sx_boosts: list[dict] = []
|
|
1728
|
+
_sx_graph_expanded: list[str] = []
|
|
1728
1729
|
|
|
1729
1730
|
# Pass 1: surface code notes whose text contains any keyword
|
|
1730
1731
|
_note_matched_paths: dict[str, int] = {} # path → count of matching notes
|
|
@@ -1780,7 +1781,12 @@ class TaskContextBuilder:
|
|
|
1780
1781
|
# Pass 4: inject files whose path matches symptom keywords.
|
|
1781
1782
|
# CamelCase-expand the filename stem so "OfflineSessionLoader" matches
|
|
1782
1783
|
# the keyword "offline" even without an explicit directory separator.
|
|
1784
|
+
# Large repos: cap per-keyword injections so a common term like
|
|
1785
|
+
# "authentication" (50+ path matches in an IAM repo) cannot flood the
|
|
1786
|
+
# candidate list and push specific terms like "ldap" out of the budget.
|
|
1783
1787
|
_p4_dirs_of_injected: set[str] = set() # directories of high-score injects
|
|
1788
|
+
_P4_KW_CAP = 15 # max path-injections per keyword in large repos
|
|
1789
|
+
_p4_kw_counts: dict[str, int] = {}
|
|
1784
1790
|
for _p in all_paths:
|
|
1785
1791
|
if _p in _existing_paths:
|
|
1786
1792
|
continue
|
|
@@ -1797,6 +1803,16 @@ class TaskContextBuilder:
|
|
|
1797
1803
|
_matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
|
|
1798
1804
|
if not _matching_kws:
|
|
1799
1805
|
continue
|
|
1806
|
+
# In large repos, skip keywords already at cap; keep file only if at
|
|
1807
|
+
# least one keyword still has quota (multi-kw matches exhaust each
|
|
1808
|
+
# keyword's quota independently so specific terms survive longer).
|
|
1809
|
+
if _is_large_repo:
|
|
1810
|
+
_matching_kws = [
|
|
1811
|
+
kw for kw in _matching_kws
|
|
1812
|
+
if _p4_kw_counts.get(kw, 0) < _P4_KW_CAP
|
|
1813
|
+
]
|
|
1814
|
+
if not _matching_kws:
|
|
1815
|
+
continue
|
|
1800
1816
|
_boost = 0.2 * len(_matching_kws)
|
|
1801
1817
|
_injected_score = round(min(0.5 + _boost, 1.0), 2)
|
|
1802
1818
|
_first_kw = _matching_kws[0]
|
|
@@ -1809,6 +1825,9 @@ class TaskContextBuilder:
|
|
|
1809
1825
|
))
|
|
1810
1826
|
_existing_paths.add(_p)
|
|
1811
1827
|
_sx_direct_path.append(_p)
|
|
1828
|
+
if _is_large_repo:
|
|
1829
|
+
for _kw in _matching_kws:
|
|
1830
|
+
_p4_kw_counts[_kw] = _p4_kw_counts.get(_kw, 0) + 1
|
|
1812
1831
|
if _injected_score >= 0.7:
|
|
1813
1832
|
_p4_dirs_of_injected.add(str(Path(_p).parent))
|
|
1814
1833
|
|
|
@@ -1863,9 +1882,15 @@ class TaskContextBuilder:
|
|
|
1863
1882
|
# architecturally adjacent classes that don't mention symptom keywords
|
|
1864
1883
|
# in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
|
|
1865
1884
|
# siblings in the same infinispan/ package).
|
|
1885
|
+
# Large repos: cap total co-location injections so that a keyword
|
|
1886
|
+
# matching many directories doesn't flood the candidate list.
|
|
1866
1887
|
if _is_large_repo and _p4_dirs_of_injected:
|
|
1867
1888
|
_coloc_existing = {rf.path for rf in relevant_files}
|
|
1889
|
+
_P4C_CAP = 30
|
|
1890
|
+
_coloc_count = 0
|
|
1868
1891
|
for _cp in all_paths:
|
|
1892
|
+
if _coloc_count >= _P4C_CAP:
|
|
1893
|
+
break
|
|
1869
1894
|
if _cp in _coloc_existing:
|
|
1870
1895
|
continue
|
|
1871
1896
|
if Path(_cp).suffix.lower() not in _src_exts:
|
|
@@ -1879,6 +1904,7 @@ class TaskContextBuilder:
|
|
|
1879
1904
|
why="directory proximity injection",
|
|
1880
1905
|
))
|
|
1881
1906
|
_coloc_existing.add(_cp)
|
|
1907
|
+
_coloc_count += 1
|
|
1882
1908
|
|
|
1883
1909
|
# Sort before content scan so top candidates get read first.
|
|
1884
1910
|
# In large repos: prioritise symptom_match files within each score band
|
|
@@ -1897,6 +1923,7 @@ class TaskContextBuilder:
|
|
|
1897
1923
|
_no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
|
|
1898
1924
|
|
|
1899
1925
|
_boosted: list[RelevantFile] = []
|
|
1926
|
+
_scanned_body: dict[str, str] = {} # cache for graph expansion (Pass 5)
|
|
1900
1927
|
for _rf in _scan_candidates:
|
|
1901
1928
|
_extra = 0.0
|
|
1902
1929
|
_extra_syn = 0.0
|
|
@@ -1931,9 +1958,11 @@ class TaskContextBuilder:
|
|
|
1931
1958
|
_body_lower = ""
|
|
1932
1959
|
if Path(_rf.path).suffix.lower() in _src_exts:
|
|
1933
1960
|
try:
|
|
1934
|
-
|
|
1961
|
+
_raw_body = (self.root / _rf.path).read_text(
|
|
1935
1962
|
encoding="utf-8", errors="replace"
|
|
1936
|
-
)[:12000]
|
|
1963
|
+
)[:12000] # ~300 lines avg
|
|
1964
|
+
_scanned_body[_rf.path] = _raw_body # cache for Pass 5
|
|
1965
|
+
_body_lower = _raw_body.lower()
|
|
1937
1966
|
except OSError:
|
|
1938
1967
|
pass
|
|
1939
1968
|
|
|
@@ -1993,6 +2022,105 @@ class TaskContextBuilder:
|
|
|
1993
2022
|
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
1994
2023
|
)
|
|
1995
2024
|
|
|
2025
|
+
# Pass 5: reverse graph expansion from high-score seed nodes.
|
|
2026
|
+
# Identifies which source files in the repo REFERENCE the seed
|
|
2027
|
+
# classes (imports, implements, extends, field declarations).
|
|
2028
|
+
# This is a reverse-import lookup: for seed class "UserProvider",
|
|
2029
|
+
# it finds JpaUserProvider / DefaultUserSessionProvider which import
|
|
2030
|
+
# UserProvider — even though those files don't contain symptom
|
|
2031
|
+
# keywords in their own path.
|
|
2032
|
+
# Seeds include any high-score file (not just symptom_match role)
|
|
2033
|
+
# so that files found by _rank_files class-name matching also expand.
|
|
2034
|
+
if not fast:
|
|
2035
|
+
import re as _re_gx
|
|
2036
|
+
_GX_SEED_THRESH = 0.5
|
|
2037
|
+
_GX_EXPAND_CAP = 30
|
|
2038
|
+
_GX_HOP_DECAY = 0.6
|
|
2039
|
+
|
|
2040
|
+
# Collect seed class names from high-score results
|
|
2041
|
+
_gx_seed_stems: dict[str, float] = {} # stem → score
|
|
2042
|
+
for _gx_rf in relevant_files:
|
|
2043
|
+
if _gx_rf.score < _GX_SEED_THRESH:
|
|
2044
|
+
continue
|
|
2045
|
+
if Path(_gx_rf.path).suffix.lower() not in _src_exts:
|
|
2046
|
+
continue
|
|
2047
|
+
_gx_stem = Path(_gx_rf.path).stem
|
|
2048
|
+
_gx_seed_stems[_gx_stem] = max(
|
|
2049
|
+
_gx_seed_stems.get(_gx_stem, 0.0), _gx_rf.score
|
|
2050
|
+
)
|
|
2051
|
+
|
|
2052
|
+
if _gx_seed_stems:
|
|
2053
|
+
# Compile per-stem word-boundary patterns for fast matching
|
|
2054
|
+
import re as _re_gx2
|
|
2055
|
+
_gx_patterns: dict[str, Any] = {
|
|
2056
|
+
stem: _re_gx2.compile(rf'\b{_re_gx2.escape(stem)}\b')
|
|
2057
|
+
for stem in _gx_seed_stems
|
|
2058
|
+
}
|
|
2059
|
+
|
|
2060
|
+
_gx_existing = {rf.path for rf in relevant_files}
|
|
2061
|
+
_gx_new: list[RelevantFile] = []
|
|
2062
|
+
_gx_added: set[str] = set()
|
|
2063
|
+
|
|
2064
|
+
# Candidates: non-test source files not yet in results.
|
|
2065
|
+
# Small repos: scan all; large repos: use pre-scanned content only.
|
|
2066
|
+
# Test files are excluded (fix-bug focuses on production code).
|
|
2067
|
+
if _is_large_repo:
|
|
2068
|
+
_gx_candidates = [
|
|
2069
|
+
p for p in _scanned_body
|
|
2070
|
+
if p not in _gx_existing and not self._is_test(p)
|
|
2071
|
+
]
|
|
2072
|
+
else:
|
|
2073
|
+
_gx_candidates = [
|
|
2074
|
+
p for p in all_paths
|
|
2075
|
+
if p not in _gx_existing
|
|
2076
|
+
and Path(p).suffix.lower() in _src_exts
|
|
2077
|
+
and not self._is_test(p)
|
|
2078
|
+
]
|
|
2079
|
+
|
|
2080
|
+
for _gx_cand in _gx_candidates:
|
|
2081
|
+
if len(_gx_new) >= _GX_EXPAND_CAP:
|
|
2082
|
+
break
|
|
2083
|
+
if _gx_cand in _gx_added:
|
|
2084
|
+
continue
|
|
2085
|
+
|
|
2086
|
+
# Use cached content or read fresh (small repos only)
|
|
2087
|
+
_gx_body = _scanned_body.get(_gx_cand)
|
|
2088
|
+
if _gx_body is None:
|
|
2089
|
+
if _is_large_repo:
|
|
2090
|
+
continue # never do fresh reads on large repos in Pass 5
|
|
2091
|
+
try:
|
|
2092
|
+
_gx_body = (self.root / _gx_cand).read_text(
|
|
2093
|
+
encoding="utf-8", errors="replace"
|
|
2094
|
+
)[:8000]
|
|
2095
|
+
except OSError:
|
|
2096
|
+
continue
|
|
2097
|
+
|
|
2098
|
+
# Reverse lookup: does this file reference any seed class?
|
|
2099
|
+
for _gx_stem, _gx_seed_score in _gx_seed_stems.items():
|
|
2100
|
+
if _gx_patterns[_gx_stem].search(_gx_body):
|
|
2101
|
+
_hop1_score = round(
|
|
2102
|
+
min(_gx_seed_score * _GX_HOP_DECAY, 0.85), 2
|
|
2103
|
+
)
|
|
2104
|
+
_gx_new.append(RelevantFile(
|
|
2105
|
+
path=_gx_cand,
|
|
2106
|
+
role="symptom_match",
|
|
2107
|
+
score=_hop1_score,
|
|
2108
|
+
reason=(
|
|
2109
|
+
f"graph_expansion: references {_gx_stem} "
|
|
2110
|
+
f"(1-hop reverse import)"
|
|
2111
|
+
),
|
|
2112
|
+
why=f"graph_expansion: 1 hop from {_gx_stem}",
|
|
2113
|
+
))
|
|
2114
|
+
_gx_added.add(_gx_cand)
|
|
2115
|
+
_sx_graph_expanded.append(_gx_cand)
|
|
2116
|
+
break # one match per candidate is enough
|
|
2117
|
+
|
|
2118
|
+
if _gx_new:
|
|
2119
|
+
relevant_files = sorted(
|
|
2120
|
+
relevant_files + _gx_new,
|
|
2121
|
+
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
2122
|
+
)
|
|
2123
|
+
|
|
1996
2124
|
# Synonym note (only when synonyms actually fired)
|
|
1997
2125
|
if _frontend_kws and _sx_synonyms:
|
|
1998
2126
|
symptom_note = (
|
|
@@ -2016,6 +2144,7 @@ class TaskContextBuilder:
|
|
|
2016
2144
|
"content_matches": _sx_content[:10],
|
|
2017
2145
|
"commit_matches": _sx_commits[:10],
|
|
2018
2146
|
"synonym_matches": _sx_synonyms[:10],
|
|
2147
|
+
"graph_expansion": _sx_graph_expanded[:10],
|
|
2019
2148
|
"boosts": _sx_boosts[:30],
|
|
2020
2149
|
"final_boost": round(
|
|
2021
2150
|
sum(b["value"] for b in _sx_boosts), 3
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|