sourcecode 1.33.11__tar.gz → 1.33.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-1.33.11 → sourcecode-1.33.13}/PKG-INFO +2 -2
- {sourcecode-1.33.11 → sourcecode-1.33.13}/README.md +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.13}/pyproject.toml +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/__init__.py +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/cli.py +6 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/output_budget.py +2 -2
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/prepare_context.py +202 -12
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/repository_ir.py +63 -13
- {sourcecode-1.33.11 → sourcecode-1.33.13}/.github/workflows/build-windows.yml +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/.gitignore +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/.ruff.toml +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/CHANGELOG.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/CONTRIBUTING.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/LICENSE +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/SECURITY.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/raw +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/cache.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/canonical_ir.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/context_scorer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/error_schema.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/flow_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/license.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/onboarding/applier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/onboarding/backup.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/onboarding/detector.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/onboarding/planner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/orchestrator.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/registry.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/runner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp/server.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/mcp_nudge.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/path_filters.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/pr_comment_renderer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/progress.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/ranking_engine.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/redactor.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/ris.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/scanner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/schema.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/serializer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.13}/src/sourcecode/workspace.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.33.11
|
|
3
|
+
Version: 1.33.13
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
|
|
40
40
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
41
41
|
|
|
42
|
-

|
|
43
43
|

|
|
44
44
|
|
|
45
45
|
---
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|

|
|
7
7
|
|
|
8
8
|
---
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sourcecode"
|
|
7
|
-
version = "1.33.11"
|
|
7
|
+
version = "1.33.13"
|
|
8
8
|
description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -1102,6 +1102,9 @@ def main(
|
|
|
1102
1102
|
obj = _jm.loads(raw)
|
|
1103
1103
|
if isinstance(obj, dict):
|
|
1104
1104
|
obj["_cache"] = meta
|
|
1105
|
+
# Top-level cache_source for one release — backward compat alias
|
|
1106
|
+
if "cache_source" in meta:
|
|
1107
|
+
obj["cache_source"] = meta["cache_source"]
|
|
1105
1108
|
return _jm.dumps(obj, indent=2, ensure_ascii=False)
|
|
1106
1109
|
except Exception:
|
|
1107
1110
|
pass
|
|
@@ -2273,6 +2276,9 @@ def _make_explanation(reason: str, why: str) -> str:
|
|
|
2273
2276
|
def _serialize_relevant_file(f: Any) -> dict:
|
|
2274
2277
|
from dataclasses import asdict as _asdict
|
|
2275
2278
|
d = {k: v for k, v in _asdict(f).items() if v != "" and v is not None}
|
|
2279
|
+
# Emit 'file' as backward-compat alias for 'path' for one release
|
|
2280
|
+
if "path" in d:
|
|
2281
|
+
d["file"] = d["path"]
|
|
2276
2282
|
reason = d.pop("reason", "") or ""
|
|
2277
2283
|
why = d.pop("why", "") or ""
|
|
2278
2284
|
# Expose score as a rounded float so agents can rank/filter files deterministically.
|
|
@@ -67,7 +67,7 @@ _TRIM_SCHEDULE: list[tuple[str, str | None, int]] = [
|
|
|
67
67
|
("execution_paths", None, 0),
|
|
68
68
|
("dependency_graph_summary", None, 0),
|
|
69
69
|
# Step 6 — last resort
|
|
70
|
-
("relevant_files", None,
|
|
70
|
+
("relevant_files", None, 10),
|
|
71
71
|
("suspected_areas", None, 0),
|
|
72
72
|
("key_dependencies", None, 0),
|
|
73
73
|
]
|
|
@@ -148,7 +148,7 @@ def trim_to_budget(data: dict, budget_bytes: int, *, label: str = "") -> dict:
|
|
|
148
148
|
# Budget constants (bytes) — used by CLI callers
|
|
149
149
|
BUDGET_COMPACT = 30_000 # compact/agent main cmd
|
|
150
150
|
BUDGET_AGENT = 40_000 # agent main cmd (slightly more headroom)
|
|
151
|
-
BUDGET_FIX_BUG =
|
|
151
|
+
BUDGET_FIX_BUG = 200_000 # fix-bug (with or without --symptom)
|
|
152
152
|
BUDGET_REVIEW_PR = 100_000 # review-pr
|
|
153
153
|
BUDGET_ONBOARD = 30_000 # onboard
|
|
154
154
|
BUDGET_EXPLAIN = 30_000 # explain
|
|
@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
|
|
|
627
627
|
"trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
|
|
628
628
|
}
|
|
629
629
|
|
|
630
|
+
# Generic words that add noise when used as symptom keywords in large repos.
|
|
631
|
+
# "token" and "user" are too ubiquitous in auth systems to be useful alone.
|
|
632
|
+
_SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
|
|
633
|
+
"fails", "fail", "failed", "failure",
|
|
634
|
+
"not", "for", "with", "when", "that", "the", "and", "but",
|
|
635
|
+
"are", "has", "had", "have", "was", "were",
|
|
636
|
+
"get", "set", "can", "does", "did", "should", "would", "could",
|
|
637
|
+
"null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
|
|
638
|
+
"error", "issue", "problem", "bug",
|
|
639
|
+
"from", "into", "via", "due", "also", "after", "before",
|
|
640
|
+
"slow", "fast", "new", "old",
|
|
641
|
+
})
|
|
642
|
+
|
|
643
|
+
# Repo-scale threshold: above this file count, use stricter injection logic.
|
|
644
|
+
_LARGE_REPO_THRESHOLD = 500
|
|
630
645
|
|
|
631
646
|
MAX_FILES_FAST = 2000 # above this threshold --fast uses git-index-only mode
|
|
632
647
|
|
|
@@ -1695,7 +1710,7 @@ class TaskContextBuilder:
|
|
|
1695
1710
|
_camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
|
|
1696
1711
|
symptom_keywords = [
|
|
1697
1712
|
w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
|
|
1698
|
-
if len(w) > 2
|
|
1713
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
1699
1714
|
]
|
|
1700
1715
|
if symptom_keywords:
|
|
1701
1716
|
# Pre-compile combined keyword pattern for fast content scanning
|
|
@@ -1710,6 +1725,7 @@ class TaskContextBuilder:
|
|
|
1710
1725
|
_sx_commits: list[dict] = []
|
|
1711
1726
|
_sx_synonyms: list[str] = []
|
|
1712
1727
|
_sx_boosts: list[dict] = []
|
|
1728
|
+
_sx_graph_expanded: list[str] = []
|
|
1713
1729
|
|
|
1714
1730
|
# Pass 1: surface code notes whose text contains any keyword
|
|
1715
1731
|
_note_matched_paths: dict[str, int] = {} # path → count of matching notes
|
|
@@ -1759,14 +1775,27 @@ class TaskContextBuilder:
|
|
|
1759
1775
|
))
|
|
1760
1776
|
_existing_paths.add(_cp)
|
|
1761
1777
|
|
|
1762
|
-
#
|
|
1778
|
+
# Scale-awareness: large repos need wider scan and stricter injection.
|
|
1779
|
+
_is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
|
|
1780
|
+
|
|
1781
|
+
# Pass 4: inject files whose path matches symptom keywords.
|
|
1782
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" matches
|
|
1783
|
+
# the keyword "offline" even without an explicit directory separator.
|
|
1784
|
+
_p4_dirs_of_injected: set[str] = set() # directories of high-score injects
|
|
1763
1785
|
for _p in all_paths:
|
|
1764
1786
|
if _p in _existing_paths:
|
|
1765
1787
|
continue
|
|
1766
1788
|
if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
|
|
1767
1789
|
continue
|
|
1768
1790
|
_p_lower = _p.lower()
|
|
1769
|
-
|
|
1791
|
+
# CamelCase-expand the stem and append to the search string so
|
|
1792
|
+
# "OfflineSessionLoader" → "offline session loader" can match
|
|
1793
|
+
# individual keyword tokens beyond what substring search finds.
|
|
1794
|
+
_stem_raw = Path(_p).stem
|
|
1795
|
+
_stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
|
|
1796
|
+
_stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
|
|
1797
|
+
_p_search = _p_lower + " " + _stem_exp
|
|
1798
|
+
_matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
|
|
1770
1799
|
if not _matching_kws:
|
|
1771
1800
|
continue
|
|
1772
1801
|
_boost = 0.2 * len(_matching_kws)
|
|
@@ -1781,6 +1810,8 @@ class TaskContextBuilder:
|
|
|
1781
1810
|
))
|
|
1782
1811
|
_existing_paths.add(_p)
|
|
1783
1812
|
_sx_direct_path.append(_p)
|
|
1813
|
+
if _injected_score >= 0.7:
|
|
1814
|
+
_p4_dirs_of_injected.add(str(Path(_p).parent))
|
|
1784
1815
|
|
|
1785
1816
|
# Pass 4b: grep-based injection for frontend→backend synonym terms.
|
|
1786
1817
|
# Runs parallel grep for each backend term to find files not yet in
|
|
@@ -1828,13 +1859,46 @@ class TaskContextBuilder:
|
|
|
1828
1859
|
))
|
|
1829
1860
|
_existing_paths_now.add(_gf)
|
|
1830
1861
|
|
|
1831
|
-
#
|
|
1832
|
-
|
|
1833
|
-
|
|
1862
|
+
# Pass 4c: subsystem co-location — inject sibling files from the same
|
|
1863
|
+
# directories as high-score (≥0.7) path-matched files. This catches
|
|
1864
|
+
# architecturally adjacent classes that don't mention symptom keywords
|
|
1865
|
+
# in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
|
|
1866
|
+
# siblings in the same infinispan/ package).
|
|
1867
|
+
if _is_large_repo and _p4_dirs_of_injected:
|
|
1868
|
+
_coloc_existing = {rf.path for rf in relevant_files}
|
|
1869
|
+
for _cp in all_paths:
|
|
1870
|
+
if _cp in _coloc_existing:
|
|
1871
|
+
continue
|
|
1872
|
+
if Path(_cp).suffix.lower() not in _src_exts:
|
|
1873
|
+
continue
|
|
1874
|
+
if str(Path(_cp).parent) in _p4_dirs_of_injected:
|
|
1875
|
+
relevant_files.append(RelevantFile(
|
|
1876
|
+
path=_cp,
|
|
1877
|
+
role="symptom_match",
|
|
1878
|
+
score=0.55,
|
|
1879
|
+
reason="subsystem co-location: same directory as symptom-matched file",
|
|
1880
|
+
why="directory proximity injection",
|
|
1881
|
+
))
|
|
1882
|
+
_coloc_existing.add(_cp)
|
|
1883
|
+
|
|
1884
|
+
# Sort before content scan so top candidates get read first.
|
|
1885
|
+
# In large repos: prioritise symptom_match files within each score band
|
|
1886
|
+
# so that subsystem-relevant files are content-scanned before generic
|
|
1887
|
+
# structural files at the same score.
|
|
1888
|
+
if _is_large_repo:
|
|
1889
|
+
relevant_files = sorted(
|
|
1890
|
+
relevant_files,
|
|
1891
|
+
key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
|
|
1892
|
+
)
|
|
1893
|
+
_CONTENT_SCAN_LIMIT = 150
|
|
1894
|
+
else:
|
|
1895
|
+
relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
|
|
1896
|
+
_CONTENT_SCAN_LIMIT = 80
|
|
1834
1897
|
_scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
|
|
1835
1898
|
_no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
|
|
1836
1899
|
|
|
1837
1900
|
_boosted: list[RelevantFile] = []
|
|
1901
|
+
_scanned_body: dict[str, str] = {} # cache for graph expansion (Pass 5)
|
|
1838
1902
|
for _rf in _scan_candidates:
|
|
1839
1903
|
_extra = 0.0
|
|
1840
1904
|
_extra_syn = 0.0
|
|
@@ -1869,9 +1933,11 @@ class TaskContextBuilder:
|
|
|
1869
1933
|
_body_lower = ""
|
|
1870
1934
|
if Path(_rf.path).suffix.lower() in _src_exts:
|
|
1871
1935
|
try:
|
|
1872
|
-
|
|
1936
|
+
_raw_body = (self.root / _rf.path).read_text(
|
|
1873
1937
|
encoding="utf-8", errors="replace"
|
|
1874
|
-
)[:12000]
|
|
1938
|
+
)[:12000] # ~300 lines avg
|
|
1939
|
+
_scanned_body[_rf.path] = _raw_body # cache for Pass 5
|
|
1940
|
+
_body_lower = _raw_body.lower()
|
|
1875
1941
|
except OSError:
|
|
1876
1942
|
pass
|
|
1877
1943
|
|
|
@@ -1905,15 +1971,130 @@ class TaskContextBuilder:
|
|
|
1905
1971
|
elif _extra_syn > 0:
|
|
1906
1972
|
_new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
|
|
1907
1973
|
|
|
1974
|
+
_final_score = round(min(_rf.score + _total_extra, 1.0), 2)
|
|
1908
1975
|
_boosted.append(RelevantFile(
|
|
1909
1976
|
path=_rf.path,
|
|
1910
1977
|
role=_rf.role,
|
|
1911
|
-
score=
|
|
1978
|
+
score=_final_score,
|
|
1912
1979
|
reason=_new_reason,
|
|
1913
1980
|
why=_rf.why,
|
|
1914
1981
|
))
|
|
1915
1982
|
|
|
1916
|
-
|
|
1983
|
+
# Use total boost as a secondary sort key so symptom-matched files
|
|
1984
|
+
# that were boosted from a lower base score rank above structural
|
|
1985
|
+
# files that coincidentally reach the same capped score of 1.0.
|
|
1986
|
+
# This prevents budget-trimming from discarding the most relevant files.
|
|
1987
|
+
_boost_totals: dict[str, float] = {}
|
|
1988
|
+
for _rf in _scan_candidates:
|
|
1989
|
+
pass # populated below
|
|
1990
|
+
_boost_totals = {}
|
|
1991
|
+
for _idx, _rf in enumerate(_scan_candidates):
|
|
1992
|
+
_b_rf = _boosted[_idx]
|
|
1993
|
+
_boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
|
|
1994
|
+
|
|
1995
|
+
relevant_files = sorted(
|
|
1996
|
+
_boosted + _no_scan_candidates,
|
|
1997
|
+
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
1998
|
+
)
|
|
1999
|
+
|
|
2000
|
+
# Pass 5: reverse graph expansion from high-score seed nodes.
|
|
2001
|
+
# Identifies which source files in the repo REFERENCE the seed
|
|
2002
|
+
# classes (imports, implements, extends, field declarations).
|
|
2003
|
+
# This is a reverse-import lookup: for seed class "UserProvider",
|
|
2004
|
+
# it finds JpaUserProvider / DefaultUserSessionProvider which import
|
|
2005
|
+
# UserProvider — even though those files don't contain symptom
|
|
2006
|
+
# keywords in their own path.
|
|
2007
|
+
# Seeds include any high-score file (not just symptom_match role)
|
|
2008
|
+
# so that files found by _rank_files class-name matching also expand.
|
|
2009
|
+
if not fast:
|
|
2010
|
+
import re as _re_gx
|
|
2011
|
+
_GX_SEED_THRESH = 0.5
|
|
2012
|
+
_GX_EXPAND_CAP = 30
|
|
2013
|
+
_GX_HOP_DECAY = 0.6
|
|
2014
|
+
|
|
2015
|
+
# Collect seed class names from high-score results
|
|
2016
|
+
_gx_seed_stems: dict[str, float] = {} # stem → score
|
|
2017
|
+
for _gx_rf in relevant_files:
|
|
2018
|
+
if _gx_rf.score < _GX_SEED_THRESH:
|
|
2019
|
+
continue
|
|
2020
|
+
if Path(_gx_rf.path).suffix.lower() not in _src_exts:
|
|
2021
|
+
continue
|
|
2022
|
+
_gx_stem = Path(_gx_rf.path).stem
|
|
2023
|
+
_gx_seed_stems[_gx_stem] = max(
|
|
2024
|
+
_gx_seed_stems.get(_gx_stem, 0.0), _gx_rf.score
|
|
2025
|
+
)
|
|
2026
|
+
|
|
2027
|
+
if _gx_seed_stems:
|
|
2028
|
+
# Compile per-stem word-boundary patterns for fast matching
|
|
2029
|
+
import re as _re_gx2
|
|
2030
|
+
_gx_patterns: dict[str, Any] = {
|
|
2031
|
+
stem: _re_gx2.compile(rf'\b{_re_gx2.escape(stem)}\b')
|
|
2032
|
+
for stem in _gx_seed_stems
|
|
2033
|
+
}
|
|
2034
|
+
|
|
2035
|
+
_gx_existing = {rf.path for rf in relevant_files}
|
|
2036
|
+
_gx_new: list[RelevantFile] = []
|
|
2037
|
+
_gx_added: set[str] = set()
|
|
2038
|
+
|
|
2039
|
+
# Candidates: non-test source files not yet in results.
|
|
2040
|
+
# Small repos: scan all; large repos: use pre-scanned content only.
|
|
2041
|
+
# Test files are excluded (fix-bug focuses on production code).
|
|
2042
|
+
if _is_large_repo:
|
|
2043
|
+
_gx_candidates = [
|
|
2044
|
+
p for p in _scanned_body
|
|
2045
|
+
if p not in _gx_existing and not self._is_test(p)
|
|
2046
|
+
]
|
|
2047
|
+
else:
|
|
2048
|
+
_gx_candidates = [
|
|
2049
|
+
p for p in all_paths
|
|
2050
|
+
if p not in _gx_existing
|
|
2051
|
+
and Path(p).suffix.lower() in _src_exts
|
|
2052
|
+
and not self._is_test(p)
|
|
2053
|
+
]
|
|
2054
|
+
|
|
2055
|
+
for _gx_cand in _gx_candidates:
|
|
2056
|
+
if len(_gx_new) >= _GX_EXPAND_CAP:
|
|
2057
|
+
break
|
|
2058
|
+
if _gx_cand in _gx_added:
|
|
2059
|
+
continue
|
|
2060
|
+
|
|
2061
|
+
# Use cached content or read fresh (small repos only)
|
|
2062
|
+
_gx_body = _scanned_body.get(_gx_cand)
|
|
2063
|
+
if _gx_body is None:
|
|
2064
|
+
if _is_large_repo:
|
|
2065
|
+
continue # never do fresh reads on large repos in Pass 5
|
|
2066
|
+
try:
|
|
2067
|
+
_gx_body = (self.root / _gx_cand).read_text(
|
|
2068
|
+
encoding="utf-8", errors="replace"
|
|
2069
|
+
)[:8000]
|
|
2070
|
+
except OSError:
|
|
2071
|
+
continue
|
|
2072
|
+
|
|
2073
|
+
# Reverse lookup: does this file reference any seed class?
|
|
2074
|
+
for _gx_stem, _gx_seed_score in _gx_seed_stems.items():
|
|
2075
|
+
if _gx_patterns[_gx_stem].search(_gx_body):
|
|
2076
|
+
_hop1_score = round(
|
|
2077
|
+
min(_gx_seed_score * _GX_HOP_DECAY, 0.85), 2
|
|
2078
|
+
)
|
|
2079
|
+
_gx_new.append(RelevantFile(
|
|
2080
|
+
path=_gx_cand,
|
|
2081
|
+
role="symptom_match",
|
|
2082
|
+
score=_hop1_score,
|
|
2083
|
+
reason=(
|
|
2084
|
+
f"graph_expansion: references {_gx_stem} "
|
|
2085
|
+
f"(1-hop reverse import)"
|
|
2086
|
+
),
|
|
2087
|
+
why=f"graph_expansion: 1 hop from {_gx_stem}",
|
|
2088
|
+
))
|
|
2089
|
+
_gx_added.add(_gx_cand)
|
|
2090
|
+
_sx_graph_expanded.append(_gx_cand)
|
|
2091
|
+
break # one match per candidate is enough
|
|
2092
|
+
|
|
2093
|
+
if _gx_new:
|
|
2094
|
+
relevant_files = sorted(
|
|
2095
|
+
relevant_files + _gx_new,
|
|
2096
|
+
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
2097
|
+
)
|
|
1917
2098
|
|
|
1918
2099
|
# Synonym note (only when synonyms actually fired)
|
|
1919
2100
|
if _frontend_kws and _sx_synonyms:
|
|
@@ -1938,6 +2119,7 @@ class TaskContextBuilder:
|
|
|
1938
2119
|
"content_matches": _sx_content[:10],
|
|
1939
2120
|
"commit_matches": _sx_commits[:10],
|
|
1940
2121
|
"synonym_matches": _sx_synonyms[:10],
|
|
2122
|
+
"graph_expansion": _sx_graph_expanded[:10],
|
|
1941
2123
|
"boosts": _sx_boosts[:30],
|
|
1942
2124
|
"final_boost": round(
|
|
1943
2125
|
sum(b["value"] for b in _sx_boosts), 3
|
|
@@ -2390,7 +2572,8 @@ class TaskContextBuilder:
|
|
|
2390
2572
|
else:
|
|
2391
2573
|
_symptom_class_names.add(_tok)
|
|
2392
2574
|
_symptom_tokens = {
|
|
2393
|
-
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2575
|
+
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2576
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
2394
2577
|
}
|
|
2395
2578
|
|
|
2396
2579
|
scored: list[tuple[float, str, RelevantFile]] = []
|
|
@@ -2487,9 +2670,16 @@ class TaskContextBuilder:
|
|
|
2487
2670
|
content_boost += 0.8
|
|
2488
2671
|
_why_parts.append("exception type in path (+0.8)")
|
|
2489
2672
|
|
|
2490
|
-
# AND-weighted token intersection — multiple matching tokens >> single
|
|
2673
|
+
# AND-weighted token intersection — multiple matching tokens >> single.
|
|
2674
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
|
|
2675
|
+
# "offline", "session", "loader" as individual tokens beyond what the raw
|
|
2676
|
+
# path splitting yields. This lets multi-word symptoms match class names.
|
|
2491
2677
|
if _symptom_tokens:
|
|
2492
2678
|
_path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
|
|
2679
|
+
_stem_cc = Path(path).stem
|
|
2680
|
+
_stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
|
|
2681
|
+
_stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
|
|
2682
|
+
_path_parts.update(_stem_cc_exp.split())
|
|
2493
2683
|
_intersection = _symptom_tokens & _path_parts
|
|
2494
2684
|
_n_match = len(_intersection)
|
|
2495
2685
|
if _n_match >= 3:
|
|
@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
|
|
|
888
888
|
# Phase 3 — Symbol relation graph
|
|
889
889
|
# ---------------------------------------------------------------------------
|
|
890
890
|
|
|
891
|
+
def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
|
|
892
|
+
"""Build {package: {simple_name: FQN}} map from all class/interface symbols.
|
|
893
|
+
|
|
894
|
+
Used by build_repo_ir to resolve same-package types that need no explicit import.
|
|
895
|
+
In Java, classes in the same package reference each other without import statements,
|
|
896
|
+
so import_map is empty for them — this map provides the fallback resolution.
|
|
897
|
+
"""
|
|
898
|
+
result: dict[str, dict[str, str]] = {}
|
|
899
|
+
for sym in symbols:
|
|
900
|
+
if sym.type not in ("class", "interface") or "#" in sym.symbol:
|
|
901
|
+
continue
|
|
902
|
+
pkg = sym.symbol.rsplit(".", 1)[0] if "." in sym.symbol else ""
|
|
903
|
+
simple = sym.symbol.split(".")[-1]
|
|
904
|
+
result.setdefault(pkg, {})[simple] = sym.symbol
|
|
905
|
+
return result
|
|
906
|
+
|
|
907
|
+
|
|
891
908
|
def _build_relations(
|
|
892
909
|
symbols: list[SymbolRecord],
|
|
893
910
|
raw_imports: list[str],
|
|
894
911
|
source: str,
|
|
895
912
|
package: str,
|
|
896
913
|
rel_path: str,
|
|
914
|
+
same_pkg_types: dict[str, str] | None = None,
|
|
897
915
|
) -> list[RelationEdge]:
|
|
898
|
-
"""Phase 3: Build directed relation graph for symbols in one file.
|
|
916
|
+
"""Phase 3: Build directed relation graph for symbols in one file.
|
|
917
|
+
|
|
918
|
+
same_pkg_types: {simple_name → FQN} for classes in the same package.
|
|
919
|
+
Passed by build_repo_ir after a first pass that collects all symbols.
|
|
920
|
+
Enables resolving injection targets that share a package with the caller
|
|
921
|
+
and therefore need no explicit Java import statement.
|
|
922
|
+
"""
|
|
899
923
|
edges: list[RelationEdge] = []
|
|
924
|
+
_same_pkg: dict[str, str] = same_pkg_types or {}
|
|
900
925
|
|
|
901
926
|
import_map: dict[str, str] = {}
|
|
902
927
|
for fqn in raw_imports:
|
|
@@ -929,15 +954,27 @@ def _build_relations(
|
|
|
929
954
|
))
|
|
930
955
|
|
|
931
956
|
if sym.type == "field":
|
|
932
|
-
|
|
957
|
+
_inject_ann = next(
|
|
958
|
+
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
959
|
+
)
|
|
960
|
+
_field_targets: set[str] = set(sym.imports_used)
|
|
961
|
+
# Same-package field injection: imports_used is empty when the field type
|
|
962
|
+
# shares a package with the declaring class (no import needed in Java).
|
|
963
|
+
# Extract type from signature ("Type name") and resolve via same_pkg_types.
|
|
964
|
+
if not _field_targets and _same_pkg:
|
|
965
|
+
_sig_type = (sym.signature or "").split()[0] if sym.signature else ""
|
|
966
|
+
_sig_base = re.sub(r'<.*', '', _sig_type).strip()
|
|
967
|
+
if _sig_base and _sig_base[0].isupper():
|
|
968
|
+
_same_fqn = _same_pkg.get(_sig_base)
|
|
969
|
+
if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
|
|
970
|
+
_field_targets.add(_same_fqn)
|
|
971
|
+
for imp_fqn in _field_targets:
|
|
933
972
|
edges.append(RelationEdge(
|
|
934
973
|
from_symbol=sym_fqn,
|
|
935
974
|
to_symbol=imp_fqn,
|
|
936
975
|
type="injects",
|
|
937
976
|
confidence="high",
|
|
938
|
-
evidence={"type": "annotation", "value":
|
|
939
|
-
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
940
|
-
)},
|
|
977
|
+
evidence={"type": "annotation", "value": _inject_ann},
|
|
941
978
|
))
|
|
942
979
|
|
|
943
980
|
# ── Constructor injection ─────────────────────────────────────────────────
|
|
@@ -949,7 +986,7 @@ def _build_relations(
|
|
|
949
986
|
continue
|
|
950
987
|
for simple_type in sym.param_types:
|
|
951
988
|
base = re.sub(r'<.*', '', simple_type).strip()
|
|
952
|
-
fqn = import_map.get(base)
|
|
989
|
+
fqn = import_map.get(base) or _same_pkg.get(base)
|
|
953
990
|
if fqn:
|
|
954
991
|
edges.append(RelationEdge(
|
|
955
992
|
from_symbol=sym.symbol,
|
|
@@ -982,7 +1019,7 @@ def _build_relations(
|
|
|
982
1019
|
continue
|
|
983
1020
|
_ftype = fld.group("type").strip()
|
|
984
1021
|
_base = re.sub(r'<.*', '', _ftype).strip()
|
|
985
|
-
_fqn = import_map.get(_base)
|
|
1022
|
+
_fqn = import_map.get(_base) or _same_pkg.get(_base)
|
|
986
1023
|
if _fqn:
|
|
987
1024
|
edges.append(RelationEdge(
|
|
988
1025
|
from_symbol=sym.symbol,
|
|
@@ -2632,24 +2669,38 @@ def build_repo_ir(
|
|
|
2632
2669
|
if since:
|
|
2633
2670
|
_since_changed = _get_git_changed_files(root, since)
|
|
2634
2671
|
|
|
2672
|
+
# Pass 1: extract symbols from all files so we can build the same-package
|
|
2673
|
+
# type map before building relations. Java classes in the same package
|
|
2674
|
+
# reference each other without import statements, so import_map alone cannot
|
|
2675
|
+
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2676
|
+
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2635
2677
|
for rel_path in sorted(file_paths):
|
|
2636
2678
|
abs_path = root / rel_path
|
|
2637
2679
|
try:
|
|
2638
2680
|
source = abs_path.read_text(encoding="utf-8", errors="replace")
|
|
2639
2681
|
except OSError:
|
|
2640
2682
|
continue
|
|
2683
|
+
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2684
|
+
all_symbols.extend(symbols)
|
|
2685
|
+
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
2686
|
+
|
|
2687
|
+
# Build {package: {simple_name: FQN}} from every class/interface found.
|
|
2688
|
+
_same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
|
|
2689
|
+
|
|
2690
|
+
# Pass 2: build relations with same-package type resolution available.
|
|
2691
|
+
for rel_path, source, package, raw_imports, symbols in _per_file:
|
|
2692
|
+
same_pkg_types = _same_pkg_map.get(package, {})
|
|
2693
|
+
relations = _build_relations(
|
|
2694
|
+
symbols, raw_imports, source, package, rel_path,
|
|
2695
|
+
same_pkg_types=same_pkg_types,
|
|
2696
|
+
)
|
|
2641
2697
|
|
|
2642
2698
|
old_source: Optional[str] = None
|
|
2643
2699
|
if since:
|
|
2644
|
-
# Only fetch old content for files known to have changed.
|
|
2645
|
-
# Unchanged files have no diff entries — skip git show entirely.
|
|
2646
2700
|
_file_changed = _since_changed is None or rel_path in _since_changed
|
|
2647
2701
|
if _file_changed:
|
|
2648
2702
|
old_source = _get_git_old_content(root, rel_path, since)
|
|
2649
2703
|
|
|
2650
|
-
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2651
|
-
relations = _build_relations(symbols, raw_imports, source, package, rel_path)
|
|
2652
|
-
|
|
2653
2704
|
if old_source is not None:
|
|
2654
2705
|
_, old_symbols, _ = _extract_symbols(old_source, rel_path)
|
|
2655
2706
|
all_changed.extend(_diff_symbols(old_symbols, symbols))
|
|
@@ -2664,7 +2715,6 @@ def build_repo_ir(
|
|
|
2664
2715
|
confidence="high",
|
|
2665
2716
|
))
|
|
2666
2717
|
|
|
2667
|
-
all_symbols.extend(symbols)
|
|
2668
2718
|
all_relations.extend(relations)
|
|
2669
2719
|
|
|
2670
2720
|
spring_summary = _build_spring_summary(all_symbols)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|