sourcecode 1.33.11__tar.gz → 1.33.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-1.33.11 → sourcecode-1.33.12}/PKG-INFO +2 -2
- {sourcecode-1.33.11 → sourcecode-1.33.12}/README.md +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.12}/pyproject.toml +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/__init__.py +1 -1
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/prepare_context.py +96 -10
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/repository_ir.py +63 -13
- {sourcecode-1.33.11 → sourcecode-1.33.12}/.github/workflows/build-windows.yml +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/.gitignore +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/.ruff.toml +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/CHANGELOG.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/CONTRIBUTING.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/LICENSE +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/SECURITY.md +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/raw +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/cache.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/canonical_ir.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/cli.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/context_scorer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/error_schema.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/flow_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/license.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/applier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/backup.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/detector.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/planner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/orchestrator.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/registry.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/runner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/server.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp_nudge.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/output_budget.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/path_filters.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/pr_comment_renderer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/progress.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ranking_engine.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/redactor.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ris.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/scanner.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/schema.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/serializer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/workspace.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.33.11
|
|
3
|
+
Version: 1.33.12
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
|
|
40
40
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
41
41
|
|
|
42
|
-

|
|
43
43
|

|
|
44
44
|
|
|
45
45
|
---
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|

|
|
7
7
|
|
|
8
8
|
---
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sourcecode"
|
|
7
|
-
version = "1.33.11"
|
|
7
|
+
version = "1.33.12"
|
|
8
8
|
description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
|
|
|
627
627
|
"trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
|
|
628
628
|
}
|
|
629
629
|
|
|
630
|
+
# Generic words that add noise when used as symptom keywords in large repos.
|
|
631
|
+
# "token" and "user" are too ubiquitous in auth systems to be useful alone.
|
|
632
|
+
_SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
|
|
633
|
+
"fails", "fail", "failed", "failure",
|
|
634
|
+
"not", "for", "with", "when", "that", "the", "and", "but",
|
|
635
|
+
"are", "has", "had", "have", "was", "were",
|
|
636
|
+
"get", "set", "can", "does", "did", "should", "would", "could",
|
|
637
|
+
"null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
|
|
638
|
+
"error", "issue", "problem", "bug",
|
|
639
|
+
"from", "into", "via", "due", "also", "after", "before",
|
|
640
|
+
"slow", "fast", "new", "old",
|
|
641
|
+
})
|
|
642
|
+
|
|
643
|
+
# Repo-scale threshold: above this file count, use stricter injection logic.
|
|
644
|
+
_LARGE_REPO_THRESHOLD = 500
|
|
630
645
|
|
|
631
646
|
MAX_FILES_FAST = 2000 # above this threshold --fast uses git-index-only mode
|
|
632
647
|
|
|
@@ -1695,7 +1710,7 @@ class TaskContextBuilder:
|
|
|
1695
1710
|
_camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
|
|
1696
1711
|
symptom_keywords = [
|
|
1697
1712
|
w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
|
|
1698
|
-
if len(w) > 2
|
|
1713
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
1699
1714
|
]
|
|
1700
1715
|
if symptom_keywords:
|
|
1701
1716
|
# Pre-compile combined keyword pattern for fast content scanning
|
|
@@ -1759,14 +1774,27 @@ class TaskContextBuilder:
|
|
|
1759
1774
|
))
|
|
1760
1775
|
_existing_paths.add(_cp)
|
|
1761
1776
|
|
|
1762
|
-
#
|
|
1777
|
+
# Scale-awareness: large repos need wider scan and stricter injection.
|
|
1778
|
+
_is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
|
|
1779
|
+
|
|
1780
|
+
# Pass 4: inject files whose path matches symptom keywords.
|
|
1781
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" matches
|
|
1782
|
+
# the keyword "offline" even without an explicit directory separator.
|
|
1783
|
+
_p4_dirs_of_injected: set[str] = set() # directories of high-score injects
|
|
1763
1784
|
for _p in all_paths:
|
|
1764
1785
|
if _p in _existing_paths:
|
|
1765
1786
|
continue
|
|
1766
1787
|
if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
|
|
1767
1788
|
continue
|
|
1768
1789
|
_p_lower = _p.lower()
|
|
1769
|
-
|
|
1790
|
+
# CamelCase-expand the stem and append to the search string so
|
|
1791
|
+
# "OfflineSessionLoader" → "offline session loader" can match
|
|
1792
|
+
# individual keyword tokens beyond what substring search finds.
|
|
1793
|
+
_stem_raw = Path(_p).stem
|
|
1794
|
+
_stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
|
|
1795
|
+
_stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
|
|
1796
|
+
_p_search = _p_lower + " " + _stem_exp
|
|
1797
|
+
_matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
|
|
1770
1798
|
if not _matching_kws:
|
|
1771
1799
|
continue
|
|
1772
1800
|
_boost = 0.2 * len(_matching_kws)
|
|
@@ -1781,6 +1809,8 @@ class TaskContextBuilder:
|
|
|
1781
1809
|
))
|
|
1782
1810
|
_existing_paths.add(_p)
|
|
1783
1811
|
_sx_direct_path.append(_p)
|
|
1812
|
+
if _injected_score >= 0.7:
|
|
1813
|
+
_p4_dirs_of_injected.add(str(Path(_p).parent))
|
|
1784
1814
|
|
|
1785
1815
|
# Pass 4b: grep-based injection for frontend→backend synonym terms.
|
|
1786
1816
|
# Runs parallel grep for each backend term to find files not yet in
|
|
@@ -1828,9 +1858,41 @@ class TaskContextBuilder:
|
|
|
1828
1858
|
))
|
|
1829
1859
|
_existing_paths_now.add(_gf)
|
|
1830
1860
|
|
|
1831
|
-
#
|
|
1832
|
-
|
|
1833
|
-
|
|
1861
|
+
# Pass 4c: subsystem co-location — inject sibling files from the same
|
|
1862
|
+
# directories as high-score (≥0.7) path-matched files. This catches
|
|
1863
|
+
# architecturally adjacent classes that don't mention symptom keywords
|
|
1864
|
+
# in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
|
|
1865
|
+
# siblings in the same infinispan/ package).
|
|
1866
|
+
if _is_large_repo and _p4_dirs_of_injected:
|
|
1867
|
+
_coloc_existing = {rf.path for rf in relevant_files}
|
|
1868
|
+
for _cp in all_paths:
|
|
1869
|
+
if _cp in _coloc_existing:
|
|
1870
|
+
continue
|
|
1871
|
+
if Path(_cp).suffix.lower() not in _src_exts:
|
|
1872
|
+
continue
|
|
1873
|
+
if str(Path(_cp).parent) in _p4_dirs_of_injected:
|
|
1874
|
+
relevant_files.append(RelevantFile(
|
|
1875
|
+
path=_cp,
|
|
1876
|
+
role="symptom_match",
|
|
1877
|
+
score=0.55,
|
|
1878
|
+
reason="subsystem co-location: same directory as symptom-matched file",
|
|
1879
|
+
why="directory proximity injection",
|
|
1880
|
+
))
|
|
1881
|
+
_coloc_existing.add(_cp)
|
|
1882
|
+
|
|
1883
|
+
# Sort before content scan so top candidates get read first.
|
|
1884
|
+
# In large repos: prioritise symptom_match files within each score band
|
|
1885
|
+
# so that subsystem-relevant files are content-scanned before generic
|
|
1886
|
+
# structural files at the same score.
|
|
1887
|
+
if _is_large_repo:
|
|
1888
|
+
relevant_files = sorted(
|
|
1889
|
+
relevant_files,
|
|
1890
|
+
key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
|
|
1891
|
+
)
|
|
1892
|
+
_CONTENT_SCAN_LIMIT = 150
|
|
1893
|
+
else:
|
|
1894
|
+
relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
|
|
1895
|
+
_CONTENT_SCAN_LIMIT = 80
|
|
1834
1896
|
_scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
|
|
1835
1897
|
_no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
|
|
1836
1898
|
|
|
@@ -1905,15 +1967,31 @@ class TaskContextBuilder:
|
|
|
1905
1967
|
elif _extra_syn > 0:
|
|
1906
1968
|
_new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
|
|
1907
1969
|
|
|
1970
|
+
_final_score = round(min(_rf.score + _total_extra, 1.0), 2)
|
|
1908
1971
|
_boosted.append(RelevantFile(
|
|
1909
1972
|
path=_rf.path,
|
|
1910
1973
|
role=_rf.role,
|
|
1911
|
-
score=
|
|
1974
|
+
score=_final_score,
|
|
1912
1975
|
reason=_new_reason,
|
|
1913
1976
|
why=_rf.why,
|
|
1914
1977
|
))
|
|
1915
1978
|
|
|
1916
|
-
|
|
1979
|
+
# Use total boost as a secondary sort key so symptom-matched files
|
|
1980
|
+
# that were boosted from a lower base score rank above structural
|
|
1981
|
+
# files that coincidentally reach the same capped score of 1.0.
|
|
1982
|
+
# This prevents budget-trimming from discarding the most relevant files.
|
|
1983
|
+
_boost_totals: dict[str, float] = {}
|
|
1984
|
+
for _rf in _scan_candidates:
|
|
1985
|
+
pass # populated below
|
|
1986
|
+
_boost_totals = {}
|
|
1987
|
+
for _idx, _rf in enumerate(_scan_candidates):
|
|
1988
|
+
_b_rf = _boosted[_idx]
|
|
1989
|
+
_boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
|
|
1990
|
+
|
|
1991
|
+
relevant_files = sorted(
|
|
1992
|
+
_boosted + _no_scan_candidates,
|
|
1993
|
+
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
1994
|
+
)
|
|
1917
1995
|
|
|
1918
1996
|
# Synonym note (only when synonyms actually fired)
|
|
1919
1997
|
if _frontend_kws and _sx_synonyms:
|
|
@@ -2390,7 +2468,8 @@ class TaskContextBuilder:
|
|
|
2390
2468
|
else:
|
|
2391
2469
|
_symptom_class_names.add(_tok)
|
|
2392
2470
|
_symptom_tokens = {
|
|
2393
|
-
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2471
|
+
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2472
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
2394
2473
|
}
|
|
2395
2474
|
|
|
2396
2475
|
scored: list[tuple[float, str, RelevantFile]] = []
|
|
@@ -2487,9 +2566,16 @@ class TaskContextBuilder:
|
|
|
2487
2566
|
content_boost += 0.8
|
|
2488
2567
|
_why_parts.append("exception type in path (+0.8)")
|
|
2489
2568
|
|
|
2490
|
-
# AND-weighted token intersection — multiple matching tokens >> single
|
|
2569
|
+
# AND-weighted token intersection — multiple matching tokens >> single.
|
|
2570
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
|
|
2571
|
+
# "offline", "session", "loader" as individual tokens beyond what the raw
|
|
2572
|
+
# path splitting yields. This lets multi-word symptoms match class names.
|
|
2491
2573
|
if _symptom_tokens:
|
|
2492
2574
|
_path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
|
|
2575
|
+
_stem_cc = Path(path).stem
|
|
2576
|
+
_stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
|
|
2577
|
+
_stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
|
|
2578
|
+
_path_parts.update(_stem_cc_exp.split())
|
|
2493
2579
|
_intersection = _symptom_tokens & _path_parts
|
|
2494
2580
|
_n_match = len(_intersection)
|
|
2495
2581
|
if _n_match >= 3:
|
|
@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
|
|
|
888
888
|
# Phase 3 — Symbol relation graph
|
|
889
889
|
# ---------------------------------------------------------------------------
|
|
890
890
|
|
|
891
|
+
def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
|
|
892
|
+
"""Build {package: {simple_name: FQN}} map from all class/interface symbols.
|
|
893
|
+
|
|
894
|
+
Used by build_repo_ir to resolve same-package types that need no explicit import.
|
|
895
|
+
In Java, classes in the same package reference each other without import statements,
|
|
896
|
+
so import_map is empty for them — this map provides the fallback resolution.
|
|
897
|
+
"""
|
|
898
|
+
result: dict[str, dict[str, str]] = {}
|
|
899
|
+
for sym in symbols:
|
|
900
|
+
if sym.type not in ("class", "interface") or "#" in sym.symbol:
|
|
901
|
+
continue
|
|
902
|
+
pkg = sym.symbol.rsplit(".", 1)[0] if "." in sym.symbol else ""
|
|
903
|
+
simple = sym.symbol.split(".")[-1]
|
|
904
|
+
result.setdefault(pkg, {})[simple] = sym.symbol
|
|
905
|
+
return result
|
|
906
|
+
|
|
907
|
+
|
|
891
908
|
def _build_relations(
|
|
892
909
|
symbols: list[SymbolRecord],
|
|
893
910
|
raw_imports: list[str],
|
|
894
911
|
source: str,
|
|
895
912
|
package: str,
|
|
896
913
|
rel_path: str,
|
|
914
|
+
same_pkg_types: dict[str, str] | None = None,
|
|
897
915
|
) -> list[RelationEdge]:
|
|
898
|
-
"""Phase 3: Build directed relation graph for symbols in one file."""
|
|
916
|
+
"""Phase 3: Build directed relation graph for symbols in one file.
|
|
917
|
+
|
|
918
|
+
same_pkg_types: {simple_name → FQN} for classes in the same package.
|
|
919
|
+
Passed by build_repo_ir after a first pass that collects all symbols.
|
|
920
|
+
Enables resolving injection targets that share a package with the caller
|
|
921
|
+
and therefore need no explicit Java import statement.
|
|
922
|
+
"""
|
|
899
923
|
edges: list[RelationEdge] = []
|
|
924
|
+
_same_pkg: dict[str, str] = same_pkg_types or {}
|
|
900
925
|
|
|
901
926
|
import_map: dict[str, str] = {}
|
|
902
927
|
for fqn in raw_imports:
|
|
@@ -929,15 +954,27 @@ def _build_relations(
|
|
|
929
954
|
))
|
|
930
955
|
|
|
931
956
|
if sym.type == "field":
|
|
932
|
-
|
|
957
|
+
_inject_ann = next(
|
|
958
|
+
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
959
|
+
)
|
|
960
|
+
_field_targets: set[str] = set(sym.imports_used)
|
|
961
|
+
# Same-package field injection: imports_used is empty when the field type
|
|
962
|
+
# shares a package with the declaring class (no import needed in Java).
|
|
963
|
+
# Extract type from signature ("Type name") and resolve via same_pkg_types.
|
|
964
|
+
if not _field_targets and _same_pkg:
|
|
965
|
+
_sig_type = (sym.signature or "").split()[0] if sym.signature else ""
|
|
966
|
+
_sig_base = re.sub(r'<.*', '', _sig_type).strip()
|
|
967
|
+
if _sig_base and _sig_base[0].isupper():
|
|
968
|
+
_same_fqn = _same_pkg.get(_sig_base)
|
|
969
|
+
if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
|
|
970
|
+
_field_targets.add(_same_fqn)
|
|
971
|
+
for imp_fqn in _field_targets:
|
|
933
972
|
edges.append(RelationEdge(
|
|
934
973
|
from_symbol=sym_fqn,
|
|
935
974
|
to_symbol=imp_fqn,
|
|
936
975
|
type="injects",
|
|
937
976
|
confidence="high",
|
|
938
|
-
evidence={"type": "annotation", "value": next(
|
|
939
|
-
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
940
|
-
)},
|
|
977
|
+
evidence={"type": "annotation", "value": _inject_ann},
|
|
941
978
|
))
|
|
942
979
|
|
|
943
980
|
# ── Constructor injection ─────────────────────────────────────────────────
|
|
@@ -949,7 +986,7 @@ def _build_relations(
|
|
|
949
986
|
continue
|
|
950
987
|
for simple_type in sym.param_types:
|
|
951
988
|
base = re.sub(r'<.*', '', simple_type).strip()
|
|
952
|
-
fqn = import_map.get(base)
|
|
989
|
+
fqn = import_map.get(base) or _same_pkg.get(base)
|
|
953
990
|
if fqn:
|
|
954
991
|
edges.append(RelationEdge(
|
|
955
992
|
from_symbol=sym.symbol,
|
|
@@ -982,7 +1019,7 @@ def _build_relations(
|
|
|
982
1019
|
continue
|
|
983
1020
|
_ftype = fld.group("type").strip()
|
|
984
1021
|
_base = re.sub(r'<.*', '', _ftype).strip()
|
|
985
|
-
_fqn = import_map.get(_base)
|
|
1022
|
+
_fqn = import_map.get(_base) or _same_pkg.get(_base)
|
|
986
1023
|
if _fqn:
|
|
987
1024
|
edges.append(RelationEdge(
|
|
988
1025
|
from_symbol=sym.symbol,
|
|
@@ -2632,24 +2669,38 @@ def build_repo_ir(
|
|
|
2632
2669
|
if since:
|
|
2633
2670
|
_since_changed = _get_git_changed_files(root, since)
|
|
2634
2671
|
|
|
2672
|
+
# Pass 1: extract symbols from all files so we can build the same-package
|
|
2673
|
+
# type map before building relations. Java classes in the same package
|
|
2674
|
+
# reference each other without import statements, so import_map alone cannot
|
|
2675
|
+
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2676
|
+
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2635
2677
|
for rel_path in sorted(file_paths):
|
|
2636
2678
|
abs_path = root / rel_path
|
|
2637
2679
|
try:
|
|
2638
2680
|
source = abs_path.read_text(encoding="utf-8", errors="replace")
|
|
2639
2681
|
except OSError:
|
|
2640
2682
|
continue
|
|
2683
|
+
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2684
|
+
all_symbols.extend(symbols)
|
|
2685
|
+
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
2686
|
+
|
|
2687
|
+
# Build {package: {simple_name: FQN}} from every class/interface found.
|
|
2688
|
+
_same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
|
|
2689
|
+
|
|
2690
|
+
# Pass 2: build relations with same-package type resolution available.
|
|
2691
|
+
for rel_path, source, package, raw_imports, symbols in _per_file:
|
|
2692
|
+
same_pkg_types = _same_pkg_map.get(package, {})
|
|
2693
|
+
relations = _build_relations(
|
|
2694
|
+
symbols, raw_imports, source, package, rel_path,
|
|
2695
|
+
same_pkg_types=same_pkg_types,
|
|
2696
|
+
)
|
|
2641
2697
|
|
|
2642
2698
|
old_source: Optional[str] = None
|
|
2643
2699
|
if since:
|
|
2644
|
-
# Only fetch old content for files known to have changed.
|
|
2645
|
-
# Unchanged files have no diff entries — skip git show entirely.
|
|
2646
2700
|
_file_changed = _since_changed is None or rel_path in _since_changed
|
|
2647
2701
|
if _file_changed:
|
|
2648
2702
|
old_source = _get_git_old_content(root, rel_path, since)
|
|
2649
2703
|
|
|
2650
|
-
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2651
|
-
relations = _build_relations(symbols, raw_imports, source, package, rel_path)
|
|
2652
|
-
|
|
2653
2704
|
if old_source is not None:
|
|
2654
2705
|
_, old_symbols, _ = _extract_symbols(old_source, rel_path)
|
|
2655
2706
|
all_changed.extend(_diff_symbols(old_symbols, symbols))
|
|
@@ -2664,7 +2715,6 @@ def build_repo_ir(
|
|
|
2664
2715
|
confidence="high",
|
|
2665
2716
|
))
|
|
2666
2717
|
|
|
2667
|
-
all_symbols.extend(symbols)
|
|
2668
2718
|
all_relations.extend(relations)
|
|
2669
2719
|
|
|
2670
2720
|
spring_summary = _build_spring_summary(all_symbols)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|