sourcecode 1.33.11__tar.gz → 1.33.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95):
  1. {sourcecode-1.33.11 → sourcecode-1.33.12}/PKG-INFO +2 -2
  2. {sourcecode-1.33.11 → sourcecode-1.33.12}/README.md +1 -1
  3. {sourcecode-1.33.11 → sourcecode-1.33.12}/pyproject.toml +1 -1
  4. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/prepare_context.py +96 -10
  6. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/repository_ir.py +63 -13
  7. {sourcecode-1.33.11 → sourcecode-1.33.12}/.github/workflows/build-windows.yml +0 -0
  8. {sourcecode-1.33.11 → sourcecode-1.33.12}/.gitignore +0 -0
  9. {sourcecode-1.33.11 → sourcecode-1.33.12}/.ruff.toml +0 -0
  10. {sourcecode-1.33.11 → sourcecode-1.33.12}/CHANGELOG.md +0 -0
  11. {sourcecode-1.33.11 → sourcecode-1.33.12}/CONTRIBUTING.md +0 -0
  12. {sourcecode-1.33.11 → sourcecode-1.33.12}/LICENSE +0 -0
  13. {sourcecode-1.33.11 → sourcecode-1.33.12}/SECURITY.md +0 -0
  14. {sourcecode-1.33.11 → sourcecode-1.33.12}/raw +0 -0
  15. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/adaptive_scanner.py +0 -0
  16. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/architecture_analyzer.py +0 -0
  17. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/architecture_summary.py +0 -0
  18. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ast_extractor.py +0 -0
  19. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/cache.py +0 -0
  20. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/canonical_ir.py +0 -0
  21. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/classifier.py +0 -0
  22. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/cli.py +0 -0
  23. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/code_notes_analyzer.py +0 -0
  24. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/confidence_analyzer.py +0 -0
  25. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/context_scorer.py +0 -0
  26. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/context_summarizer.py +0 -0
  27. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/contract_model.py +0 -0
  28. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/contract_pipeline.py +0 -0
  29. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/coverage_parser.py +0 -0
  30. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/dependency_analyzer.py +0 -0
  31. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/__init__.py +0 -0
  32. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/base.py +0 -0
  33. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/csproj_parser.py +0 -0
  34. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/dart.py +0 -0
  35. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/dotnet.py +0 -0
  36. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/elixir.py +0 -0
  37. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/go.py +0 -0
  38. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/heuristic.py +0 -0
  39. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/hybrid.py +0 -0
  40. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/java.py +0 -0
  41. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/jvm_ext.py +0 -0
  42. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/nodejs.py +0 -0
  43. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/parsers.py +0 -0
  44. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/php.py +0 -0
  45. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/project.py +0 -0
  46. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/python.py +0 -0
  47. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/ruby.py +0 -0
  48. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/rust.py +0 -0
  49. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/systems.py +0 -0
  50. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/terraform.py +0 -0
  51. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/detectors/tooling.py +0 -0
  52. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/doc_analyzer.py +0 -0
  53. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/entrypoint_classifier.py +0 -0
  54. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/env_analyzer.py +0 -0
  55. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/error_schema.py +0 -0
  56. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/file_classifier.py +0 -0
  57. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/flow_analyzer.py +0 -0
  58. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/git_analyzer.py +0 -0
  59. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/graph_analyzer.py +0 -0
  60. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/license.py +0 -0
  61. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/__init__.py +0 -0
  62. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  63. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  64. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  65. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  66. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  67. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/orchestrator.py +0 -0
  68. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/registry.py +0 -0
  69. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/runner.py +0 -0
  70. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp/server.py +0 -0
  71. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/mcp_nudge.py +0 -0
  72. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/metrics_analyzer.py +0 -0
  73. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/output_budget.py +0 -0
  74. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/path_filters.py +0 -0
  75. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/pr_comment_renderer.py +0 -0
  76. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/progress.py +0 -0
  77. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ranking_engine.py +0 -0
  78. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/redactor.py +0 -0
  79. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/relevance_scorer.py +0 -0
  80. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/repo_classifier.py +0 -0
  81. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/ris.py +0 -0
  82. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.11 → sourcecode-1.33.12}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.11
3
+ Version: 1.33.12
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.11-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.33.12-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.11-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.33.12-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.11"
7
+ version = "1.33.12"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.11"
3
+ __version__ = "1.33.12"
@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
627
627
  "trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
628
628
  }
629
629
 
630
+ # Generic words that add noise when used as symptom keywords in large repos.
631
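+ # NOTE(review): "token" and "user" are described as too ubiquitous in auth systems to be useful alone, but neither appears in the set below — confirm whether they should be added.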
632
+ _SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
633
+ "fails", "fail", "failed", "failure",
634
+ "not", "for", "with", "when", "that", "the", "and", "but",
635
+ "are", "has", "had", "have", "was", "were",
636
+ "get", "set", "can", "does", "did", "should", "would", "could",
637
+ "null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
638
+ "error", "issue", "problem", "bug",
639
+ "from", "into", "via", "due", "also", "after", "before",
640
+ "slow", "fast", "new", "old",
641
+ })
642
+
643
+ # Repo-scale threshold: above this file count, use stricter injection logic.
644
+ _LARGE_REPO_THRESHOLD = 500
630
645
 
631
646
  MAX_FILES_FAST = 2000 # above this threshold --fast uses git-index-only mode
632
647
 
@@ -1695,7 +1710,7 @@ class TaskContextBuilder:
1695
1710
  _camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
1696
1711
  symptom_keywords = [
1697
1712
  w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
1698
- if len(w) > 2
1713
+ if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
1699
1714
  ]
1700
1715
  if symptom_keywords:
1701
1716
  # Pre-compile combined keyword pattern for fast content scanning
@@ -1759,14 +1774,27 @@ class TaskContextBuilder:
1759
1774
  ))
1760
1775
  _existing_paths.add(_cp)
1761
1776
 
1762
- # Pass 4: inject files whose path matches symptom keywords
1777
+ # Scale-awareness: large repos need wider scan and stricter injection.
1778
+ _is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
1779
+
1780
+ # Pass 4: inject files whose path matches symptom keywords.
1781
+ # CamelCase-expand the filename stem so "OfflineSessionLoader" matches
1782
+ # the keyword "offline" even without an explicit directory separator.
1783
+ _p4_dirs_of_injected: set[str] = set() # directories of high-score injects
1763
1784
  for _p in all_paths:
1764
1785
  if _p in _existing_paths:
1765
1786
  continue
1766
1787
  if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
1767
1788
  continue
1768
1789
  _p_lower = _p.lower()
1769
- _matching_kws = [kw for kw in symptom_keywords if kw in _p_lower]
1790
+ # CamelCase-expand the stem and append to the search string so
1791
+ # "OfflineSessionLoader" → "offline session loader" can match
1792
+ # individual keyword tokens beyond what substring search finds.
1793
+ _stem_raw = Path(_p).stem
1794
+ _stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
1795
+ _stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
1796
+ _p_search = _p_lower + " " + _stem_exp
1797
+ _matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
1770
1798
  if not _matching_kws:
1771
1799
  continue
1772
1800
  _boost = 0.2 * len(_matching_kws)
@@ -1781,6 +1809,8 @@ class TaskContextBuilder:
1781
1809
  ))
1782
1810
  _existing_paths.add(_p)
1783
1811
  _sx_direct_path.append(_p)
1812
+ if _injected_score >= 0.7:
1813
+ _p4_dirs_of_injected.add(str(Path(_p).parent))
1784
1814
 
1785
1815
  # Pass 4b: grep-based injection for frontend→backend synonym terms.
1786
1816
  # Runs parallel grep for each backend term to find files not yet in
@@ -1828,9 +1858,41 @@ class TaskContextBuilder:
1828
1858
  ))
1829
1859
  _existing_paths_now.add(_gf)
1830
1860
 
1831
- # Sort before content scan so top candidates get read first
1832
- relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
1833
- _CONTENT_SCAN_LIMIT = 80
1861
+ # Pass 4c: subsystem co-location — inject sibling files from the same
1862
+ # directories as high-score (≥0.7) path-matched files. This catches
1863
+ # architecturally adjacent classes that don't mention symptom keywords
1864
+ # in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
1865
+ # siblings in the same infinispan/ package).
1866
+ if _is_large_repo and _p4_dirs_of_injected:
1867
+ _coloc_existing = {rf.path for rf in relevant_files}
1868
+ for _cp in all_paths:
1869
+ if _cp in _coloc_existing:
1870
+ continue
1871
+ if Path(_cp).suffix.lower() not in _src_exts:
1872
+ continue
1873
+ if str(Path(_cp).parent) in _p4_dirs_of_injected:
1874
+ relevant_files.append(RelevantFile(
1875
+ path=_cp,
1876
+ role="symptom_match",
1877
+ score=0.55,
1878
+ reason="subsystem co-location: same directory as symptom-matched file",
1879
+ why="directory proximity injection",
1880
+ ))
1881
+ _coloc_existing.add(_cp)
1882
+
1883
+ # Sort before content scan so top candidates get read first.
1884
+ # In large repos: prioritise symptom_match files within each score band
1885
+ # so that subsystem-relevant files are content-scanned before generic
1886
+ # structural files at the same score.
1887
+ if _is_large_repo:
1888
+ relevant_files = sorted(
1889
+ relevant_files,
1890
+ key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
1891
+ )
1892
+ _CONTENT_SCAN_LIMIT = 150
1893
+ else:
1894
+ relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
1895
+ _CONTENT_SCAN_LIMIT = 80
1834
1896
  _scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
1835
1897
  _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
1836
1898
 
@@ -1905,15 +1967,31 @@ class TaskContextBuilder:
1905
1967
  elif _extra_syn > 0:
1906
1968
  _new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
1907
1969
 
1970
+ _final_score = round(min(_rf.score + _total_extra, 1.0), 2)
1908
1971
  _boosted.append(RelevantFile(
1909
1972
  path=_rf.path,
1910
1973
  role=_rf.role,
1911
- score=round(min(_rf.score + _total_extra, 1.0), 2),
1974
+ score=_final_score,
1912
1975
  reason=_new_reason,
1913
1976
  why=_rf.why,
1914
1977
  ))
1915
1978
 
1916
- relevant_files = sorted(_boosted + _no_scan_candidates, key=lambda rf: -rf.score)
1979
+ # Use total boost as a secondary sort key so symptom-matched files
1980
+ # that were boosted from a lower base score rank above structural
1981
+ # files that coincidentally reach the same capped score of 1.0.
1982
+ # This prevents budget-trimming from discarding the most relevant files.
1983
+ _boost_totals: dict[str, float] = {}
1984
+ for _rf in _scan_candidates:
1985
+ pass # populated below
1986
+ _boost_totals = {}
1987
+ for _idx, _rf in enumerate(_scan_candidates):
1988
+ _b_rf = _boosted[_idx]
1989
+ _boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
1990
+
1991
+ relevant_files = sorted(
1992
+ _boosted + _no_scan_candidates,
1993
+ key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
1994
+ )
1917
1995
 
1918
1996
  # Synonym note (only when synonyms actually fired)
1919
1997
  if _frontend_kws and _sx_synonyms:
@@ -2390,7 +2468,8 @@ class TaskContextBuilder:
2390
2468
  else:
2391
2469
  _symptom_class_names.add(_tok)
2392
2470
  _symptom_tokens = {
2393
- w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
2471
+ w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
2472
+ if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
2394
2473
  }
2395
2474
 
2396
2475
  scored: list[tuple[float, str, RelevantFile]] = []
@@ -2487,9 +2566,16 @@ class TaskContextBuilder:
2487
2566
  content_boost += 0.8
2488
2567
  _why_parts.append("exception type in path (+0.8)")
2489
2568
 
2490
- # AND-weighted token intersection — multiple matching tokens >> single
2569
+ # AND-weighted token intersection — multiple matching tokens >> single.
2570
+ # CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
2571
+ # "offline", "session", "loader" as individual tokens beyond what the raw
2572
+ # path splitting yields. This lets multi-word symptoms match class names.
2491
2573
  if _symptom_tokens:
2492
2574
  _path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
2575
+ _stem_cc = Path(path).stem
2576
+ _stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
2577
+ _stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
2578
+ _path_parts.update(_stem_cc_exp.split())
2493
2579
  _intersection = _symptom_tokens & _path_parts
2494
2580
  _n_match = len(_intersection)
2495
2581
  if _n_match >= 3:
@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
888
888
  # Phase 3 — Symbol relation graph
889
889
  # ---------------------------------------------------------------------------
890
890
 
891
+ def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
892
+ """Build {package: {simple_name: FQN}} map from all class/interface symbols.
893
+
894
+ Used by build_repo_ir to resolve same-package types that need no explicit import.
895
+ In Java, classes in the same package reference each other without import statements,
896
+ so import_map is empty for them — this map provides the fallback resolution.
897
+ """
898
+ result: dict[str, dict[str, str]] = {}
899
+ for sym in symbols:
900
+ if sym.type not in ("class", "interface") or "#" in sym.symbol:
901
+ continue
902
+ pkg = sym.symbol.rsplit(".", 1)[0] if "." in sym.symbol else ""
903
+ simple = sym.symbol.split(".")[-1]
904
+ result.setdefault(pkg, {})[simple] = sym.symbol
905
+ return result
906
+
907
+
891
908
  def _build_relations(
892
909
  symbols: list[SymbolRecord],
893
910
  raw_imports: list[str],
894
911
  source: str,
895
912
  package: str,
896
913
  rel_path: str,
914
+ same_pkg_types: dict[str, str] | None = None,
897
915
  ) -> list[RelationEdge]:
898
- """Phase 3: Build directed relation graph for symbols in one file."""
916
+ """Phase 3: Build directed relation graph for symbols in one file.
917
+
918
+ same_pkg_types: {simple_name → FQN} for classes in the same package.
919
+ Passed by build_repo_ir after a first pass that collects all symbols.
920
+ Enables resolving injection targets that share a package with the caller
921
+ and therefore need no explicit Java import statement.
922
+ """
899
923
  edges: list[RelationEdge] = []
924
+ _same_pkg: dict[str, str] = same_pkg_types or {}
900
925
 
901
926
  import_map: dict[str, str] = {}
902
927
  for fqn in raw_imports:
@@ -929,15 +954,27 @@ def _build_relations(
929
954
  ))
930
955
 
931
956
  if sym.type == "field":
932
- for imp_fqn in sym.imports_used:
957
+ _inject_ann = next(
958
+ (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
959
+ )
960
+ _field_targets: set[str] = set(sym.imports_used)
961
+ # Same-package field injection: imports_used is empty when the field type
962
+ # shares a package with the declaring class (no import needed in Java).
963
+ # Extract type from signature ("Type name") and resolve via same_pkg_types.
964
+ if not _field_targets and _same_pkg:
965
+ _sig_type = (sym.signature or "").split()[0] if sym.signature else ""
966
+ _sig_base = re.sub(r'<.*', '', _sig_type).strip()
967
+ if _sig_base and _sig_base[0].isupper():
968
+ _same_fqn = _same_pkg.get(_sig_base)
969
+ if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
970
+ _field_targets.add(_same_fqn)
971
+ for imp_fqn in _field_targets:
933
972
  edges.append(RelationEdge(
934
973
  from_symbol=sym_fqn,
935
974
  to_symbol=imp_fqn,
936
975
  type="injects",
937
976
  confidence="high",
938
- evidence={"type": "annotation", "value": next(
939
- (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
940
- )},
977
+ evidence={"type": "annotation", "value": _inject_ann},
941
978
  ))
942
979
 
943
980
  # ── Constructor injection ─────────────────────────────────────────────────
@@ -949,7 +986,7 @@ def _build_relations(
949
986
  continue
950
987
  for simple_type in sym.param_types:
951
988
  base = re.sub(r'<.*', '', simple_type).strip()
952
- fqn = import_map.get(base)
989
+ fqn = import_map.get(base) or _same_pkg.get(base)
953
990
  if fqn:
954
991
  edges.append(RelationEdge(
955
992
  from_symbol=sym.symbol,
@@ -982,7 +1019,7 @@ def _build_relations(
982
1019
  continue
983
1020
  _ftype = fld.group("type").strip()
984
1021
  _base = re.sub(r'<.*', '', _ftype).strip()
985
- _fqn = import_map.get(_base)
1022
+ _fqn = import_map.get(_base) or _same_pkg.get(_base)
986
1023
  if _fqn:
987
1024
  edges.append(RelationEdge(
988
1025
  from_symbol=sym.symbol,
@@ -2632,24 +2669,38 @@ def build_repo_ir(
2632
2669
  if since:
2633
2670
  _since_changed = _get_git_changed_files(root, since)
2634
2671
 
2672
+ # Pass 1: extract symbols from all files so we can build the same-package
2673
+ # type map before building relations. Java classes in the same package
2674
+ # reference each other without import statements, so import_map alone cannot
2675
+ # resolve them — _build_same_package_map provides the cross-file fallback.
2676
+ _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
2635
2677
  for rel_path in sorted(file_paths):
2636
2678
  abs_path = root / rel_path
2637
2679
  try:
2638
2680
  source = abs_path.read_text(encoding="utf-8", errors="replace")
2639
2681
  except OSError:
2640
2682
  continue
2683
+ package, symbols, raw_imports = _extract_symbols(source, rel_path)
2684
+ all_symbols.extend(symbols)
2685
+ _per_file.append((rel_path, source, package, raw_imports, symbols))
2686
+
2687
+ # Build {package: {simple_name: FQN}} from every class/interface found.
2688
+ _same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
2689
+
2690
+ # Pass 2: build relations with same-package type resolution available.
2691
+ for rel_path, source, package, raw_imports, symbols in _per_file:
2692
+ same_pkg_types = _same_pkg_map.get(package, {})
2693
+ relations = _build_relations(
2694
+ symbols, raw_imports, source, package, rel_path,
2695
+ same_pkg_types=same_pkg_types,
2696
+ )
2641
2697
 
2642
2698
  old_source: Optional[str] = None
2643
2699
  if since:
2644
- # Only fetch old content for files known to have changed.
2645
- # Unchanged files have no diff entries — skip git show entirely.
2646
2700
  _file_changed = _since_changed is None or rel_path in _since_changed
2647
2701
  if _file_changed:
2648
2702
  old_source = _get_git_old_content(root, rel_path, since)
2649
2703
 
2650
- package, symbols, raw_imports = _extract_symbols(source, rel_path)
2651
- relations = _build_relations(symbols, raw_imports, source, package, rel_path)
2652
-
2653
2704
  if old_source is not None:
2654
2705
  _, old_symbols, _ = _extract_symbols(old_source, rel_path)
2655
2706
  all_changed.extend(_diff_symbols(old_symbols, symbols))
@@ -2664,7 +2715,6 @@ def build_repo_ir(
2664
2715
  confidence="high",
2665
2716
  ))
2666
2717
 
2667
- all_symbols.extend(symbols)
2668
2718
  all_relations.extend(relations)
2669
2719
 
2670
2720
  spring_summary = _build_spring_summary(all_symbols)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes