sourcecode 1.31.5__py3-none-any.whl → 1.31.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.5"
3
+ __version__ = "1.31.7"
sourcecode/cli.py CHANGED
@@ -2495,23 +2495,81 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2495
2495
 
2496
2496
  endpoints: list[dict] = []
2497
2497
  seen: set[tuple] = set()
2498
+ # Fix 1: inheritance projection — tracks class data so controllers with ONLY
2499
+ # inherited endpoints (no own @RequestMapping methods) are not silently dropped.
2500
+ _class_info: dict[str, dict] = {}
2501
+ _EXTENDS_RE_LOCAL = _re.compile(r'\bextends\s+(\w+)')
2502
+
2503
+ from sourcecode.path_filters import is_test_path as _is_test_path
2498
2504
 
2499
2505
  java_files = [
2500
2506
  p for p in root.rglob("*.java")
2501
- if "/test/" not in str(p).replace("\\", "/")
2502
- and "/tests/" not in str(p).replace("\\", "/")
2507
+ if not _is_test_path(str(p).replace("\\", "/"))
2503
2508
  and "target/" not in str(p).replace("\\", "/")
2504
2509
  ]
2505
2510
 
2511
+ # ── Meta-annotation index ─────────────────────────────────────────────────
2512
+ # First pass: find @interface declarations and the annotations they carry.
2513
+ # This lets us detect controllers annotated with framework wrappers like
2514
+ # @FrameworkController that itself carries @Controller.
2515
+ #
2516
+ # Index maps annotation simple name → set of annotation names it is
2517
+ # annotated with (one level; resolution is done recursively below).
2518
+ _ANN_DECL_RE = _re.compile(r'public\s+@interface\s+(\w+)')
2519
+ _ANN_USE_RE = _re.compile(r'@(\w+)')
2520
+
2521
+ _meta_index: dict[str, set[str]] = {}
2522
+ for _jf in java_files:
2523
+ _raw: str
2524
+ try:
2525
+ _raw = _jf.read_text(encoding="utf-8", errors="replace")
2526
+ except OSError:
2527
+ continue
2528
+ if "@interface" not in _raw:
2529
+ continue
2530
+ _decl_m = _ANN_DECL_RE.search(_raw)
2531
+ if not _decl_m:
2532
+ continue
2533
+ _ann_name = _decl_m.group(1)
2534
+ # Collect annotations appearing in the header (before the @interface line)
2535
+ _header = _raw[: _decl_m.start()]
2536
+ _meta_anns: set[str] = {
2537
+ m for m in _ANN_USE_RE.findall(_header)
2538
+ if m not in ("interface", "interface") # strip keywords; annotation names only
2539
+ }
2540
+ _meta_index[_ann_name] = _meta_anns
2541
+
2542
+ _CONTROLLER_CORE = frozenset({"Controller", "RestController"})
2543
+
2544
+ def _resolves_to_controller(name: str, visited: "set[str]") -> bool:
2545
+ """Return True when annotation *name* transitively carries @Controller/@RestController."""
2546
+ if name in _CONTROLLER_CORE:
2547
+ return True
2548
+ if name in visited:
2549
+ return False
2550
+ visited.add(name)
2551
+ for parent in _meta_index.get(name, ()):
2552
+ if _resolves_to_controller(parent, visited):
2553
+ return True
2554
+ return False
2555
+
2506
2556
  for java_file in java_files:
2507
2557
  try:
2508
2558
  content = java_file.read_text(encoding="utf-8", errors="replace")
2509
2559
  except OSError:
2510
2560
  continue
2511
2561
 
2512
- # Only process files with REST controller or mapping annotations
2513
- if not any(x in content for x in ("@RestController", "@Controller", "@RequestMapping")):
2514
- continue
2562
+ # Process files with direct controller/mapping annotations OR
2563
+ # with custom annotations that transitively resolve to @Controller/@RestController.
2564
+ _has_direct = any(x in content for x in ("@RestController", "@Controller", "@RequestMapping"))
2565
+ if not _has_direct:
2566
+ # Quick meta-annotation check: extract class-level annotation names
2567
+ # (first 60 lines — before the class body opens) and resolve them.
2568
+ _header_lines = content.splitlines()[:60]
2569
+ _header_text = "\n".join(_header_lines)
2570
+ _file_anns = set(_ANN_USE_RE.findall(_header_text))
2571
+ if not any(_resolves_to_controller(a, set()) for a in _file_anns):
2572
+ continue
2515
2573
 
2516
2574
  try:
2517
2575
  rel_path = str(java_file.relative_to(root)).replace("\\", "/")
@@ -2525,14 +2583,18 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2525
2583
  # Extract class-level base path and locate class body start
2526
2584
  lines = content.splitlines()
2527
2585
 
2528
- # First pass: find class/interface declaration line index
2586
+ # First pass: find class/interface declaration line index and extends clause.
2529
2587
  class_body_start = 0
2588
+ extends_class_name: Optional[str] = None
2530
2589
  for i, line in enumerate(lines):
2531
2590
  stripped_l = line.strip()
2532
2591
  if (not stripped_l.startswith("//") and not stripped_l.startswith("*")
2533
2592
  and ("class " in stripped_l or "interface " in stripped_l)
2534
2593
  and _CLASS_RE.search(stripped_l)):
2535
2594
  class_body_start = i + 1
2595
+ _ext_m = _EXTENDS_RE_LOCAL.search(stripped_l)
2596
+ if _ext_m:
2597
+ extends_class_name = _ext_m.group(1)
2536
2598
  break
2537
2599
 
2538
2600
  # Second pass: extract class-level @RequestMapping path (only before class body).
@@ -2602,6 +2664,7 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2602
2664
  pending_annotations: list[tuple[str, str]] = [] # (http_verb, path_suffix)
2603
2665
  pending_filtro: Optional[str] = None
2604
2666
  in_block_comment = False
2667
+ file_own_endpoints: list[tuple] = [] # (http_verb, path_suffix, handler, filtro)
2605
2668
 
2606
2669
  for i in range(class_body_start, len(lines)):
2607
2670
  line = lines[i]
@@ -2654,6 +2717,7 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2654
2717
  handler = mm.group(1) if mm else ""
2655
2718
  if handler and not handler.startswith("class"):
2656
2719
  for http_verb, path_suffix in pending_annotations:
2720
+ file_own_endpoints.append((http_verb, path_suffix, handler, pending_filtro))
2657
2721
  for _cb in class_bases: # Bug #1B: one endpoint per class prefix
2658
2722
  full_path = (_cb + "/" + path_suffix).replace("//", "/").rstrip("/") or "/"
2659
2723
  if not full_path.startswith("/"):
@@ -2679,6 +2743,50 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2679
2743
  pending_annotations = []
2680
2744
  pending_filtro = None
2681
2745
 
2746
+ # Store per-class data for inheritance projection
2747
+ _class_info[class_name] = {
2748
+ "base_paths": class_bases,
2749
+ "extends_class": extends_class_name,
2750
+ "own_endpoints": file_own_endpoints,
2751
+ }
2752
+
2753
+ # Fix 1: Inheritance projection — controllers whose methods are 100% inherited from a
2754
+ # base class emit 0 endpoints above because no method-level annotations exist in their
2755
+ # file. Walk the inheritance chain and project parent suffixes with the child's base path.
2756
+ for _cls, _data in _class_info.items():
2757
+ if _data["own_endpoints"]:
2758
+ continue
2759
+ _bases = _data["base_paths"]
2760
+ if not _bases or _bases == [""]:
2761
+ continue
2762
+ _chain = _data.get("extends_class")
2763
+ _visited: set[str] = {_cls}
2764
+ while _chain and _chain not in _visited:
2765
+ _visited.add(_chain)
2766
+ _parent = _class_info.get(_chain)
2767
+ if not _parent:
2768
+ break
2769
+ if _parent["own_endpoints"]:
2770
+ for _verb, _suffix, _handler, _filtro in _parent["own_endpoints"]:
2771
+ for _cb in _bases:
2772
+ _fp = (_cb + "/" + _suffix).replace("//", "/").rstrip("/") or "/"
2773
+ if not _fp.startswith("/"):
2774
+ _fp = "/" + _fp
2775
+ _key = (_cls, _handler, _verb, _cb)
2776
+ if _key not in seen:
2777
+ seen.add(_key)
2778
+ _entry: dict[str, Any] = {
2779
+ "method": _verb,
2780
+ "path": _fp,
2781
+ "controller": _cls,
2782
+ "handler": _handler,
2783
+ }
2784
+ if _filtro:
2785
+ _entry["required_permission"] = _filtro
2786
+ endpoints.append(_entry)
2787
+ break
2788
+ _chain = _parent.get("extends_class")
2789
+
2682
2790
  endpoints.sort(key=lambda e: (e.get("controller", ""), e.get("path", "")))
2683
2791
  undocumented = sum(1 for e in endpoints if "required_permission" not in e)
2684
2792
 
@@ -20,7 +20,8 @@ _SYMBOL_LOOKBACK = 25 # líneas hacia atrás para encontrar el símbolo envolve
20
20
  _SKIP_DIRS = {
21
21
  "node_modules", ".git", "__pycache__", ".venv", "venv",
22
22
  ".mypy_cache", "dist", "build", ".tox", ".eggs",
23
- ".next", ".nuxt", ".output", "vendor", "coverage",
23
+ ".next", ".nuxt", ".output", "vendor", "vendors", "coverage",
24
+ "third_party", "thirdparty",
24
25
  }
25
26
 
26
27
  _CODE_EXTENSIONS = {
@@ -255,4 +256,6 @@ class CodeNotesAnalyzer:
255
256
  # here was redundant and caused files to be silently skipped when
256
257
  # traversal order varied (different files filled the quota first).
257
258
  if suffix in _CODE_EXTENSIONS:
258
- _scan_source_file(entry, rel, notes, total_count)
259
+ from sourcecode.path_filters import is_vendor_path as _is_vendor
260
+ if not _is_vendor(rel):
261
+ _scan_source_file(entry, rel, notes, total_count)
@@ -295,9 +295,13 @@ class JavaDetector(AbstractDetector):
295
295
  self._augment_deep_java_controllers(context, all_java)
296
296
 
297
297
  # 1. @SpringBootApplication entry: Application.java / Main.java by name
298
+ # Exclude test trees: test helpers like AdminApplication.java in
299
+ # integration/src/test/java/ must not be treated as production entrypoints.
300
+ from sourcecode.path_filters import is_test_path as _is_test_path
298
301
  app_candidates = [
299
302
  p for p in all_java
300
303
  if p.endswith(("Application.java", "Main.java"))
304
+ and not _is_test_path(p)
301
305
  ]
302
306
  entry_points: list[EntryPoint] = [
303
307
  EntryPoint(path=p, stack="java", kind="application", source="manifest")
@@ -307,7 +311,7 @@ class JavaDetector(AbstractDetector):
307
311
  # 2. Annotation-based scan: @RestController, @WebFilter, FilterRegistrationBean
308
312
  # Prioritize Controller-named files so all REST controllers are detected
309
313
  # even in large codebases where total files > _MAX_JAVA_ENTRY_SCAN.
310
- _non_test = [p for p in all_java if "/test/" not in p and "/tests/" not in p]
314
+ _non_test = [p for p in all_java if not _is_test_path(p)]
311
315
  _ctrl_files = [p for p in _non_test if "Controller" in p]
312
316
  _other_files = [p for p in _non_test if "Controller" not in p]
313
317
  scan_candidates = _ctrl_files + _other_files[:max(0, _MAX_JAVA_ENTRY_SCAN - len(_ctrl_files))]
@@ -0,0 +1,124 @@
1
+ """Shared path classification helpers used across all tools.
2
+
3
+ Centralises test-path and vendor-path detection so each tool does not
4
+ duplicate — and diverge — these heuristics.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ _TEST_SEGMENTS = frozenset({
9
+ "test", "tests", "spec", "specs",
10
+ "test-helpers", "test_helpers", "testfixtures",
11
+ "it", # integration-tests short name
12
+ "integrationtest", "integrationtests",
13
+ })
14
+
15
+ _VENDOR_SEGMENTS = frozenset({
16
+ "vendor", "vendors",
17
+ "third_party", "thirdparty",
18
+ "node_modules",
19
+ "external", "externals",
20
+ "contrib",
21
+ })
22
+
23
+ # lib/libs are vendor only for web-asset extensions.
24
+ # Java/Kotlin/Python source in a package named "lib" is NOT vendor.
25
+ _LIB_SEGMENTS = frozenset({"lib", "libs"})
26
+ _WEB_ASSET_EXTS = frozenset({
27
+ ".js", ".jsx", ".mjs", ".cjs",
28
+ ".ts", ".tsx",
29
+ ".css", ".less", ".scss", ".sass",
30
+ ".json", ".map",
31
+ })
32
+
33
+ _VENDOR_PATH_FRAGMENTS = (
34
+ "/vendor/", "/vendors/",
35
+ "/third_party/", "/thirdparty/",
36
+ "/node_modules/",
37
+ "/external/", "/externals/",
38
+ "/contrib/",
39
+ )
40
+
41
+ _JAVA_TEST_ROOTS = (
42
+ "/src/test/",
43
+ "\\src\\test\\",
44
+ )
45
+
46
+
47
+ def is_test_path(path: str) -> bool:
48
+ """Return True when *path* is part of a test tree, not production code.
49
+
50
+ Handles:
51
+ - Standard Maven/Gradle layout (src/test/java/…)
52
+ - Common naming conventions (/tests/, /spec/, /it/)
53
+ - Java file name conventions (FooTest.java, TestFoo.java)
54
+ - Python conventions (test_foo.py, foo_test.py)
55
+ - JS/TS conventions (foo.test.ts, foo.spec.ts)
56
+ """
57
+ norm = path.replace("\\", "/").lower()
58
+
59
+ # Maven/Gradle standard test root (fast path)
60
+ if "/src/test/" in norm:
61
+ return True
62
+
63
+ # Segment-based check – any directory component is a test segment
64
+ parts = norm.split("/")
65
+ for part in parts[:-1]: # skip filename itself
66
+ bare = part.rstrip("/")
67
+ if bare in _TEST_SEGMENTS:
68
+ return True
69
+
70
+ # File-name conventions
71
+ name = parts[-1]
72
+ if (
73
+ name.startswith("test_")
74
+ or name.endswith("_test.py")
75
+ or name.endswith(".test.ts")
76
+ or name.endswith(".test.js")
77
+ or name.endswith(".spec.ts")
78
+ or name.endswith(".spec.js")
79
+ or (name.endswith("test.java") and name != "test.java")
80
+ or name.endswith("tests.java")
81
+ or (name.startswith("test") and name.endswith(".java") and len(name) > 9)
82
+ ):
83
+ return True
84
+
85
+ return False
86
+
87
+
88
+ def is_vendor_path(path: str) -> bool:
89
+ """Return True when *path* is inside a vendored / third-party directory.
90
+
91
+ Handles:
92
+ - /vendor/, /vendors/, /third_party/, /node_modules/
93
+ - /lib/, /libs/ containing web assets (NOT JVM/Python source — those may
94
+ legitimately use "lib" as a package name)
95
+ - Minified JS/CSS files anywhere (*.min.js, *.min.css)
96
+ """
97
+ norm = path.replace("\\", "/").lower()
98
+
99
+ # Minified files are always vendor regardless of directory
100
+ if norm.endswith(".min.js") or norm.endswith(".min.css"):
101
+ return True
102
+
103
+ # Fast fragment check for unambiguous vendor directories
104
+ for frag in _VENDOR_PATH_FRAGMENTS:
105
+ if frag in norm:
106
+ return True
107
+
108
+ parts = norm.split("/")
109
+ dir_parts = parts[:-1] # exclude filename
110
+
111
+ # Unambiguous vendor directory names
112
+ for part in dir_parts:
113
+ if part in _VENDOR_SEGMENTS:
114
+ return True
115
+
116
+ # lib/libs: vendor only for web-asset file types, not JVM/Python source
117
+ filename = parts[-1]
118
+ ext = "." + filename.rsplit(".", 1)[-1] if "." in filename else ""
119
+ if ext in _WEB_ASSET_EXTS:
120
+ for part in dir_parts:
121
+ if part in _LIB_SEGMENTS:
122
+ return True
123
+
124
+ return False
@@ -310,7 +310,8 @@ class RelevantFile:
310
310
  role: str # entrypoint | source | test
311
311
  score: float
312
312
  reason: str
313
- why: str = "" # why this file matters for the specific task
313
+ why: str = "" # why this file matters for the specific task
314
+ tier: Optional[str] = None # fix-bug only: high | medium | low
314
315
 
315
316
 
316
317
  @dataclass
@@ -974,7 +975,33 @@ class TaskContextBuilder:
974
975
  and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
975
976
  and d.scope not in {"dev"}
976
977
  ]
977
- direct.sort(key=lambda d: (0 if d.ecosystem == primary_eco else 1, d.name.lower()))
978
+ # Rank by framework centrality: core infra (ORM, Spring) > serialization > other.
979
+ # Penalise vendored tooling (closure-compiler, shaded utilities) so that
980
+ # Hibernate/JPA/Solr appear before minor build-time dependencies.
981
+ _HIGH_SIGNAL_FRAGMENTS = (
982
+ "hibernate", "jpa", "spring-core", "spring-context", "spring-web",
983
+ "spring-boot", "spring-security", "spring-data",
984
+ "solr", "elasticsearch", "kafka", "redis",
985
+ "jackson", "gson",
986
+ "mybatis", "druid", "datasource",
987
+ "tomcat", "undertow", "netty",
988
+ "slf4j", "logback", "log4j",
989
+ )
990
+ _LOW_SIGNAL_FRAGMENTS = (
991
+ "closure-compiler", "closure-library",
992
+ "google-closure", "rhino",
993
+ "guava-gwt",
994
+ )
995
+
996
+ def _dep_rank(d: Any) -> tuple:
997
+ art = (d.name or "").lower()
998
+ eco_match = 0 if d.ecosystem == primary_eco else 1
999
+ is_high = any(frag in art for frag in _HIGH_SIGNAL_FRAGMENTS)
1000
+ is_low = any(frag in art for frag in _LOW_SIGNAL_FRAGMENTS)
1001
+ infra_score = 0 if is_high else (2 if is_low else 1)
1002
+ return (eco_match, infra_score, art)
1003
+
1004
+ direct.sort(key=_dep_rank)
978
1005
  _SKIP_DEP_KEYS = {"parent", "workspace", "resolved_version", "manifest_path"}
979
1006
  key_dependencies = [
980
1007
  {k: v for k, v in asdict(d).items() if v is not None and k not in _SKIP_DEP_KEYS}
@@ -1182,6 +1209,7 @@ class TaskContextBuilder:
1182
1209
  uncommitted_files=uncommitted_files,
1183
1210
  code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
1184
1211
  delta_files=None,
1212
+ symptom=symptom if task_name == "fix-bug" else None,
1185
1213
  )
1186
1214
 
1187
1215
  # ── 6b. review-pr: derive PR-specific impact sections from delta analysis ──
@@ -2025,6 +2053,7 @@ class TaskContextBuilder:
2025
2053
  uncommitted_files: Optional[set[str]] = None,
2026
2054
  code_notes: Optional[list] = None,
2027
2055
  delta_files: Optional[set[str]] = None,
2056
+ symptom: Optional[str] = None,
2028
2057
  ) -> list[RelevantFile]:
2029
2058
  from sourcecode.ranking_engine import RankingEngine
2030
2059
  from sourcecode.file_classifier import FileClassifier
@@ -2043,6 +2072,11 @@ class TaskContextBuilder:
2043
2072
  _annotated_files: set[str] = set()
2044
2073
  _dominant_stack = ""
2045
2074
  _recently_changed_stacks: set[str] = set()
2075
+ # Query-aware signals extracted from symptom (class names, exception types, tokens)
2076
+ _symptom_class_names: set[str] = set() # CamelCase class names
2077
+ _symptom_exception_types: set[str] = set() # *Exception / *Error tokens
2078
+ _symptom_tokens: set[str] = set() # all lowercase tokens
2079
+
2046
2080
  if task_name == "fix-bug":
2047
2081
  _bug_kinds = {"FIXME", "BUG", "HACK", "XXX"}
2048
2082
  for _n in (code_notes or []):
@@ -2068,6 +2102,19 @@ class TaskContextBuilder:
2068
2102
  _dominant_stack = _stk_counts.most_common(1)[0][0]
2069
2103
  _recently_changed_stacks = set(_stk_counts.keys())
2070
2104
 
2105
+ # Extract structured signals from symptom text for AND-weighted ranking
2106
+ if symptom:
2107
+ import re as _re_bug
2108
+ _camel_re = _re_bug.compile(r'\b([A-Z][a-zA-Z0-9]+)\b')
2109
+ for _tok in _camel_re.findall(symptom):
2110
+ if _tok.endswith(("Exception", "Error", "Throwable")):
2111
+ _symptom_exception_types.add(_tok)
2112
+ else:
2113
+ _symptom_class_names.add(_tok)
2114
+ _symptom_tokens = {
2115
+ w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
2116
+ }
2117
+
2071
2118
  scored: list[tuple[float, str, RelevantFile]] = []
2072
2119
 
2073
2120
  # For delta task, score only files changed in the specified git range.
@@ -2117,6 +2164,64 @@ class TaskContextBuilder:
2117
2164
  _fix_bug_why = ""
2118
2165
  if task_name == "fix-bug":
2119
2166
  _why_parts: list[str] = []
2167
+
2168
+ # ── Query-aware AND-weighted signals (symptom-derived) ──
2169
+ # These intentionally outweigh git-recency signals so that
2170
+ # OrderServiceImpl.java ranks top-3 regardless of churn history.
2171
+ if _symptom_class_names or _symptom_exception_types:
2172
+ _stem = Path(path).stem
2173
+ _stem_lower = _stem.lower()
2174
+ _matched_class = next(
2175
+ (c for c in _symptom_class_names if _stem_lower == c.lower()),
2176
+ None,
2177
+ )
2178
+ _matched_exc = next(
2179
+ (e for e in _symptom_exception_types if _stem_lower == e.lower()),
2180
+ None,
2181
+ )
2182
+ _impl_match = next(
2183
+ (c for c in _symptom_class_names
2184
+ if _stem_lower in (c.lower() + "impl", c.lower() + "service",
2185
+ c.lower() + "serviceimpl", c.lower() + "helper")),
2186
+ None,
2187
+ )
2188
+ if _matched_class:
2189
+ content_boost += 3.0
2190
+ _why_parts.append(f"exact class match: {_stem} (+3.0)")
2191
+ elif _matched_exc:
2192
+ content_boost += 2.0
2193
+ _why_parts.append(f"exception class match: {_stem} (+2.0)")
2194
+ elif _impl_match:
2195
+ content_boost += 2.5
2196
+ _why_parts.append(f"class impl match: {_stem} (+2.5)")
2197
+ else:
2198
+ # Symbol appears anywhere in path (package adjacency)
2199
+ _path_class_hit = next(
2200
+ (c for c in _symptom_class_names if c.lower() in path_lower),
2201
+ None,
2202
+ )
2203
+ if _path_class_hit:
2204
+ content_boost += 1.0
2205
+ _why_parts.append(f"symbol in path: {_path_class_hit} (+1.0)")
2206
+ elif any(e.lower() in path_lower for e in _symptom_exception_types):
2207
+ content_boost += 0.8
2208
+ _why_parts.append("exception type in path (+0.8)")
2209
+
2210
+ # AND-weighted token intersection — multiple matching tokens >> single
2211
+ if _symptom_tokens:
2212
+ _path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
2213
+ _intersection = _symptom_tokens & _path_parts
2214
+ _n_match = len(_intersection)
2215
+ if _n_match >= 3:
2216
+ _tok_boost = min(1.2, _n_match * 0.25)
2217
+ content_boost += _tok_boost
2218
+ _why_parts.append(f"token AND match ({_n_match} terms: {sorted(_intersection)[:3]}) (+{_tok_boost:.2f})")
2219
+ elif _n_match == 2:
2220
+ content_boost += 0.4
2221
+ _why_parts.append(f"token AND match (2 terms: {sorted(_intersection)}) (+0.40)")
2222
+ # Single-token match: no boost — avoids OR explosion
2223
+
2224
+ # ── Git / annotation signals ──
2120
2225
  if path in _uncommitted:
2121
2226
  content_boost += 0.40
2122
2227
  _why_parts.append("uncommitted change (+0.40)")
@@ -2203,7 +2308,7 @@ class TaskContextBuilder:
2203
2308
  }
2204
2309
  _repo_size = len(all_paths)
2205
2310
  _task_budget = {
2206
- "fix-bug": max(20, min(40, _repo_size // 80)),
2311
+ "fix-bug": 30, # hard cap prevents token explosion on large repos
2207
2312
  "onboard": max(15, min(25, _repo_size // 150)),
2208
2313
  "explain": max(10, min(20, _repo_size // 200)),
2209
2314
  "generate-tests": max(20, min(35, _repo_size // 100)),
@@ -2271,7 +2376,21 @@ class TaskContextBuilder:
2271
2376
  _covered.add(_layer)
2272
2377
  _missing.discard(_layer)
2273
2378
 
2274
- return [_rf_map[p] for p in _selected if p in _rf_map]
2379
+ result = [_rf_map[p] for p in _selected if p in _rf_map]
2380
+
2381
+ # Assign fix-bug tiers based on raw score (pre-normalised total)
2382
+ if task_name == "fix-bug":
2383
+ _score_lookup = {path: total for total, path, _ in scored}
2384
+ for _rf in result:
2385
+ _s = _score_lookup.get(_rf.path, 0.0)
2386
+ if _s >= 4.0:
2387
+ _rf.tier = "high"
2388
+ elif _s >= 1.5:
2389
+ _rf.tier = "medium"
2390
+ else:
2391
+ _rf.tier = "low"
2392
+
2393
+ return result
2275
2394
  except Exception:
2276
2395
  return [f for _, _, f in scored[:15]]
2277
2396
 
@@ -178,8 +178,11 @@ _SPRING_OTHER: frozenset[str] = frozenset({
178
178
  "@PutMapping", "@DeleteMapping", "@PatchMapping", "@Autowired",
179
179
  "@Inject", "@Value", "@Qualifier", "@EnableWebSecurity",
180
180
  "@SpringBootApplication", "@EnableAutoConfiguration",
181
+ "@EventListener", "@Async", "@Scheduled", "@Cacheable", "@CacheEvict",
181
182
  })
182
183
 
184
+ _PUBLISH_EVENT_RE = re.compile(r'\.publishEvent\s*\(\s*new\s+(\w+)\s*[(\{]')
185
+
183
186
  _HTTP_METHOD_MAP: dict[str, str] = {
184
187
  "@GetMapping": "GET",
185
188
  "@PostMapping": "POST",
@@ -787,6 +790,33 @@ def _build_relations(
787
790
  evidence={"type": "structural", "value": f"member of {enclosing}"},
788
791
  ))
789
792
 
793
+ # Fix 5: Event flow edges — publishEvent publishers and @EventListener subscribers.
794
+ # listens_to_event: method with @EventListener → resolved event parameter type(s).
795
+ for sym in symbols:
796
+ if sym.type == "method" and "@EventListener" in sym.annotations:
797
+ for imp_fqn in sym.imports_used:
798
+ edges.append(RelationEdge(
799
+ from_symbol=sym.symbol,
800
+ to_symbol=imp_fqn,
801
+ type="listens_to_event",
802
+ confidence="high",
803
+ evidence={"type": "annotation", "value": "@EventListener"},
804
+ ))
805
+
806
+ # publishes_event: class that calls publishEvent(new XxxEvent(...)) → event type FQN.
807
+ _class_syms = [s for s in symbols if s.type in ("class", "interface") and "#" not in s.symbol]
808
+ for m in _PUBLISH_EVENT_RE.finditer(source):
809
+ event_simple = m.group(1)
810
+ event_fqn = import_map.get(event_simple, event_simple)
811
+ for cls_sym in _class_syms:
812
+ edges.append(RelationEdge(
813
+ from_symbol=cls_sym.symbol,
814
+ to_symbol=event_fqn,
815
+ type="publishes_event",
816
+ confidence="medium",
817
+ evidence={"type": "method_call", "value": f"publishEvent(new {event_simple})"},
818
+ ))
819
+
790
820
  seen: set[tuple[str, str, str]] = set()
791
821
  unique: list[RelationEdge] = []
792
822
  for e in edges:
@@ -1190,6 +1220,8 @@ _EDGE_REASON_TEMPLATES: dict[str, str] = {
1190
1220
  "contained_in": "{from_sym} is a member of {to_sym}",
1191
1221
  "annotated_with": "{from_sym} is annotated with {to_sym}",
1192
1222
  "mapped_to": "Route {to_sym} depends on {from_sym}",
1223
+ "publishes_event": "{from_sym} publishes event {to_sym}",
1224
+ "listens_to_event": "{from_sym} listens for event {to_sym}",
1193
1225
  }
1194
1226
 
1195
1227
  # Edge types to exclude from reverse impact traversal (too noisy / non-dependency semantics)
@@ -1526,13 +1558,43 @@ def _assemble(
1526
1558
  },
1527
1559
  "subsystems": subsystems,
1528
1560
  "change_set": change_set_out,
1529
- "route_surface": route_diffs or [],
1561
+ "route_surface": _build_route_surface(sorted_syms, route_diffs),
1530
1562
  "audit": {
1531
1563
  "dropped_fields": dropped_fields,
1532
1564
  },
1533
1565
  }
1534
1566
 
1535
1567
 
1568
+ # ---------------------------------------------------------------------------
1569
+ # Route surface helper (Fix 4)
1570
+ # ---------------------------------------------------------------------------
1571
+
1572
+ def _build_route_surface(symbols: list[SymbolRecord], route_diffs: Optional[list[dict]]) -> list[dict]:
1573
+ """Return route surface: diffs when --since provided, else static snapshot of all endpoints."""
1574
+ if route_diffs:
1575
+ return route_diffs
1576
+ routes: list[dict] = []
1577
+ for sym in symbols:
1578
+ if sym.symbol_kind != "endpoint":
1579
+ continue
1580
+ ann_name = next((a for a in sym.annotations if a in _ENDPOINT_ANNOTATIONS), None)
1581
+ if not ann_name:
1582
+ continue
1583
+ args = sym.annotation_values.get(ann_name, "")
1584
+ path = _parse_route_path(args)
1585
+ method = _parse_route_http_method(ann_name, args)
1586
+ if not path and not method:
1587
+ continue
1588
+ routes.append({
1589
+ "symbol": sym.symbol,
1590
+ "controller": _enclosing_class(sym.symbol),
1591
+ "path": path,
1592
+ "method": method or "GET",
1593
+ "stable_id": sym.stable_id,
1594
+ })
1595
+ return sorted(routes, key=lambda r: (r["controller"], r["path"]))
1596
+
1597
+
1536
1598
  # ---------------------------------------------------------------------------
1537
1599
  # Public API
1538
1600
  # ---------------------------------------------------------------------------
@@ -1675,7 +1737,17 @@ def apply_ir_size_limits(
1675
1737
  "remove --summary-only to restore full graph"
1676
1738
  ),
1677
1739
  }
1678
- out["reverse_graph"] = {}
1740
+ # Fix 3: keep bounded reverse graph instead of wiping it.
1741
+ full_rg: dict = ir.get("reverse_graph") or {}
1742
+ if full_rg:
1743
+ _rg_sorted = sorted(
1744
+ full_rg.items(),
1745
+ key=lambda x: sum(len(v) for v in x[1].values()),
1746
+ reverse=True,
1747
+ )
1748
+ out["reverse_graph"] = dict(_rg_sorted[:50])
1749
+ else:
1750
+ out["reverse_graph"] = {}
1679
1751
  out["impact"] = {
1680
1752
  "global_score": (ir.get("impact") or {}).get("global_score", 0),
1681
1753
  "ranked_nodes": ranked[:20],
@@ -1703,10 +1775,19 @@ def apply_ir_size_limits(
1703
1775
 
1704
1776
  if kept_fqns is not None or max_edges is not None:
1705
1777
  if kept_fqns is not None:
1706
- # Priority: edges where both endpoints are kept nodes
1707
- priority = [e for e in edges if e["from"] in kept_fqns and e["to"] in kept_fqns]
1708
- rest = [e for e in edges if not (e["from"] in kept_fqns and e["to"] in kept_fqns)]
1709
- edges = priority + rest
1778
+ # Fix 2: type-aware priority so semantic edges survive node truncation.
1779
+ # Annotation strings (@Service etc.) and field FQNs are never in kept_fqns,
1780
+ # so "both endpoints kept" drops all injects/annotated_with edges.
1781
+ _SEMANTIC_TYPES = frozenset({"extends", "implements", "injects",
1782
+ "publishes_event", "listens_to_event"})
1783
+ _ANNOTATION_TYPES = frozenset({"annotated_with"})
1784
+ tier1 = [e for e in edges if e["from"] in kept_fqns and e["type"] in _SEMANTIC_TYPES]
1785
+ tier2 = [e for e in edges if e["from"] in kept_fqns and e["type"] in _ANNOTATION_TYPES]
1786
+ tier3 = [e for e in edges
1787
+ if e["from"] in kept_fqns and e["to"] in kept_fqns and e["type"] == "imports"]
1788
+ _seen_e = {(e["from"], e["to"], e["type"]) for e in tier1 + tier2 + tier3}
1789
+ tier4 = [e for e in edges if (e["from"], e["to"], e["type"]) not in _seen_e]
1790
+ edges = tier1 + tier2 + tier3 + tier4
1710
1791
  if max_edges is not None:
1711
1792
  edges = edges[:max_edges]
1712
1793
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.5
3
+ Version: 1.31.7
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **Deterministic, behavior-aware codebase context for AI agents and PR review.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.5-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.7-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -261,7 +261,7 @@ pipx install sourcecode
261
261
 
262
262
  ```bash
263
263
  sourcecode version
264
- # sourcecode 1.31.5
264
+ # sourcecode 1.31.7
265
265
  ```
266
266
 
267
267
  ---
@@ -1,11 +1,11 @@
1
- sourcecode/__init__.py,sha256=INTl4i-9L8tLlPUGq6eihBx0YEE8OQu6cHtsMNAu_vc,103
1
+ sourcecode/__init__.py,sha256=8R-nJsTbK4mRxB0Ix8W0xEdhYCBpK8R-2dyS_nBxiDc,103
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=-0t0HLc9L9UleMLfclfLM3AXhBjUb_AYyBPDbvgWtac,7755
7
- sourcecode/cli.py,sha256=IDe_6kp4hgqPX6YLA-Gu6-rlwbyT4iXfuYJhG4pmhe0,126525
8
- sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
7
+ sourcecode/cli.py,sha256=L0KDRruMogpw9kq7trZ7dmWQuFC746GIEN1oym6LmCE,131802
8
+ sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
9
9
  sourcecode/confidence_analyzer.py,sha256=ZUn-Nywi5TEQcuozqK_vfOyPT-a1dYYO42elAtVFV-k,16412
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
11
11
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
@@ -21,14 +21,15 @@ sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,2
21
21
  sourcecode/git_analyzer.py,sha256=0Gyj-vMpIIN4nfriKXVRouNYBeJ59s6pQDX2Xu9Pq-U,13177
22
22
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
23
23
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
24
+ sourcecode/path_filters.py,sha256=ROFRQ8eSLBEMiixK9f45-RO7um4VEEcjoD5AA4I427I,3739
24
25
  sourcecode/pr_comment_renderer.py,sha256=smHslxiG14lrytCkq5nFrFu-qTHgA-t-LFYfdrfjz2o,14423
25
- sourcecode/prepare_context.py,sha256=Eid3Wmh4lmtr20pmobcCU0yzjRXhFql2rEmfGUgkFpU,183214
26
+ sourcecode/prepare_context.py,sha256=-9kTYuPhwr79mF6lNe9tI7glGAU_d84GyVurHceGroo,189427
26
27
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
27
28
  sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,12970
28
29
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
29
30
  sourcecode/relevance_scorer.py,sha256=MYF4FFkveAQps9SmTeTlh6ODiBz2F--_hWNeHMLtUHQ,8405
30
31
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
31
- sourcecode/repository_ir.py,sha256=KH5EehbjOh8ZwwTHcbzrAHiKDoquO49wgSvCX4bVq5k,64391
32
+ sourcecode/repository_ir.py,sha256=fLt33wadCMY77jd34YwkYbUT5TyNu2G0LX0x76Krsvo,68348
32
33
  sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
33
34
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
34
35
  sourcecode/schema.py,sha256=fj3BZ3IcnNV4j21BFIEvz8Qnw_vZoqIbzzRg-qQ-nd0,24530
@@ -46,7 +47,7 @@ sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJ
46
47
  sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
47
48
  sourcecode/detectors/heuristic.py,sha256=7cRxrip4yIaggYzZJB6ef8yHKh-gHgiH_pXMFcjlyFU,3723
48
49
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
49
- sourcecode/detectors/java.py,sha256=ldvaDZJADAKslOpS5qJ0bxexgeY6h_4Yx4NCtHio7J8,24203
50
+ sourcecode/detectors/java.py,sha256=O2JdznVYv5364GSQExksYLsAi0pvDUW9ZowpsL0xLgM,24451
50
51
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
51
52
  sourcecode/detectors/nodejs.py,sha256=Hg3Gmr7yIMJFiLoDwOTk2wtu00wxIs6kZf-oQujTFUA,13187
52
53
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
@@ -72,8 +73,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
72
73
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
73
74
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
74
75
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
75
- sourcecode-1.31.5.dist-info/METADATA,sha256=UcfeOr7ZaZWAjC3WByNVOIoPw3VlJPF3pS9Fo7f-Ch0,29083
76
- sourcecode-1.31.5.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
77
- sourcecode-1.31.5.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
78
- sourcecode-1.31.5.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
79
- sourcecode-1.31.5.dist-info/RECORD,,
76
+ sourcecode-1.31.7.dist-info/METADATA,sha256=O08_eBOxRxeDd8UkUKyIrW6haJHuQvAddgiFAEma0dQ,29083
77
+ sourcecode-1.31.7.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
78
+ sourcecode-1.31.7.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
79
+ sourcecode-1.31.7.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
80
+ sourcecode-1.31.7.dist-info/RECORD,,