sourcecode 1.31.4__py3-none-any.whl → 1.31.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.31.4"
3
+ __version__ = "1.31.6"
sourcecode/cli.py CHANGED
@@ -401,7 +401,7 @@ def main(
401
401
  help=(
402
402
  "High-signal summary (typically 1000–3000 tokens depending on repo size): "
403
403
  "stacks, entry points, dependency summary, confidence, and gaps. "
404
- "Includes security_surface, mybatis, and transactional_boundaries for Java projects. "
404
+ "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
405
405
  "Use --agent for maximum signal or --slim (when available) for minimal token footprint."
406
406
  ),
407
407
  ),
@@ -2418,14 +2418,20 @@ def repo_ir_cmd(
2418
2418
 
2419
2419
  if output_path:
2420
2420
  output_path.write_text(output, encoding="utf-8")
2421
- n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
2422
- n_edges = len((ir.get("graph") or {}).get("edges") or [])
2423
2421
  size_kb = len(output.encode("utf-8")) // 1024
2424
- typer.echo(
2425
- f"IR written to {output_path} "
2426
- f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
2427
- err=True,
2428
- )
2422
+ if summary_only:
2423
+ typer.echo(
2424
+ f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
2425
+ err=True,
2426
+ )
2427
+ else:
2428
+ n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
2429
+ n_edges = len((ir.get("graph") or {}).get("edges") or [])
2430
+ typer.echo(
2431
+ f"IR written to {output_path} "
2432
+ f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
2433
+ err=True,
2434
+ )
2429
2435
  else:
2430
2436
  try:
2431
2437
  _sys.stdout.buffer.write(output.encode("utf-8"))
@@ -2462,7 +2468,11 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2462
2468
  r'@(Get|Post|Put|Delete|Patch|Request)Mapping\s*'
2463
2469
  r'(?:\(\s*(?:value\s*=\s*)?(?:"([^"]*)"|\{[^}]*\}|[^)]*)\s*\))?',
2464
2470
  )
2465
- _CLASS_RE = _re.compile(r'(?:class|interface)\s+(\w+)')
2471
+ _CLASS_RE = _re.compile(
2472
+ r'^[ \t]*(?:(?:public|protected|private|abstract|final|@\w+)\s+)*'
2473
+ r'(?:class|interface)\s+(\w+)',
2474
+ _re.MULTILINE,
2475
+ )
2466
2476
  _METHOD_RE = _re.compile(
2467
2477
  r'(?:public|protected|private)\s+\S+\s+(\w+)\s*\(',
2468
2478
  )
@@ -2486,22 +2496,76 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2486
2496
  endpoints: list[dict] = []
2487
2497
  seen: set[tuple] = set()
2488
2498
 
2499
+ from sourcecode.path_filters import is_test_path as _is_test_path
2500
+
2489
2501
  java_files = [
2490
2502
  p for p in root.rglob("*.java")
2491
- if "/test/" not in str(p).replace("\\", "/")
2492
- and "/tests/" not in str(p).replace("\\", "/")
2503
+ if not _is_test_path(str(p).replace("\\", "/"))
2493
2504
  and "target/" not in str(p).replace("\\", "/")
2494
2505
  ]
2495
2506
 
2507
+ # ── Meta-annotation index ─────────────────────────────────────────────────
2508
+ # First pass: find @interface declarations and the annotations they carry.
2509
+ # This lets us detect controllers annotated with framework wrappers like
2510
+ # @FrameworkController that itself carries @Controller.
2511
+ #
2512
+ # Index maps annotation simple name → set of annotation names it is
2513
+ # annotated with (one level; resolution is done recursively below).
2514
+ _ANN_DECL_RE = _re.compile(r'public\s+@interface\s+(\w+)')
2515
+ _ANN_USE_RE = _re.compile(r'@(\w+)')
2516
+
2517
+ _meta_index: dict[str, set[str]] = {}
2518
+ for _jf in java_files:
2519
+ _raw: str
2520
+ try:
2521
+ _raw = _jf.read_text(encoding="utf-8", errors="replace")
2522
+ except OSError:
2523
+ continue
2524
+ if "@interface" not in _raw:
2525
+ continue
2526
+ _decl_m = _ANN_DECL_RE.search(_raw)
2527
+ if not _decl_m:
2528
+ continue
2529
+ _ann_name = _decl_m.group(1)
2530
+ # Collect annotations appearing in the header (before the @interface line)
2531
+ _header = _raw[: _decl_m.start()]
2532
+ _meta_anns: set[str] = {
2533
+ m for m in _ANN_USE_RE.findall(_header)
2534
+ if m not in ("interface", "interface") # strip keywords; annotation names only
2535
+ }
2536
+ _meta_index[_ann_name] = _meta_anns
2537
+
2538
+ _CONTROLLER_CORE = frozenset({"Controller", "RestController"})
2539
+
2540
+ def _resolves_to_controller(name: str, visited: "set[str]") -> bool:
2541
+ """Return True when annotation *name* transitively carries @Controller/@RestController."""
2542
+ if name in _CONTROLLER_CORE:
2543
+ return True
2544
+ if name in visited:
2545
+ return False
2546
+ visited.add(name)
2547
+ for parent in _meta_index.get(name, ()):
2548
+ if _resolves_to_controller(parent, visited):
2549
+ return True
2550
+ return False
2551
+
2496
2552
  for java_file in java_files:
2497
2553
  try:
2498
2554
  content = java_file.read_text(encoding="utf-8", errors="replace")
2499
2555
  except OSError:
2500
2556
  continue
2501
2557
 
2502
- # Only process files with REST controller or mapping annotations
2503
- if not any(x in content for x in ("@RestController", "@Controller", "@RequestMapping")):
2504
- continue
2558
+ # Process files with direct controller/mapping annotations OR
2559
+ # with custom annotations that transitively resolve to @Controller/@RestController.
2560
+ _has_direct = any(x in content for x in ("@RestController", "@Controller", "@RequestMapping"))
2561
+ if not _has_direct:
2562
+ # Quick meta-annotation check: extract class-level annotation names
2563
+ # (first 60 lines — before the class body opens) and resolve them.
2564
+ _header_lines = content.splitlines()[:60]
2565
+ _header_text = "\n".join(_header_lines)
2566
+ _file_anns = set(_ANN_USE_RE.findall(_header_text))
2567
+ if not any(_resolves_to_controller(a, set()) for a in _file_anns):
2568
+ continue
2505
2569
 
2506
2570
  try:
2507
2571
  rel_path = str(java_file.relative_to(root)).replace("\\", "/")
@@ -2540,7 +2604,45 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
2540
2604
  if "class " in block or "interface " in block:
2541
2605
  path_m = _CLASS_PATH_RE.search(block)
2542
2606
  if path_m:
2543
- class_bases = [path_m.group(1).rstrip("/")]
2607
+ captured = path_m.group(1).rstrip("/")
2608
+ # Handle string concat: @RequestMapping("lit" + ClassName.CONST)
2609
+ _CONCAT_CONST_RE = _re.compile(
2610
+ r'@RequestMapping\s*\(\s*(?:value\s*=\s*)?'
2611
+ r'["\']([^"\']*)["\'\s]*\+\s*(\w+)\.(\w+)'
2612
+ )
2613
+ cc_m = _CONCAT_CONST_RE.search(block)
2614
+ if cc_m:
2615
+ prefix_lit = cc_m.group(1)
2616
+ c_class = cc_m.group(2)
2617
+ c_field = cc_m.group(3)
2618
+ _SVAL_RE = _re.compile(
2619
+ r'static\s+final\s+String\s+'
2620
+ + _re.escape(c_field)
2621
+ + r'\s*=\s*"([^"]+)"'
2622
+ )
2623
+ resolved = None
2624
+ m_cur = _SVAL_RE.search(content)
2625
+ if m_cur:
2626
+ resolved = m_cur.group(1)
2627
+ else:
2628
+ for _jf in java_files:
2629
+ if _jf.stem == c_class:
2630
+ try:
2631
+ _jf_txt = _jf.read_text(
2632
+ encoding="utf-8", errors="replace"
2633
+ )
2634
+ m_jf = _SVAL_RE.search(_jf_txt)
2635
+ if m_jf:
2636
+ resolved = m_jf.group(1)
2637
+ break
2638
+ except OSError:
2639
+ pass
2640
+ if resolved is not None:
2641
+ class_bases = [(prefix_lit + resolved).rstrip("/")]
2642
+ else:
2643
+ class_bases = [captured] if captured else [""]
2644
+ else:
2645
+ class_bases = [captured] if captured else [""]
2544
2646
  else:
2545
2647
  arr_m = _CLASS_ARRAY_PATH_RE.search(block)
2546
2648
  if arr_m:
@@ -20,7 +20,8 @@ _SYMBOL_LOOKBACK = 25 # líneas hacia atrás para encontrar el símbolo envolve
20
20
  _SKIP_DIRS = {
21
21
  "node_modules", ".git", "__pycache__", ".venv", "venv",
22
22
  ".mypy_cache", "dist", "build", ".tox", ".eggs",
23
- ".next", ".nuxt", ".output", "vendor", "coverage",
23
+ ".next", ".nuxt", ".output", "vendor", "vendors", "coverage",
24
+ "third_party", "thirdparty",
24
25
  }
25
26
 
26
27
  _CODE_EXTENSIONS = {
@@ -255,4 +256,6 @@ class CodeNotesAnalyzer:
255
256
  # here was redundant and caused files to be silently skipped when
256
257
  # traversal order varied (different files filled the quota first).
257
258
  if suffix in _CODE_EXTENSIONS:
258
- _scan_source_file(entry, rel, notes, total_count)
259
+ from sourcecode.path_filters import is_vendor_path as _is_vendor
260
+ if not _is_vendor(rel):
261
+ _scan_source_file(entry, rel, notes, total_count)
@@ -59,8 +59,13 @@ class HeuristicDetector(AbstractDetector):
59
59
  paths = flatten_file_tree(context.file_tree)
60
60
  counts: Counter[str] = Counter()
61
61
  for path in paths:
62
- if path.startswith("."):
62
+ if path.startswith(".") or _is_auxiliary_path(path):
63
63
  continue
64
+ # Skip JS/TS files bundled as Java static resources (not Node.js source)
65
+ if path.endswith((".js", ".ts", ".tsx", ".jsx", ".mjs")):
66
+ _np = path.replace("\\", "/")
67
+ if "src/main/resources/" in _np or "src/main/webapp/" in _np:
68
+ continue
64
69
  for extension, stack in _EXTENSION_MAP.items():
65
70
  if path.endswith(extension):
66
71
  counts[stack] += 1
@@ -295,9 +295,13 @@ class JavaDetector(AbstractDetector):
295
295
  self._augment_deep_java_controllers(context, all_java)
296
296
 
297
297
  # 1. @SpringBootApplication entry: Application.java / Main.java by name
298
+ # Exclude test trees: test helpers like AdminApplication.java in
299
+ # integration/src/test/java/ must not be treated as production entrypoints.
300
+ from sourcecode.path_filters import is_test_path as _is_test_path
298
301
  app_candidates = [
299
302
  p for p in all_java
300
303
  if p.endswith(("Application.java", "Main.java"))
304
+ and not _is_test_path(p)
301
305
  ]
302
306
  entry_points: list[EntryPoint] = [
303
307
  EntryPoint(path=p, stack="java", kind="application", source="manifest")
@@ -307,7 +311,7 @@ class JavaDetector(AbstractDetector):
307
311
  # 2. Annotation-based scan: @RestController, @WebFilter, FilterRegistrationBean
308
312
  # Prioritize Controller-named files so all REST controllers are detected
309
313
  # even in large codebases where total files > _MAX_JAVA_ENTRY_SCAN.
310
- _non_test = [p for p in all_java if "/test/" not in p and "/tests/" not in p]
314
+ _non_test = [p for p in all_java if not _is_test_path(p)]
311
315
  _ctrl_files = [p for p in _non_test if "Controller" in p]
312
316
  _other_files = [p for p in _non_test if "Controller" not in p]
313
317
  scan_candidates = _ctrl_files + _other_files[:max(0, _MAX_JAVA_ENTRY_SCAN - len(_ctrl_files))]
@@ -0,0 +1,124 @@
1
+ """Shared path classification helpers used across all tools.
2
+
3
+ Centralises test-path and vendor-path detection so each tool does not
4
+ duplicate — and diverge — these heuristics.
5
+ """
6
+ from __future__ import annotations
7
+
8
# Directory names (compared lower-cased) that mark everything beneath them as
# test code.
_TEST_SEGMENTS = frozenset({
    "test", "tests", "spec", "specs",
    "test-helpers", "test_helpers", "testfixtures",
    "it",  # integration-tests short name
    "integrationtest", "integrationtests",
})

# Directory names (compared lower-cased) that mark everything beneath them as
# vendored / third-party code, whatever the file type.
_VENDOR_SEGMENTS = frozenset({
    "vendor", "vendors",
    "third_party", "thirdparty",
    "node_modules",
    "external", "externals",
    "contrib",
})

# lib/libs are vendor only for web-asset extensions.
# Java/Kotlin/Python source in a package named "lib" is NOT vendor.
_LIB_SEGMENTS = frozenset({"lib", "libs"})
_WEB_ASSET_EXTS = frozenset({
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx",
    ".css", ".less", ".scss", ".sass",
    ".json", ".map",
})

# Substring form of _VENDOR_SEGMENTS.  The functions below no longer consult
# it: a "/name/" fragment can only match a directory component, which the
# segment check already covers.  The name stays defined in case other modules
# import it.
_VENDOR_PATH_FRAGMENTS = (
    "/vendor/", "/vendors/",
    "/third_party/", "/thirdparty/",
    "/node_modules/",
    "/external/", "/externals/",
    "/contrib/",
)

# Not referenced by the functions below ("test" is already a test segment);
# kept defined in case other modules import it.
_JAVA_TEST_ROOTS = (
    "/src/test/",
    "\\src\\test\\",
)

# Lower-case file-name endings for JS/TS test files: foo.test.ts, foo.spec.jsx, ...
_JS_TEST_SUFFIXES = tuple(
    f".{marker}.{ext}"
    for marker in ("test", "spec")
    for ext in ("js", "jsx", "mjs", "cjs", "ts", "tsx")
)


def _is_java_test_name(filename: str) -> bool:
    """Return True when *filename* follows a Java test-class naming convention.

    *filename* must be passed in its original case.  "Test"/"Tests" is matched
    as a CamelCase word so that ``FooTest.java`` and ``TestFoo.java`` qualify
    while ``Latest.java``, ``Contest.java`` and ``Testimonial.java`` do not.
    A bare ``Test.java`` is not treated as a test.
    """
    stem, dot, ext = filename.rpartition(".")
    if not dot or ext.lower() != "java":
        return False

    # A single-case stem (e.g. a path the caller already lower-cased) carries
    # no word-boundary information, so fall back to plain substring matching.
    if stem in (stem.lower(), stem.upper()):
        low = stem.lower()
        return (
            (low.endswith("test") and low != "test")
            or low.endswith("tests")
            or (low.startswith("test") and len(low) > 4)
        )

    # Suffix forms: FooTest / FooTests, or foo_test / foo1tests where the
    # lower-case word is separated from the preceding text by a non-letter.
    if stem != "Test" and stem.endswith(("Test", "Tests")):
        return True
    for word in ("tests", "test"):
        if (
            stem.endswith(word)
            and len(stem) > len(word)
            and not stem[-len(word) - 1].isalpha()
        ):
            return True

    # Prefix forms: TestFoo / TestsFoo / testFoo / Test_foo.  The word must be
    # followed by something that is not a lower-case letter, otherwise it is
    # merely the start of a longer word ("Testimonial").
    for word in ("Tests", "Test", "tests", "test"):
        if (
            stem.startswith(word)
            and len(stem) > len(word)
            and not stem[len(word)].islower()
        ):
            return True
    return False


def is_test_path(path: str) -> bool:
    """Return True when *path* is part of a test tree, not production code.

    Handles:
    - Standard Maven/Gradle layout (src/test/java/…)
    - Common directory names (tests/, spec/, it/, …) — see ``_TEST_SEGMENTS``
    - Java file name conventions (FooTest.java, FooTests.java, TestFoo.java)
    - Python conventions (test_foo.py, foo_test.py)
    - JS/TS conventions (foo.test.ts, foo.spec.tsx, …)

    Both ``/`` and ``\\`` are accepted as separators.  Every directory
    component of *path* is inspected, so an absolute path whose ancestor
    directory happens to be called e.g. ``tests`` is classified as a test
    path; pass a repository-relative path to avoid that.
    """
    components = path.replace("\\", "/").split("/")
    filename = components[-1]

    # Any directory component (the file name itself is excluded) named like a
    # test directory marks the whole subtree.  This also covers the Maven
    # "src/test/" layout, because "test" is one of the segments.
    if not _TEST_SEGMENTS.isdisjoint(part.lower() for part in components[:-1]):
        return True

    # Case-insensitive file-name conventions.  The "test_" prefix is applied
    # regardless of extension.
    lowered = filename.lower()
    if (
        lowered.startswith("test_")
        or lowered.endswith("_test.py")
        or lowered.endswith(_JS_TEST_SUFFIXES)
    ):
        return True

    # Java conventions need the original case to find CamelCase boundaries.
    return _is_java_test_name(filename)


def is_vendor_path(path: str) -> bool:
    """Return True when *path* is inside a vendored / third-party directory.

    Handles:
    - Any directory component listed in ``_VENDOR_SEGMENTS`` (vendor/,
      third_party/, node_modules/, external/, contrib/, …)
    - lib/ and libs/ directories, but only for web-asset file types (NOT
      JVM/Python source — those may legitimately use "lib" as a package name)
    - Minified JS/CSS files anywhere (*.min.js, *.min.css)

    Matching is case-insensitive and accepts both ``/`` and ``\\`` separators.
    """
    norm = path.replace("\\", "/").lower()

    # Minified files are always vendor regardless of directory
    if norm.endswith((".min.js", ".min.css")):
        return True

    *dir_parts, filename = norm.split("/")

    # Unambiguous vendor directory names
    if not _VENDOR_SEGMENTS.isdisjoint(dir_parts):
        return True

    # lib/libs: vendor only for web-asset file types, not JVM/Python source
    _, dot, ext = filename.rpartition(".")
    if dot and f".{ext}" in _WEB_ASSET_EXTS:
        return not _LIB_SEGMENTS.isdisjoint(dir_parts)

    return False
@@ -310,7 +310,8 @@ class RelevantFile:
310
310
  role: str # entrypoint | source | test
311
311
  score: float
312
312
  reason: str
313
- why: str = "" # why this file matters for the specific task
313
+ why: str = "" # why this file matters for the specific task
314
+ tier: Optional[str] = None # fix-bug only: high | medium | low
314
315
 
315
316
 
316
317
  @dataclass
@@ -974,7 +975,33 @@ class TaskContextBuilder:
974
975
  and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
975
976
  and d.scope not in {"dev"}
976
977
  ]
977
- direct.sort(key=lambda d: (0 if d.ecosystem == primary_eco else 1, d.name.lower()))
978
+ # Rank by framework centrality: core infra (ORM, Spring) > serialization > other.
979
+ # Penalise vendored tooling (closure-compiler, shaded utilities) so that
980
+ # Hibernate/JPA/Solr appear before minor build-time dependencies.
981
+ _HIGH_SIGNAL_FRAGMENTS = (
982
+ "hibernate", "jpa", "spring-core", "spring-context", "spring-web",
983
+ "spring-boot", "spring-security", "spring-data",
984
+ "solr", "elasticsearch", "kafka", "redis",
985
+ "jackson", "gson",
986
+ "mybatis", "druid", "datasource",
987
+ "tomcat", "undertow", "netty",
988
+ "slf4j", "logback", "log4j",
989
+ )
990
+ _LOW_SIGNAL_FRAGMENTS = (
991
+ "closure-compiler", "closure-library",
992
+ "google-closure", "rhino",
993
+ "guava-gwt",
994
+ )
995
+
996
+ def _dep_rank(d: Any) -> tuple:
997
+ art = (d.name or "").lower()
998
+ eco_match = 0 if d.ecosystem == primary_eco else 1
999
+ is_high = any(frag in art for frag in _HIGH_SIGNAL_FRAGMENTS)
1000
+ is_low = any(frag in art for frag in _LOW_SIGNAL_FRAGMENTS)
1001
+ infra_score = 0 if is_high else (2 if is_low else 1)
1002
+ return (eco_match, infra_score, art)
1003
+
1004
+ direct.sort(key=_dep_rank)
978
1005
  _SKIP_DEP_KEYS = {"parent", "workspace", "resolved_version", "manifest_path"}
979
1006
  key_dependencies = [
980
1007
  {k: v for k, v in asdict(d).items() if v is not None and k not in _SKIP_DEP_KEYS}
@@ -1182,6 +1209,7 @@ class TaskContextBuilder:
1182
1209
  uncommitted_files=uncommitted_files,
1183
1210
  code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
1184
1211
  delta_files=None,
1212
+ symptom=symptom if task_name == "fix-bug" else None,
1185
1213
  )
1186
1214
 
1187
1215
  # ── 6b. review-pr: derive PR-specific impact sections from delta analysis ──
@@ -2025,6 +2053,7 @@ class TaskContextBuilder:
2025
2053
  uncommitted_files: Optional[set[str]] = None,
2026
2054
  code_notes: Optional[list] = None,
2027
2055
  delta_files: Optional[set[str]] = None,
2056
+ symptom: Optional[str] = None,
2028
2057
  ) -> list[RelevantFile]:
2029
2058
  from sourcecode.ranking_engine import RankingEngine
2030
2059
  from sourcecode.file_classifier import FileClassifier
@@ -2043,6 +2072,11 @@ class TaskContextBuilder:
2043
2072
  _annotated_files: set[str] = set()
2044
2073
  _dominant_stack = ""
2045
2074
  _recently_changed_stacks: set[str] = set()
2075
+ # Query-aware signals extracted from symptom (class names, exception types, tokens)
2076
+ _symptom_class_names: set[str] = set() # CamelCase class names
2077
+ _symptom_exception_types: set[str] = set() # *Exception / *Error tokens
2078
+ _symptom_tokens: set[str] = set() # all lowercase tokens
2079
+
2046
2080
  if task_name == "fix-bug":
2047
2081
  _bug_kinds = {"FIXME", "BUG", "HACK", "XXX"}
2048
2082
  for _n in (code_notes or []):
@@ -2068,6 +2102,19 @@ class TaskContextBuilder:
2068
2102
  _dominant_stack = _stk_counts.most_common(1)[0][0]
2069
2103
  _recently_changed_stacks = set(_stk_counts.keys())
2070
2104
 
2105
+ # Extract structured signals from symptom text for AND-weighted ranking
2106
+ if symptom:
2107
+ import re as _re_bug
2108
+ _camel_re = _re_bug.compile(r'\b([A-Z][a-zA-Z0-9]+)\b')
2109
+ for _tok in _camel_re.findall(symptom):
2110
+ if _tok.endswith(("Exception", "Error", "Throwable")):
2111
+ _symptom_exception_types.add(_tok)
2112
+ else:
2113
+ _symptom_class_names.add(_tok)
2114
+ _symptom_tokens = {
2115
+ w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
2116
+ }
2117
+
2071
2118
  scored: list[tuple[float, str, RelevantFile]] = []
2072
2119
 
2073
2120
  # For delta task, score only files changed in the specified git range.
@@ -2117,6 +2164,64 @@ class TaskContextBuilder:
2117
2164
  _fix_bug_why = ""
2118
2165
  if task_name == "fix-bug":
2119
2166
  _why_parts: list[str] = []
2167
+
2168
+ # ── Query-aware AND-weighted signals (symptom-derived) ──
2169
+ # These intentionally outweigh git-recency signals so that
2170
+ # OrderServiceImpl.java ranks top-3 regardless of churn history.
2171
+ if _symptom_class_names or _symptom_exception_types:
2172
+ _stem = Path(path).stem
2173
+ _stem_lower = _stem.lower()
2174
+ _matched_class = next(
2175
+ (c for c in _symptom_class_names if _stem_lower == c.lower()),
2176
+ None,
2177
+ )
2178
+ _matched_exc = next(
2179
+ (e for e in _symptom_exception_types if _stem_lower == e.lower()),
2180
+ None,
2181
+ )
2182
+ _impl_match = next(
2183
+ (c for c in _symptom_class_names
2184
+ if _stem_lower in (c.lower() + "impl", c.lower() + "service",
2185
+ c.lower() + "serviceimpl", c.lower() + "helper")),
2186
+ None,
2187
+ )
2188
+ if _matched_class:
2189
+ content_boost += 3.0
2190
+ _why_parts.append(f"exact class match: {_stem} (+3.0)")
2191
+ elif _matched_exc:
2192
+ content_boost += 2.0
2193
+ _why_parts.append(f"exception class match: {_stem} (+2.0)")
2194
+ elif _impl_match:
2195
+ content_boost += 2.5
2196
+ _why_parts.append(f"class impl match: {_stem} (+2.5)")
2197
+ else:
2198
+ # Symbol appears anywhere in path (package adjacency)
2199
+ _path_class_hit = next(
2200
+ (c for c in _symptom_class_names if c.lower() in path_lower),
2201
+ None,
2202
+ )
2203
+ if _path_class_hit:
2204
+ content_boost += 1.0
2205
+ _why_parts.append(f"symbol in path: {_path_class_hit} (+1.0)")
2206
+ elif any(e.lower() in path_lower for e in _symptom_exception_types):
2207
+ content_boost += 0.8
2208
+ _why_parts.append("exception type in path (+0.8)")
2209
+
2210
+ # AND-weighted token intersection — multiple matching tokens >> single
2211
+ if _symptom_tokens:
2212
+ _path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
2213
+ _intersection = _symptom_tokens & _path_parts
2214
+ _n_match = len(_intersection)
2215
+ if _n_match >= 3:
2216
+ _tok_boost = min(1.2, _n_match * 0.25)
2217
+ content_boost += _tok_boost
2218
+ _why_parts.append(f"token AND match ({_n_match} terms: {sorted(_intersection)[:3]}) (+{_tok_boost:.2f})")
2219
+ elif _n_match == 2:
2220
+ content_boost += 0.4
2221
+ _why_parts.append(f"token AND match (2 terms: {sorted(_intersection)}) (+0.40)")
2222
+ # Single-token match: no boost — avoids OR explosion
2223
+
2224
+ # ── Git / annotation signals ──
2120
2225
  if path in _uncommitted:
2121
2226
  content_boost += 0.40
2122
2227
  _why_parts.append("uncommitted change (+0.40)")
@@ -2203,7 +2308,7 @@ class TaskContextBuilder:
2203
2308
  }
2204
2309
  _repo_size = len(all_paths)
2205
2310
  _task_budget = {
2206
- "fix-bug": max(20, min(40, _repo_size // 80)),
2311
+ "fix-bug": 30, # hard cap prevents token explosion on large repos
2207
2312
  "onboard": max(15, min(25, _repo_size // 150)),
2208
2313
  "explain": max(10, min(20, _repo_size // 200)),
2209
2314
  "generate-tests": max(20, min(35, _repo_size // 100)),
@@ -2271,7 +2376,21 @@ class TaskContextBuilder:
2271
2376
  _covered.add(_layer)
2272
2377
  _missing.discard(_layer)
2273
2378
 
2274
- return [_rf_map[p] for p in _selected if p in _rf_map]
2379
+ result = [_rf_map[p] for p in _selected if p in _rf_map]
2380
+
2381
+ # Assign fix-bug tiers based on raw score (pre-normalised total)
2382
+ if task_name == "fix-bug":
2383
+ _score_lookup = {path: total for total, path, _ in scored}
2384
+ for _rf in result:
2385
+ _s = _score_lookup.get(_rf.path, 0.0)
2386
+ if _s >= 4.0:
2387
+ _rf.tier = "high"
2388
+ elif _s >= 1.5:
2389
+ _rf.tier = "medium"
2390
+ else:
2391
+ _rf.tier = "low"
2392
+
2393
+ return result
2275
2394
  except Exception:
2276
2395
  return [f for _, _, f in scored[:15]]
2277
2396
 
sourcecode/serializer.py CHANGED
@@ -409,7 +409,10 @@ def _spring_profiles_context(sm: "SourceMap") -> "Optional[dict[str, Any]]":
409
409
  else:
410
410
  matches = [
411
411
  p for p in sm.file_paths
412
- if pfx in Path(p).stem.lower() and p.endswith(".java")
412
+ if (Path(p).stem.lower() == pfx
413
+ or Path(p).stem.lower().startswith(pfx + "-")
414
+ or Path(p).stem.lower().endswith("-" + pfx))
415
+ and p.endswith(".java")
413
416
  ]
414
417
  if matches:
415
418
  per_profile[profile] = [Path(p).name for p in matches[:5]]
@@ -534,6 +537,8 @@ def _bootstrap_structured(eps: list) -> "Optional[dict[str, Any]]":
534
537
 
535
538
  for ep in eps:
536
539
  path = getattr(ep, "path", "")
540
+ if "/test/" in path or "/tests/" in path:
541
+ continue
537
542
  kind = getattr(ep, "kind", "")
538
543
  stem = _Path(path).stem
539
544
 
@@ -587,8 +592,9 @@ def _bootstrap_structured(eps: list) -> "Optional[dict[str, Any]]":
587
592
  module_names.append(module)
588
593
 
589
594
  _ctrl_note = (
590
- f"{controller_methods} @RequestMapping methods across "
595
+ f"{controller_methods} detected entry-point methods across "
591
596
  f"{controller_classes} controller classes"
597
+ f" (use 'sourcecode endpoints' for full surface)"
592
598
  )
593
599
  if len(module_names) > 30:
594
600
  # Group by first path segment under ddd/ (inferred domain area)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.31.4
3
+ Version: 1.31.6
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
225
225
 
226
226
  **Deterministic, behavior-aware codebase context for AI agents and PR review.**
227
227
 
228
- ![Version](https://img.shields.io/badge/version-1.31.4-blue)
228
+ ![Version](https://img.shields.io/badge/version-1.31.6-blue)
229
229
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
230
230
 
231
231
  ---
@@ -261,7 +261,7 @@ pipx install sourcecode
261
261
 
262
262
  ```bash
263
263
  sourcecode version
264
- # sourcecode 1.31.4
264
+ # sourcecode 1.31.6
265
265
  ```
266
266
 
267
267
  ---
@@ -1,11 +1,11 @@
1
- sourcecode/__init__.py,sha256=P8MbrmTVGyjN1Uy_38VxYTdrjIoEPOf9I4Xn71cCRs8,103
1
+ sourcecode/__init__.py,sha256=YtkXxLCwI2P3youz8qWDCC8rLLuveg8_p3Rw5TwvrXs,103
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
6
6
  sourcecode/classifier.py,sha256=-0t0HLc9L9UleMLfclfLM3AXhBjUb_AYyBPDbvgWtac,7755
7
- sourcecode/cli.py,sha256=dcHIYDvDpcIzbExxKuemtElk3RClLRWaaO2RU9R8Hoc,123945
8
- sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
7
+ sourcecode/cli.py,sha256=lAqlKvLs3nrHKCM6RfsvQr1O_jWi3WOcdjhhlE8fmyg,129082
8
+ sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
9
9
  sourcecode/confidence_analyzer.py,sha256=ZUn-Nywi5TEQcuozqK_vfOyPT-a1dYYO42elAtVFV-k,16412
10
10
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
11
11
  sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
@@ -21,8 +21,9 @@ sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,2
21
21
  sourcecode/git_analyzer.py,sha256=0Gyj-vMpIIN4nfriKXVRouNYBeJ59s6pQDX2Xu9Pq-U,13177
22
22
  sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
23
23
  sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
24
+ sourcecode/path_filters.py,sha256=ROFRQ8eSLBEMiixK9f45-RO7um4VEEcjoD5AA4I427I,3739
24
25
  sourcecode/pr_comment_renderer.py,sha256=smHslxiG14lrytCkq5nFrFu-qTHgA-t-LFYfdrfjz2o,14423
25
- sourcecode/prepare_context.py,sha256=Eid3Wmh4lmtr20pmobcCU0yzjRXhFql2rEmfGUgkFpU,183214
26
+ sourcecode/prepare_context.py,sha256=-9kTYuPhwr79mF6lNe9tI7glGAU_d84GyVurHceGroo,189427
26
27
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
27
28
  sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,12970
28
29
  sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -33,7 +34,7 @@ sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBG
33
34
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
34
35
  sourcecode/schema.py,sha256=fj3BZ3IcnNV4j21BFIEvz8Qnw_vZoqIbzzRg-qQ-nd0,24530
35
36
  sourcecode/semantic_analyzer.py,sha256=12TwXYkYbDcBdu0heX_EmfPM2EkO8a_r5osf0SaeQbs,88956
36
- sourcecode/serializer.py,sha256=KHVqwUK53axF10detPzqgmIY2P31rjLLJ_9T9Eyqp-E,111647
37
+ sourcecode/serializer.py,sha256=1y9DAkH2aBzlsmkHcSSc-t72_4fv9RZIuG4uhhGG5QE,111933
37
38
  sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
38
39
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
39
40
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
@@ -44,9 +45,9 @@ sourcecode/detectors/dart.py,sha256=QbqaL5v18-_ort75HihVBt8MsKUfOcFDF8IpWFLiXpI,
44
45
  sourcecode/detectors/dotnet.py,sha256=oi8zq3AfUItlK3h_qM81vOe1ZVTIU9LBKIlIrRDuqOs,6864
45
46
  sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJE,1713
46
47
  sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
47
- sourcecode/detectors/heuristic.py,sha256=bCqqgbHavl4Sse3dqT8mwmo1wAdgeJr7VyXOmfClLKo,3387
48
+ sourcecode/detectors/heuristic.py,sha256=7cRxrip4yIaggYzZJB6ef8yHKh-gHgiH_pXMFcjlyFU,3723
48
49
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
49
- sourcecode/detectors/java.py,sha256=ldvaDZJADAKslOpS5qJ0bxexgeY6h_4Yx4NCtHio7J8,24203
50
+ sourcecode/detectors/java.py,sha256=O2JdznVYv5364GSQExksYLsAi0pvDUW9ZowpsL0xLgM,24451
50
51
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
51
52
  sourcecode/detectors/nodejs.py,sha256=Hg3Gmr7yIMJFiLoDwOTk2wtu00wxIs6kZf-oQujTFUA,13187
52
53
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
@@ -72,8 +73,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
72
73
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
73
74
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
74
75
  sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
75
- sourcecode-1.31.4.dist-info/METADATA,sha256=JZXcuZYPozOjtbIY_xnVgUNwMF1aEU12mV4m2T__E1A,29083
76
- sourcecode-1.31.4.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
77
- sourcecode-1.31.4.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
78
- sourcecode-1.31.4.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
79
- sourcecode-1.31.4.dist-info/RECORD,,
76
+ sourcecode-1.31.6.dist-info/METADATA,sha256=aKMdH4KYx2KrZ1rx8Ylo73TwfocQ5szGN1U3CDUn_tM,29083
77
+ sourcecode-1.31.6.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
78
+ sourcecode-1.31.6.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
79
+ sourcecode-1.31.6.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
80
+ sourcecode-1.31.6.dist-info/RECORD,,