sourcecode 1.31.4__py3-none-any.whl → 1.31.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sourcecode/__init__.py +1 -1
- sourcecode/cli.py +117 -15
- sourcecode/code_notes_analyzer.py +5 -2
- sourcecode/detectors/heuristic.py +6 -1
- sourcecode/detectors/java.py +5 -1
- sourcecode/path_filters.py +124 -0
- sourcecode/prepare_context.py +123 -4
- sourcecode/serializer.py +8 -2
- {sourcecode-1.31.4.dist-info → sourcecode-1.31.6.dist-info}/METADATA +3 -3
- {sourcecode-1.31.4.dist-info → sourcecode-1.31.6.dist-info}/RECORD +13 -12
- {sourcecode-1.31.4.dist-info → sourcecode-1.31.6.dist-info}/WHEEL +0 -0
- {sourcecode-1.31.4.dist-info → sourcecode-1.31.6.dist-info}/entry_points.txt +0 -0
- {sourcecode-1.31.4.dist-info → sourcecode-1.31.6.dist-info}/licenses/LICENSE +0 -0
sourcecode/__init__.py
CHANGED
sourcecode/cli.py
CHANGED
|
@@ -401,7 +401,7 @@ def main(
|
|
|
401
401
|
help=(
|
|
402
402
|
"High-signal summary (typically 1000–3000 tokens depending on repo size): "
|
|
403
403
|
"stacks, entry points, dependency summary, confidence, and gaps. "
|
|
404
|
-
"Includes security_surface, mybatis, and transactional_boundaries for Java projects. "
|
|
404
|
+
"Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
|
|
405
405
|
"Use --agent for maximum signal or --slim (when available) for minimal token footprint."
|
|
406
406
|
),
|
|
407
407
|
),
|
|
@@ -2418,14 +2418,20 @@ def repo_ir_cmd(
|
|
|
2418
2418
|
|
|
2419
2419
|
if output_path:
|
|
2420
2420
|
output_path.write_text(output, encoding="utf-8")
|
|
2421
|
-
n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
|
|
2422
|
-
n_edges = len((ir.get("graph") or {}).get("edges") or [])
|
|
2423
2421
|
size_kb = len(output.encode("utf-8")) // 1024
|
|
2424
|
-
|
|
2425
|
-
|
|
2426
|
-
|
|
2427
|
-
|
|
2428
|
-
|
|
2422
|
+
if summary_only:
|
|
2423
|
+
typer.echo(
|
|
2424
|
+
f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
|
|
2425
|
+
err=True,
|
|
2426
|
+
)
|
|
2427
|
+
else:
|
|
2428
|
+
n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
|
|
2429
|
+
n_edges = len((ir.get("graph") or {}).get("edges") or [])
|
|
2430
|
+
typer.echo(
|
|
2431
|
+
f"IR written to {output_path} "
|
|
2432
|
+
f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
|
|
2433
|
+
err=True,
|
|
2434
|
+
)
|
|
2429
2435
|
else:
|
|
2430
2436
|
try:
|
|
2431
2437
|
_sys.stdout.buffer.write(output.encode("utf-8"))
|
|
@@ -2462,7 +2468,11 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
|
|
|
2462
2468
|
r'@(Get|Post|Put|Delete|Patch|Request)Mapping\s*'
|
|
2463
2469
|
r'(?:\(\s*(?:value\s*=\s*)?(?:"([^"]*)"|\{[^}]*\}|[^)]*)\s*\))?',
|
|
2464
2470
|
)
|
|
2465
|
-
_CLASS_RE = _re.compile(
|
|
2471
|
+
_CLASS_RE = _re.compile(
|
|
2472
|
+
r'^[ \t]*(?:(?:public|protected|private|abstract|final|@\w+)\s+)*'
|
|
2473
|
+
r'(?:class|interface)\s+(\w+)',
|
|
2474
|
+
_re.MULTILINE,
|
|
2475
|
+
)
|
|
2466
2476
|
_METHOD_RE = _re.compile(
|
|
2467
2477
|
r'(?:public|protected|private)\s+\S+\s+(\w+)\s*\(',
|
|
2468
2478
|
)
|
|
@@ -2486,22 +2496,76 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
|
|
|
2486
2496
|
endpoints: list[dict] = []
|
|
2487
2497
|
seen: set[tuple] = set()
|
|
2488
2498
|
|
|
2499
|
+
from sourcecode.path_filters import is_test_path as _is_test_path
|
|
2500
|
+
|
|
2489
2501
|
java_files = [
|
|
2490
2502
|
p for p in root.rglob("*.java")
|
|
2491
|
-
if
|
|
2492
|
-
and "/tests/" not in str(p).replace("\\", "/")
|
|
2503
|
+
if not _is_test_path(str(p).replace("\\", "/"))
|
|
2493
2504
|
and "target/" not in str(p).replace("\\", "/")
|
|
2494
2505
|
]
|
|
2495
2506
|
|
|
2507
|
+
# ── Meta-annotation index ─────────────────────────────────────────────────
|
|
2508
|
+
# First pass: find @interface declarations and the annotations they carry.
|
|
2509
|
+
# This lets us detect controllers annotated with framework wrappers like
|
|
2510
|
+
# @FrameworkController that itself carries @Controller.
|
|
2511
|
+
#
|
|
2512
|
+
# Index maps annotation simple name → set of annotation names it is
|
|
2513
|
+
# annotated with (one level; resolution is done recursively below).
|
|
2514
|
+
_ANN_DECL_RE = _re.compile(r'public\s+@interface\s+(\w+)')
|
|
2515
|
+
_ANN_USE_RE = _re.compile(r'@(\w+)')
|
|
2516
|
+
|
|
2517
|
+
_meta_index: dict[str, set[str]] = {}
|
|
2518
|
+
for _jf in java_files:
|
|
2519
|
+
_raw: str
|
|
2520
|
+
try:
|
|
2521
|
+
_raw = _jf.read_text(encoding="utf-8", errors="replace")
|
|
2522
|
+
except OSError:
|
|
2523
|
+
continue
|
|
2524
|
+
if "@interface" not in _raw:
|
|
2525
|
+
continue
|
|
2526
|
+
_decl_m = _ANN_DECL_RE.search(_raw)
|
|
2527
|
+
if not _decl_m:
|
|
2528
|
+
continue
|
|
2529
|
+
_ann_name = _decl_m.group(1)
|
|
2530
|
+
# Collect annotations appearing in the header (before the @interface line)
|
|
2531
|
+
_header = _raw[: _decl_m.start()]
|
|
2532
|
+
_meta_anns: set[str] = {
|
|
2533
|
+
m for m in _ANN_USE_RE.findall(_header)
|
|
2534
|
+
if m not in ("interface", "interface") # strip keywords; annotation names only
|
|
2535
|
+
}
|
|
2536
|
+
_meta_index[_ann_name] = _meta_anns
|
|
2537
|
+
|
|
2538
|
+
_CONTROLLER_CORE = frozenset({"Controller", "RestController"})
|
|
2539
|
+
|
|
2540
|
+
def _resolves_to_controller(name: str, visited: "set[str]") -> bool:
    """Tell whether annotation *name* leads to @Controller/@RestController.

    Follows the annotations recorded for *name* in ``_meta_index`` and
    recurses into each of them. *visited* is mutated in place: every
    annotation name that gets expanded is added to it, and a name that is
    already present is not expanded again, which stops cyclic
    meta-annotations from recursing forever.
    """
    if name in _CONTROLLER_CORE:
        return True
    if name in visited:
        return False
    visited.add(name)
    # any() short-circuits on the first meta-annotation that resolves.
    return any(
        _resolves_to_controller(meta, visited)
        for meta in _meta_index.get(name, ())
    )
|
|
2551
|
+
|
|
2496
2552
|
for java_file in java_files:
|
|
2497
2553
|
try:
|
|
2498
2554
|
content = java_file.read_text(encoding="utf-8", errors="replace")
|
|
2499
2555
|
except OSError:
|
|
2500
2556
|
continue
|
|
2501
2557
|
|
|
2502
|
-
#
|
|
2503
|
-
|
|
2504
|
-
|
|
2558
|
+
# Process files with direct controller/mapping annotations OR
|
|
2559
|
+
# with custom annotations that transitively resolve to @Controller/@RestController.
|
|
2560
|
+
_has_direct = any(x in content for x in ("@RestController", "@Controller", "@RequestMapping"))
|
|
2561
|
+
if not _has_direct:
|
|
2562
|
+
# Quick meta-annotation check: extract class-level annotation names
|
|
2563
|
+
# (first 60 lines — before the class body opens) and resolve them.
|
|
2564
|
+
_header_lines = content.splitlines()[:60]
|
|
2565
|
+
_header_text = "\n".join(_header_lines)
|
|
2566
|
+
_file_anns = set(_ANN_USE_RE.findall(_header_text))
|
|
2567
|
+
if not any(_resolves_to_controller(a, set()) for a in _file_anns):
|
|
2568
|
+
continue
|
|
2505
2569
|
|
|
2506
2570
|
try:
|
|
2507
2571
|
rel_path = str(java_file.relative_to(root)).replace("\\", "/")
|
|
@@ -2540,7 +2604,45 @@ def _extract_java_endpoints(root: "Path") -> "dict[str, Any]":
|
|
|
2540
2604
|
if "class " in block or "interface " in block:
|
|
2541
2605
|
path_m = _CLASS_PATH_RE.search(block)
|
|
2542
2606
|
if path_m:
|
|
2543
|
-
|
|
2607
|
+
captured = path_m.group(1).rstrip("/")
|
|
2608
|
+
# Handle string concat: @RequestMapping("lit" + ClassName.CONST)
|
|
2609
|
+
_CONCAT_CONST_RE = _re.compile(
|
|
2610
|
+
r'@RequestMapping\s*\(\s*(?:value\s*=\s*)?'
|
|
2611
|
+
r'["\']([^"\']*)["\'\s]*\+\s*(\w+)\.(\w+)'
|
|
2612
|
+
)
|
|
2613
|
+
cc_m = _CONCAT_CONST_RE.search(block)
|
|
2614
|
+
if cc_m:
|
|
2615
|
+
prefix_lit = cc_m.group(1)
|
|
2616
|
+
c_class = cc_m.group(2)
|
|
2617
|
+
c_field = cc_m.group(3)
|
|
2618
|
+
_SVAL_RE = _re.compile(
|
|
2619
|
+
r'static\s+final\s+String\s+'
|
|
2620
|
+
+ _re.escape(c_field)
|
|
2621
|
+
+ r'\s*=\s*"([^"]+)"'
|
|
2622
|
+
)
|
|
2623
|
+
resolved = None
|
|
2624
|
+
m_cur = _SVAL_RE.search(content)
|
|
2625
|
+
if m_cur:
|
|
2626
|
+
resolved = m_cur.group(1)
|
|
2627
|
+
else:
|
|
2628
|
+
for _jf in java_files:
|
|
2629
|
+
if _jf.stem == c_class:
|
|
2630
|
+
try:
|
|
2631
|
+
_jf_txt = _jf.read_text(
|
|
2632
|
+
encoding="utf-8", errors="replace"
|
|
2633
|
+
)
|
|
2634
|
+
m_jf = _SVAL_RE.search(_jf_txt)
|
|
2635
|
+
if m_jf:
|
|
2636
|
+
resolved = m_jf.group(1)
|
|
2637
|
+
break
|
|
2638
|
+
except OSError:
|
|
2639
|
+
pass
|
|
2640
|
+
if resolved is not None:
|
|
2641
|
+
class_bases = [(prefix_lit + resolved).rstrip("/")]
|
|
2642
|
+
else:
|
|
2643
|
+
class_bases = [captured] if captured else [""]
|
|
2644
|
+
else:
|
|
2645
|
+
class_bases = [captured] if captured else [""]
|
|
2544
2646
|
else:
|
|
2545
2647
|
arr_m = _CLASS_ARRAY_PATH_RE.search(block)
|
|
2546
2648
|
if arr_m:
|
|
@@ -20,7 +20,8 @@ _SYMBOL_LOOKBACK = 25 # líneas hacia atrás para encontrar el símbolo envolve
|
|
|
20
20
|
_SKIP_DIRS = {
|
|
21
21
|
"node_modules", ".git", "__pycache__", ".venv", "venv",
|
|
22
22
|
".mypy_cache", "dist", "build", ".tox", ".eggs",
|
|
23
|
-
".next", ".nuxt", ".output", "vendor", "coverage",
|
|
23
|
+
".next", ".nuxt", ".output", "vendor", "vendors", "coverage",
|
|
24
|
+
"third_party", "thirdparty",
|
|
24
25
|
}
|
|
25
26
|
|
|
26
27
|
_CODE_EXTENSIONS = {
|
|
@@ -255,4 +256,6 @@ class CodeNotesAnalyzer:
|
|
|
255
256
|
# here was redundant and caused files to be silently skipped when
|
|
256
257
|
# traversal order varied (different files filled the quota first).
|
|
257
258
|
if suffix in _CODE_EXTENSIONS:
|
|
258
|
-
|
|
259
|
+
from sourcecode.path_filters import is_vendor_path as _is_vendor
|
|
260
|
+
if not _is_vendor(rel):
|
|
261
|
+
_scan_source_file(entry, rel, notes, total_count)
|
|
@@ -59,8 +59,13 @@ class HeuristicDetector(AbstractDetector):
|
|
|
59
59
|
paths = flatten_file_tree(context.file_tree)
|
|
60
60
|
counts: Counter[str] = Counter()
|
|
61
61
|
for path in paths:
|
|
62
|
-
if path.startswith("."):
|
|
62
|
+
if path.startswith(".") or _is_auxiliary_path(path):
|
|
63
63
|
continue
|
|
64
|
+
# Skip JS/TS files bundled as Java static resources (not Node.js source)
|
|
65
|
+
if path.endswith((".js", ".ts", ".tsx", ".jsx", ".mjs")):
|
|
66
|
+
_np = path.replace("\\", "/")
|
|
67
|
+
if "src/main/resources/" in _np or "src/main/webapp/" in _np:
|
|
68
|
+
continue
|
|
64
69
|
for extension, stack in _EXTENSION_MAP.items():
|
|
65
70
|
if path.endswith(extension):
|
|
66
71
|
counts[stack] += 1
|
sourcecode/detectors/java.py
CHANGED
|
@@ -295,9 +295,13 @@ class JavaDetector(AbstractDetector):
|
|
|
295
295
|
self._augment_deep_java_controllers(context, all_java)
|
|
296
296
|
|
|
297
297
|
# 1. @SpringBootApplication entry: Application.java / Main.java by name
|
|
298
|
+
# Exclude test trees: test helpers like AdminApplication.java in
|
|
299
|
+
# integration/src/test/java/ must not be treated as production entrypoints.
|
|
300
|
+
from sourcecode.path_filters import is_test_path as _is_test_path
|
|
298
301
|
app_candidates = [
|
|
299
302
|
p for p in all_java
|
|
300
303
|
if p.endswith(("Application.java", "Main.java"))
|
|
304
|
+
and not _is_test_path(p)
|
|
301
305
|
]
|
|
302
306
|
entry_points: list[EntryPoint] = [
|
|
303
307
|
EntryPoint(path=p, stack="java", kind="application", source="manifest")
|
|
@@ -307,7 +311,7 @@ class JavaDetector(AbstractDetector):
|
|
|
307
311
|
# 2. Annotation-based scan: @RestController, @WebFilter, FilterRegistrationBean
|
|
308
312
|
# Prioritize Controller-named files so all REST controllers are detected
|
|
309
313
|
# even in large codebases where total files > _MAX_JAVA_ENTRY_SCAN.
|
|
310
|
-
_non_test = [p for p in all_java if
|
|
314
|
+
_non_test = [p for p in all_java if not _is_test_path(p)]
|
|
311
315
|
_ctrl_files = [p for p in _non_test if "Controller" in p]
|
|
312
316
|
_other_files = [p for p in _non_test if "Controller" not in p]
|
|
313
317
|
scan_candidates = _ctrl_files + _other_files[:max(0, _MAX_JAVA_ENTRY_SCAN - len(_ctrl_files))]
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Shared path classification helpers used across all tools.
|
|
2
|
+
|
|
3
|
+
Centralises test-path and vendor-path detection so each tool does not
|
|
4
|
+
duplicate — and diverge — these heuristics.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
# Directory names (lower-case) that mark everything beneath them as test code.
_TEST_SEGMENTS = frozenset({
    "test", "tests", "spec", "specs",
    "test-helpers", "test_helpers", "testfixtures",
    "it",  # integration-tests short name
    "integrationtest", "integrationtests",
})

# Directory names (lower-case) that mark everything beneath them as vendored.
_VENDOR_SEGMENTS = frozenset({
    "vendor", "vendors",
    "third_party", "thirdparty",
    "node_modules",
    "external", "externals",
    "contrib",
})

# lib/libs are vendor only for web-asset extensions.
# Java/Kotlin/Python source in a package named "lib" is NOT vendor.
_LIB_SEGMENTS = frozenset({"lib", "libs"})
_WEB_ASSET_EXTS = frozenset({
    ".js", ".jsx", ".mjs", ".cjs",
    ".ts", ".tsx",
    ".css", ".less", ".scss", ".sass",
    ".json", ".map",
})

# Kept for backward compatibility; is_vendor_path() relies on
# _VENDOR_SEGMENTS, which names exactly the same directories.
_VENDOR_PATH_FRAGMENTS = (
    "/vendor/", "/vendors/",
    "/third_party/", "/thirdparty/",
    "/node_modules/",
    "/external/", "/externals/",
    "/contrib/",
)

# Kept for backward compatibility; is_test_path() catches these roots through
# the "test" entry of _TEST_SEGMENTS.
_JAVA_TEST_ROOTS = (
    "/src/test/",
    "\\src\\test\\",
)

# File-name suffixes (lower-case) used by JS/TS test runners.
_JS_TEST_SUFFIXES = (
    ".test.ts", ".test.js", ".spec.ts", ".spec.js",
    ".test.tsx", ".test.jsx", ".spec.tsx", ".spec.jsx",
)


def _is_java_test_name(filename: str) -> bool:
    """Return True when *filename* follows a Java test-class naming convention.

    The check is case-sensitive on the class name so that the ``Test`` token
    must sit on a CamelCase boundary: ``FooTest.java``, ``FooTests.java`` and
    ``TestFoo.java`` match, while ``Contest.java``, ``Latest.java`` and
    ``TestimonialController.java`` do not. A bare ``Test.java`` is not
    treated as a test.
    """
    stem, dot, ext = filename.rpartition(".")
    if not dot or ext.lower() != "java":
        return False
    if stem.endswith("Tests"):
        return True
    if stem.endswith("Test") and stem != "Test":
        return True
    # "Test" prefix counts only when the next character starts a new word
    # (upper-case letter, digit, underscore, ...), not when it continues one.
    return len(stem) > 4 and stem.startswith("Test") and not stem[4].islower()


def is_test_path(path: str) -> bool:
    """Return True when *path* is part of a test tree, not production code.

    Handles:
    - Any directory component listed in ``_TEST_SEGMENTS`` (this covers the
      Maven/Gradle ``src/test/...`` layout as well as /tests/, /spec/, /it/)
    - Java file name conventions (FooTest.java, FooTests.java, TestFoo.java)
    - Python conventions (test_foo.py, foo_test.py)
    - JS/TS conventions (foo.test.ts, foo.spec.ts, and the .js/.tsx/.jsx
      equivalents)

    Both ``/`` and ``\\`` separators are accepted. Directory and Python/JS
    checks are case-insensitive; the Java class-name check is case-sensitive
    (see ``_is_java_test_name``).
    """
    raw = path.replace("\\", "/")
    *dirs, name = raw.lower().split("/")

    # Directory-based classification: the filename itself is never a segment.
    if not _TEST_SEGMENTS.isdisjoint(dirs):
        return True

    # Language-agnostic / Python / JS file-name conventions.
    if (
        name.startswith("test_")
        or name.endswith("_test.py")
        or name.endswith(_JS_TEST_SUFFIXES)
    ):
        return True

    # Java conventions need the original casing to find the word boundary.
    return _is_java_test_name(raw.rsplit("/", 1)[-1])


def is_vendor_path(path: str) -> bool:
    """Return True when *path* is inside a vendored / third-party directory.

    Handles:
    - Minified JS/CSS files anywhere (*.min.js, *.min.css)
    - Any directory component listed in ``_VENDOR_SEGMENTS`` (vendor,
      third_party, node_modules, external, contrib, ...)
    - /lib/, /libs/ containing web assets (NOT JVM/Python source — those may
      legitimately use "lib" as a package name)

    Both ``/`` and ``\\`` separators are accepted and matching is
    case-insensitive.
    """
    norm = path.replace("\\", "/").lower()

    # Minified files are always vendor regardless of directory.
    if norm.endswith((".min.js", ".min.css")):
        return True

    *dirs, filename = norm.split("/")

    # Unambiguous vendor directory names.
    if not _VENDOR_SEGMENTS.isdisjoint(dirs):
        return True

    # lib/libs: vendor only for web-asset file types, not JVM/Python source.
    if _LIB_SEGMENTS.isdisjoint(dirs):
        return False
    _, dot, ext = filename.rpartition(".")
    return bool(dot) and "." + ext in _WEB_ASSET_EXTS
|
sourcecode/prepare_context.py
CHANGED
|
@@ -310,7 +310,8 @@ class RelevantFile:
|
|
|
310
310
|
role: str # entrypoint | source | test
|
|
311
311
|
score: float
|
|
312
312
|
reason: str
|
|
313
|
-
why: str = ""
|
|
313
|
+
why: str = "" # why this file matters for the specific task
|
|
314
|
+
tier: Optional[str] = None # fix-bug only: high | medium | low
|
|
314
315
|
|
|
315
316
|
|
|
316
317
|
@dataclass
|
|
@@ -974,7 +975,33 @@ class TaskContextBuilder:
|
|
|
974
975
|
and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
|
|
975
976
|
and d.scope not in {"dev"}
|
|
976
977
|
]
|
|
977
|
-
|
|
978
|
+
# Rank by framework centrality: core infra (ORM, Spring) > serialization > other.
|
|
979
|
+
# Penalise vendored tooling (closure-compiler, shaded utilities) so that
|
|
980
|
+
# Hibernate/JPA/Solr appear before minor build-time dependencies.
|
|
981
|
+
_HIGH_SIGNAL_FRAGMENTS = (
|
|
982
|
+
"hibernate", "jpa", "spring-core", "spring-context", "spring-web",
|
|
983
|
+
"spring-boot", "spring-security", "spring-data",
|
|
984
|
+
"solr", "elasticsearch", "kafka", "redis",
|
|
985
|
+
"jackson", "gson",
|
|
986
|
+
"mybatis", "druid", "datasource",
|
|
987
|
+
"tomcat", "undertow", "netty",
|
|
988
|
+
"slf4j", "logback", "log4j",
|
|
989
|
+
)
|
|
990
|
+
_LOW_SIGNAL_FRAGMENTS = (
|
|
991
|
+
"closure-compiler", "closure-library",
|
|
992
|
+
"google-closure", "rhino",
|
|
993
|
+
"guava-gwt",
|
|
994
|
+
)
|
|
995
|
+
|
|
996
|
+
def _dep_rank(d: Any) -> tuple:
    """Build the sort key used to order direct dependencies.

    The key is ``(ecosystem_rank, centrality, artifact)``:
    - ``ecosystem_rank`` is 0 when the dependency's ecosystem equals
      ``primary_eco`` and 1 otherwise;
    - ``centrality`` is 0 when the lower-cased name contains a high-signal
      fragment, 2 when it contains only a low-signal fragment, 1 otherwise;
    - ``artifact`` is the lower-cased name (empty when the name is falsy),
      which breaks remaining ties alphabetically.
    """
    artifact = (d.name or "").lower()

    def _mentions(fragments: tuple) -> bool:
        return any(fragment in artifact for fragment in fragments)

    # High-signal wins even if a low-signal fragment is also present.
    if _mentions(_HIGH_SIGNAL_FRAGMENTS):
        centrality = 0
    elif _mentions(_LOW_SIGNAL_FRAGMENTS):
        centrality = 2
    else:
        centrality = 1

    ecosystem_rank = 0 if d.ecosystem == primary_eco else 1
    return (ecosystem_rank, centrality, artifact)
|
|
1003
|
+
|
|
1004
|
+
direct.sort(key=_dep_rank)
|
|
978
1005
|
_SKIP_DEP_KEYS = {"parent", "workspace", "resolved_version", "manifest_path"}
|
|
979
1006
|
key_dependencies = [
|
|
980
1007
|
{k: v for k, v in asdict(d).items() if v is not None and k not in _SKIP_DEP_KEYS}
|
|
@@ -1182,6 +1209,7 @@ class TaskContextBuilder:
|
|
|
1182
1209
|
uncommitted_files=uncommitted_files,
|
|
1183
1210
|
code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
|
|
1184
1211
|
delta_files=None,
|
|
1212
|
+
symptom=symptom if task_name == "fix-bug" else None,
|
|
1185
1213
|
)
|
|
1186
1214
|
|
|
1187
1215
|
# ── 6b. review-pr: derive PR-specific impact sections from delta analysis ──
|
|
@@ -2025,6 +2053,7 @@ class TaskContextBuilder:
|
|
|
2025
2053
|
uncommitted_files: Optional[set[str]] = None,
|
|
2026
2054
|
code_notes: Optional[list] = None,
|
|
2027
2055
|
delta_files: Optional[set[str]] = None,
|
|
2056
|
+
symptom: Optional[str] = None,
|
|
2028
2057
|
) -> list[RelevantFile]:
|
|
2029
2058
|
from sourcecode.ranking_engine import RankingEngine
|
|
2030
2059
|
from sourcecode.file_classifier import FileClassifier
|
|
@@ -2043,6 +2072,11 @@ class TaskContextBuilder:
|
|
|
2043
2072
|
_annotated_files: set[str] = set()
|
|
2044
2073
|
_dominant_stack = ""
|
|
2045
2074
|
_recently_changed_stacks: set[str] = set()
|
|
2075
|
+
# Query-aware signals extracted from symptom (class names, exception types, tokens)
|
|
2076
|
+
_symptom_class_names: set[str] = set() # CamelCase class names
|
|
2077
|
+
_symptom_exception_types: set[str] = set() # *Exception / *Error tokens
|
|
2078
|
+
_symptom_tokens: set[str] = set() # all lowercase tokens
|
|
2079
|
+
|
|
2046
2080
|
if task_name == "fix-bug":
|
|
2047
2081
|
_bug_kinds = {"FIXME", "BUG", "HACK", "XXX"}
|
|
2048
2082
|
for _n in (code_notes or []):
|
|
@@ -2068,6 +2102,19 @@ class TaskContextBuilder:
|
|
|
2068
2102
|
_dominant_stack = _stk_counts.most_common(1)[0][0]
|
|
2069
2103
|
_recently_changed_stacks = set(_stk_counts.keys())
|
|
2070
2104
|
|
|
2105
|
+
# Extract structured signals from symptom text for AND-weighted ranking
|
|
2106
|
+
if symptom:
|
|
2107
|
+
import re as _re_bug
|
|
2108
|
+
_camel_re = _re_bug.compile(r'\b([A-Z][a-zA-Z0-9]+)\b')
|
|
2109
|
+
for _tok in _camel_re.findall(symptom):
|
|
2110
|
+
if _tok.endswith(("Exception", "Error", "Throwable")):
|
|
2111
|
+
_symptom_exception_types.add(_tok)
|
|
2112
|
+
else:
|
|
2113
|
+
_symptom_class_names.add(_tok)
|
|
2114
|
+
_symptom_tokens = {
|
|
2115
|
+
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
|
|
2116
|
+
}
|
|
2117
|
+
|
|
2071
2118
|
scored: list[tuple[float, str, RelevantFile]] = []
|
|
2072
2119
|
|
|
2073
2120
|
# For delta task, score only files changed in the specified git range.
|
|
@@ -2117,6 +2164,64 @@ class TaskContextBuilder:
|
|
|
2117
2164
|
_fix_bug_why = ""
|
|
2118
2165
|
if task_name == "fix-bug":
|
|
2119
2166
|
_why_parts: list[str] = []
|
|
2167
|
+
|
|
2168
|
+
# ── Query-aware AND-weighted signals (symptom-derived) ──
|
|
2169
|
+
# These intentionally outweigh git-recency signals so that
|
|
2170
|
+
# OrderServiceImpl.java ranks top-3 regardless of churn history.
|
|
2171
|
+
if _symptom_class_names or _symptom_exception_types:
|
|
2172
|
+
_stem = Path(path).stem
|
|
2173
|
+
_stem_lower = _stem.lower()
|
|
2174
|
+
_matched_class = next(
|
|
2175
|
+
(c for c in _symptom_class_names if _stem_lower == c.lower()),
|
|
2176
|
+
None,
|
|
2177
|
+
)
|
|
2178
|
+
_matched_exc = next(
|
|
2179
|
+
(e for e in _symptom_exception_types if _stem_lower == e.lower()),
|
|
2180
|
+
None,
|
|
2181
|
+
)
|
|
2182
|
+
_impl_match = next(
|
|
2183
|
+
(c for c in _symptom_class_names
|
|
2184
|
+
if _stem_lower in (c.lower() + "impl", c.lower() + "service",
|
|
2185
|
+
c.lower() + "serviceimpl", c.lower() + "helper")),
|
|
2186
|
+
None,
|
|
2187
|
+
)
|
|
2188
|
+
if _matched_class:
|
|
2189
|
+
content_boost += 3.0
|
|
2190
|
+
_why_parts.append(f"exact class match: {_stem} (+3.0)")
|
|
2191
|
+
elif _matched_exc:
|
|
2192
|
+
content_boost += 2.0
|
|
2193
|
+
_why_parts.append(f"exception class match: {_stem} (+2.0)")
|
|
2194
|
+
elif _impl_match:
|
|
2195
|
+
content_boost += 2.5
|
|
2196
|
+
_why_parts.append(f"class impl match: {_stem} (+2.5)")
|
|
2197
|
+
else:
|
|
2198
|
+
# Symbol appears anywhere in path (package adjacency)
|
|
2199
|
+
_path_class_hit = next(
|
|
2200
|
+
(c for c in _symptom_class_names if c.lower() in path_lower),
|
|
2201
|
+
None,
|
|
2202
|
+
)
|
|
2203
|
+
if _path_class_hit:
|
|
2204
|
+
content_boost += 1.0
|
|
2205
|
+
_why_parts.append(f"symbol in path: {_path_class_hit} (+1.0)")
|
|
2206
|
+
elif any(e.lower() in path_lower for e in _symptom_exception_types):
|
|
2207
|
+
content_boost += 0.8
|
|
2208
|
+
_why_parts.append("exception type in path (+0.8)")
|
|
2209
|
+
|
|
2210
|
+
# AND-weighted token intersection — multiple matching tokens >> single
|
|
2211
|
+
if _symptom_tokens:
|
|
2212
|
+
_path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
|
|
2213
|
+
_intersection = _symptom_tokens & _path_parts
|
|
2214
|
+
_n_match = len(_intersection)
|
|
2215
|
+
if _n_match >= 3:
|
|
2216
|
+
_tok_boost = min(1.2, _n_match * 0.25)
|
|
2217
|
+
content_boost += _tok_boost
|
|
2218
|
+
_why_parts.append(f"token AND match ({_n_match} terms: {sorted(_intersection)[:3]}) (+{_tok_boost:.2f})")
|
|
2219
|
+
elif _n_match == 2:
|
|
2220
|
+
content_boost += 0.4
|
|
2221
|
+
_why_parts.append(f"token AND match (2 terms: {sorted(_intersection)}) (+0.40)")
|
|
2222
|
+
# Single-token match: no boost — avoids OR explosion
|
|
2223
|
+
|
|
2224
|
+
# ── Git / annotation signals ──
|
|
2120
2225
|
if path in _uncommitted:
|
|
2121
2226
|
content_boost += 0.40
|
|
2122
2227
|
_why_parts.append("uncommitted change (+0.40)")
|
|
@@ -2203,7 +2308,7 @@ class TaskContextBuilder:
|
|
|
2203
2308
|
}
|
|
2204
2309
|
_repo_size = len(all_paths)
|
|
2205
2310
|
_task_budget = {
|
|
2206
|
-
"fix-bug":
|
|
2311
|
+
"fix-bug": 30, # hard cap — prevents token explosion on large repos
|
|
2207
2312
|
"onboard": max(15, min(25, _repo_size // 150)),
|
|
2208
2313
|
"explain": max(10, min(20, _repo_size // 200)),
|
|
2209
2314
|
"generate-tests": max(20, min(35, _repo_size // 100)),
|
|
@@ -2271,7 +2376,21 @@ class TaskContextBuilder:
|
|
|
2271
2376
|
_covered.add(_layer)
|
|
2272
2377
|
_missing.discard(_layer)
|
|
2273
2378
|
|
|
2274
|
-
|
|
2379
|
+
result = [_rf_map[p] for p in _selected if p in _rf_map]
|
|
2380
|
+
|
|
2381
|
+
# Assign fix-bug tiers based on raw score (pre-normalised total)
|
|
2382
|
+
if task_name == "fix-bug":
|
|
2383
|
+
_score_lookup = {path: total for total, path, _ in scored}
|
|
2384
|
+
for _rf in result:
|
|
2385
|
+
_s = _score_lookup.get(_rf.path, 0.0)
|
|
2386
|
+
if _s >= 4.0:
|
|
2387
|
+
_rf.tier = "high"
|
|
2388
|
+
elif _s >= 1.5:
|
|
2389
|
+
_rf.tier = "medium"
|
|
2390
|
+
else:
|
|
2391
|
+
_rf.tier = "low"
|
|
2392
|
+
|
|
2393
|
+
return result
|
|
2275
2394
|
except Exception:
|
|
2276
2395
|
return [f for _, _, f in scored[:15]]
|
|
2277
2396
|
|
sourcecode/serializer.py
CHANGED
|
@@ -409,7 +409,10 @@ def _spring_profiles_context(sm: "SourceMap") -> "Optional[dict[str, Any]]":
|
|
|
409
409
|
else:
|
|
410
410
|
matches = [
|
|
411
411
|
p for p in sm.file_paths
|
|
412
|
-
if
|
|
412
|
+
if (Path(p).stem.lower() == pfx
|
|
413
|
+
or Path(p).stem.lower().startswith(pfx + "-")
|
|
414
|
+
or Path(p).stem.lower().endswith("-" + pfx))
|
|
415
|
+
and p.endswith(".java")
|
|
413
416
|
]
|
|
414
417
|
if matches:
|
|
415
418
|
per_profile[profile] = [Path(p).name for p in matches[:5]]
|
|
@@ -534,6 +537,8 @@ def _bootstrap_structured(eps: list) -> "Optional[dict[str, Any]]":
|
|
|
534
537
|
|
|
535
538
|
for ep in eps:
|
|
536
539
|
path = getattr(ep, "path", "")
|
|
540
|
+
if "/test/" in path or "/tests/" in path:
|
|
541
|
+
continue
|
|
537
542
|
kind = getattr(ep, "kind", "")
|
|
538
543
|
stem = _Path(path).stem
|
|
539
544
|
|
|
@@ -587,8 +592,9 @@ def _bootstrap_structured(eps: list) -> "Optional[dict[str, Any]]":
|
|
|
587
592
|
module_names.append(module)
|
|
588
593
|
|
|
589
594
|
_ctrl_note = (
|
|
590
|
-
f"{controller_methods}
|
|
595
|
+
f"{controller_methods} detected entry-point methods across "
|
|
591
596
|
f"{controller_classes} controller classes"
|
|
597
|
+
f" (use 'sourcecode endpoints' for full surface)"
|
|
592
598
|
)
|
|
593
599
|
if len(module_names) > 30:
|
|
594
600
|
# Group by first path segment under ddd/ (inferred domain area)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.31.
|
|
3
|
+
Version: 1.31.6
|
|
4
4
|
Summary: Deterministic codebase context for AI coding agents
|
|
5
5
|
License: Apache License
|
|
6
6
|
Version 2.0, January 2004
|
|
@@ -225,7 +225,7 @@ Description-Content-Type: text/markdown
|
|
|
225
225
|
|
|
226
226
|
**Deterministic, behavior-aware codebase context for AI agents and PR review.**
|
|
227
227
|
|
|
228
|
-

|
|
229
229
|

|
|
230
230
|
|
|
231
231
|
---
|
|
@@ -261,7 +261,7 @@ pipx install sourcecode
|
|
|
261
261
|
|
|
262
262
|
```bash
|
|
263
263
|
sourcecode version
|
|
264
|
-
# sourcecode 1.31.
|
|
264
|
+
# sourcecode 1.31.6
|
|
265
265
|
```
|
|
266
266
|
|
|
267
267
|
---
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sourcecode/__init__.py,sha256=
|
|
1
|
+
sourcecode/__init__.py,sha256=YtkXxLCwI2P3youz8qWDCC8rLLuveg8_p3Rw5TwvrXs,103
|
|
2
2
|
sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
|
|
3
3
|
sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
|
|
4
4
|
sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
|
|
5
5
|
sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
|
|
6
6
|
sourcecode/classifier.py,sha256=-0t0HLc9L9UleMLfclfLM3AXhBjUb_AYyBPDbvgWtac,7755
|
|
7
|
-
sourcecode/cli.py,sha256=
|
|
8
|
-
sourcecode/code_notes_analyzer.py,sha256=
|
|
7
|
+
sourcecode/cli.py,sha256=lAqlKvLs3nrHKCM6RfsvQr1O_jWi3WOcdjhhlE8fmyg,129082
|
|
8
|
+
sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
|
|
9
9
|
sourcecode/confidence_analyzer.py,sha256=ZUn-Nywi5TEQcuozqK_vfOyPT-a1dYYO42elAtVFV-k,16412
|
|
10
10
|
sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
|
|
11
11
|
sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
|
|
@@ -21,8 +21,9 @@ sourcecode/flow_analyzer.py,sha256=dSiuY4w49k29jW_EPXUOND9B5uVbuCA7kjnuHi-pIWA,2
|
|
|
21
21
|
sourcecode/git_analyzer.py,sha256=0Gyj-vMpIIN4nfriKXVRouNYBeJ59s6pQDX2Xu9Pq-U,13177
|
|
22
22
|
sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
|
|
23
23
|
sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
|
|
24
|
+
sourcecode/path_filters.py,sha256=ROFRQ8eSLBEMiixK9f45-RO7um4VEEcjoD5AA4I427I,3739
|
|
24
25
|
sourcecode/pr_comment_renderer.py,sha256=smHslxiG14lrytCkq5nFrFu-qTHgA-t-LFYfdrfjz2o,14423
|
|
25
|
-
sourcecode/prepare_context.py,sha256
|
|
26
|
+
sourcecode/prepare_context.py,sha256=-9kTYuPhwr79mF6lNe9tI7glGAU_d84GyVurHceGroo,189427
|
|
26
27
|
sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
|
|
27
28
|
sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,12970
|
|
28
29
|
sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
|
|
@@ -33,7 +34,7 @@ sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBG
|
|
|
33
34
|
sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
|
|
34
35
|
sourcecode/schema.py,sha256=fj3BZ3IcnNV4j21BFIEvz8Qnw_vZoqIbzzRg-qQ-nd0,24530
|
|
35
36
|
sourcecode/semantic_analyzer.py,sha256=12TwXYkYbDcBdu0heX_EmfPM2EkO8a_r5osf0SaeQbs,88956
|
|
36
|
-
sourcecode/serializer.py,sha256=
|
|
37
|
+
sourcecode/serializer.py,sha256=1y9DAkH2aBzlsmkHcSSc-t72_4fv9RZIuG4uhhGG5QE,111933
|
|
37
38
|
sourcecode/summarizer.py,sha256=lPlKhMh28nueXkPo2xKeD3DUFYVGRlJMIdY-8TSM-ls,17486
|
|
38
39
|
sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
|
|
39
40
|
sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
|
|
@@ -44,9 +45,9 @@ sourcecode/detectors/dart.py,sha256=QbqaL5v18-_ort75HihVBt8MsKUfOcFDF8IpWFLiXpI,
|
|
|
44
45
|
sourcecode/detectors/dotnet.py,sha256=oi8zq3AfUItlK3h_qM81vOe1ZVTIU9LBKIlIrRDuqOs,6864
|
|
45
46
|
sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJE,1713
|
|
46
47
|
sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
|
|
47
|
-
sourcecode/detectors/heuristic.py,sha256=
|
|
48
|
+
sourcecode/detectors/heuristic.py,sha256=7cRxrip4yIaggYzZJB6ef8yHKh-gHgiH_pXMFcjlyFU,3723
|
|
48
49
|
sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
|
|
49
|
-
sourcecode/detectors/java.py,sha256=
|
|
50
|
+
sourcecode/detectors/java.py,sha256=O2JdznVYv5364GSQExksYLsAi0pvDUW9ZowpsL0xLgM,24451
|
|
50
51
|
sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
|
|
51
52
|
sourcecode/detectors/nodejs.py,sha256=Hg3Gmr7yIMJFiLoDwOTk2wtu00wxIs6kZf-oQujTFUA,13187
|
|
52
53
|
sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
|
|
@@ -72,8 +73,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
|
|
|
72
73
|
sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
|
|
73
74
|
sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
|
|
74
75
|
sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
|
|
75
|
-
sourcecode-1.31.
|
|
76
|
-
sourcecode-1.31.
|
|
77
|
-
sourcecode-1.31.
|
|
78
|
-
sourcecode-1.31.
|
|
79
|
-
sourcecode-1.31.
|
|
76
|
+
sourcecode-1.31.6.dist-info/METADATA,sha256=aKMdH4KYx2KrZ1rx8Ylo73TwfocQ5szGN1U3CDUn_tM,29083
|
|
77
|
+
sourcecode-1.31.6.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
78
|
+
sourcecode-1.31.6.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
|
|
79
|
+
sourcecode-1.31.6.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
|
|
80
|
+
sourcecode-1.31.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|