sourcecode 1.33.10__tar.gz → 1.33.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-1.33.10 → sourcecode-1.33.12}/PKG-INFO +2 -2
- {sourcecode-1.33.10 → sourcecode-1.33.12}/README.md +1 -1
- {sourcecode-1.33.10 → sourcecode-1.33.12}/pyproject.toml +1 -1
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/__init__.py +1 -1
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/cli.py +92 -12
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/server.py +3 -1
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/prepare_context.py +100 -13
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/repository_ir.py +63 -13
- {sourcecode-1.33.10 → sourcecode-1.33.12}/.github/workflows/build-windows.yml +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/.gitignore +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/.ruff.toml +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/CHANGELOG.md +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/CONTRIBUTING.md +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/LICENSE +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/SECURITY.md +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/raw +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/cache.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/canonical_ir.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/classifier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/context_scorer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/error_schema.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/flow_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/license.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/__init__.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/applier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/backup.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/detector.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/planner.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/orchestrator.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/registry.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/runner.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp_nudge.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/output_budget.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/path_filters.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/pr_comment_renderer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/progress.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ranking_engine.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/redactor.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ris.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/scanner.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/schema.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/semantic_analyzer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/serializer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/workspace.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.33.10
|
|
3
|
+
Version: 1.33.12
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
|
|
40
40
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
41
41
|
|
|
42
|
-

|
|
43
43
|

|
|
44
44
|
|
|
45
45
|
---
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|

|
|
7
7
|
|
|
8
8
|
---
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sourcecode"
|
|
7
|
-
version = "1.33.10"
|
|
7
|
+
version = "1.33.12"
|
|
8
8
|
description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -2677,10 +2677,11 @@ def prepare_context_cmd(
|
|
|
2677
2677
|
# relevant_files goal: untested SOURCE files. Test files belong in test_gaps.
|
|
2678
2678
|
# Without this filter, high-churn test files rank above untested source files.
|
|
2679
2679
|
_rfs = [f for f in _rfs if getattr(f, "role", None) != "test"]
|
|
2680
|
-
|
|
2681
|
-
|
|
2682
|
-
|
|
2683
|
-
|
|
2680
|
+
_serialized_rfs = [_serialize_relevant_file(f) for f in _rfs]
|
|
2681
|
+
out["relevant_files"] = _serialized_rfs
|
|
2682
|
+
if task == "fix-bug":
|
|
2683
|
+
# ranked_files was the v1 name for this field — emit as backward-compat alias.
|
|
2684
|
+
out["ranked_files"] = _serialized_rfs
|
|
2684
2685
|
if _task_include("key_dependencies") and output.key_dependencies:
|
|
2685
2686
|
out["key_dependencies"] = output.key_dependencies
|
|
2686
2687
|
if _task_include("gaps") and output.gaps:
|
|
@@ -3291,13 +3292,33 @@ def impact_cmd(
|
|
|
3291
3292
|
)
|
|
3292
3293
|
from sourcecode.output_budget import trim_to_budget as _trim, BUDGET_IMPACT
|
|
3293
3294
|
|
|
3295
|
+
import sys as _sys_ic
|
|
3296
|
+
# Legacy-compat: old syntax was `impact <path> <target>`.
|
|
3297
|
+
# Detect: target resolves to an existing directory (not a class name), and
|
|
3298
|
+
# the path arg is not a valid directory (looks like a class name).
|
|
3299
|
+
_target_as_path = Path(target)
|
|
3300
|
+
if _target_as_path.is_dir() and not path.resolve().is_dir():
|
|
3301
|
+
# Gate on isatty() — non-TTY (MCP, pipes) must not receive text mixed into JSON stdout.
|
|
3302
|
+
if getattr(_sys_ic.stderr, "isatty", lambda: False)():
|
|
3303
|
+
_sys_ic.stderr.write(
|
|
3304
|
+
f"[impact] Legacy argument order detected: '{target}' is a directory, not a class name.\n"
|
|
3305
|
+
f"[impact] Swapping: target='{path}', path='{target}'. "
|
|
3306
|
+
f"New syntax: sourcecode impact <target> [path]\n"
|
|
3307
|
+
)
|
|
3308
|
+
_sys_ic.stderr.flush()
|
|
3309
|
+
target, path = str(path), _target_as_path
|
|
3310
|
+
|
|
3294
3311
|
root = path.resolve()
|
|
3295
3312
|
if not root.is_dir():
|
|
3296
3313
|
_emit_error_json(
|
|
3297
3314
|
INVALID_INPUT_CODE,
|
|
3298
3315
|
f"'{root}' is not a valid directory.",
|
|
3299
3316
|
path=str(root),
|
|
3300
|
-
hint=
|
|
3317
|
+
hint=(
|
|
3318
|
+
"Pass an existing repository directory as the second argument. "
|
|
3319
|
+
"New syntax: sourcecode impact <target> [path] — "
|
|
3320
|
+
"target is the class name, path is the repo root."
|
|
3321
|
+
),
|
|
3301
3322
|
expected="A directory path.",
|
|
3302
3323
|
)
|
|
3303
3324
|
raise typer.Exit(1)
|
|
@@ -3621,11 +3642,15 @@ def fix_bug_cmd(
|
|
|
3621
3642
|
sourcecode onboard . — Full architecture context first
|
|
3622
3643
|
"""
|
|
3623
3644
|
if not symptom:
|
|
3624
|
-
|
|
3625
|
-
|
|
3626
|
-
|
|
3627
|
-
|
|
3628
|
-
|
|
3645
|
+
import sys as _sys_fb
|
|
3646
|
+
# Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
|
|
3647
|
+
# must never receive informational text mixed into JSON stdout.
|
|
3648
|
+
if getattr(_sys_fb.stderr, "isatty", lambda: False)():
|
|
3649
|
+
typer.echo(
|
|
3650
|
+
"[fix-bug] Results are significantly better with --symptom. "
|
|
3651
|
+
"Example: --symptom 'NullPointerException in PaymentService'",
|
|
3652
|
+
err=True,
|
|
3653
|
+
)
|
|
3629
3654
|
ctx.invoke(
|
|
3630
3655
|
prepare_context_cmd,
|
|
3631
3656
|
task="fix-bug",
|
|
@@ -3904,8 +3929,23 @@ def activate_cmd(
|
|
|
3904
3929
|
|
|
3905
3930
|
@app.command("version")
|
|
3906
3931
|
def version_cmd() -> None:
|
|
3907
|
-
"""Show version and exit.
|
|
3908
|
-
|
|
3932
|
+
"""Show version and exit.
|
|
3933
|
+
|
|
3934
|
+
Outputs human-readable text on interactive terminals.
|
|
3935
|
+
Outputs structured JSON on non-TTY (MCP, pipes, scripts):
|
|
3936
|
+
{"cli_version": "1.33.11", "mcp_schema_version": "1.33.11",
|
|
3937
|
+
"compatibility_schema_version": "1.0"}
|
|
3938
|
+
"""
|
|
3939
|
+
import sys as _sys_ver
|
|
3940
|
+
if getattr(_sys_ver.stdout, "isatty", lambda: False)():
|
|
3941
|
+
typer.echo(f"sourcecode {__version__}")
|
|
3942
|
+
else:
|
|
3943
|
+
import json as _json_ver
|
|
3944
|
+
typer.echo(_json_ver.dumps({
|
|
3945
|
+
"cli_version": __version__,
|
|
3946
|
+
"mcp_schema_version": __version__,
|
|
3947
|
+
"compatibility_schema_version": "1.0",
|
|
3948
|
+
}, ensure_ascii=False))
|
|
3909
3949
|
|
|
3910
3950
|
|
|
3911
3951
|
# ── config ────────────────────────────────────────────────────────────────────
|
|
@@ -4351,6 +4391,46 @@ def mcp_remove(
|
|
|
4351
4391
|
typer.echo(" Re-add: sourcecode mcp init")
|
|
4352
4392
|
|
|
4353
4393
|
|
|
4394
|
+
@mcp_app.command("list-tools")
def mcp_list_tools(
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """List all MCP tools exposed by the sourcecode server.

    \b
    Shows each tool name, its description, and the CLI command it maps to.
    Useful for discovering capabilities when using sourcecode as an MCP server.

    \b
    Examples:
      sourcecode mcp list-tools
      sourcecode mcp list-tools --json
    """
    # NOTE: the docstring above is rendered as the command's help text, so
    # implementation notes live in comments rather than in the docstring.
    import asyncio
    import json as _json

    from sourcecode.mcp.server import mcp as _mcp

    # list_tools() is awaited via asyncio.run, so this command must be invoked
    # from synchronous code (no event loop already running).
    tools = asyncio.run(_mcp.list_tools())

    # Normalise once: (name, stripped description), sorted by tool name so the
    # text and JSON outputs share the same deterministic order.
    entries = [
        (t.name, (t.description or "").strip())
        for t in sorted(tools, key=lambda t: t.name)
    ]

    if json_output:
        payload = [{"name": name, "description": desc} for name, desc in entries]
        typer.echo(_json.dumps(payload, indent=2, ensure_ascii=False))
        return

    typer.echo(f"sourcecode MCP tools ({len(entries)} available)\n")
    for name, desc in entries:
        # "".splitlines() is [], so a missing or whitespace-only description
        # must fall back to "" instead of indexing [0] (IndexError).
        first_line = next(iter(desc.splitlines()), "")
        typer.echo(f" {name:<35} {first_line}")
    typer.echo("")
    typer.echo("Use: sourcecode mcp serve — start MCP server on stdio")
    typer.echo("Use: sourcecode mcp init — configure MCP client")
|
|
4432
|
+
|
|
4433
|
+
|
|
4354
4434
|
# ── Cache subcommands ─────────────────────────────────────────────────────────
|
|
4355
4435
|
|
|
4356
4436
|
|
|
@@ -959,9 +959,11 @@ _TELEMETRY_ACTIONS = frozenset({"status", "enable", "disable"})
|
|
|
959
959
|
|
|
960
960
|
@mcp.tool()
|
|
961
961
|
def version() -> dict:
|
|
962
|
-
"""
|
|
962
|
+
"""Return sourcecode version and MCP compatibility metadata.
|
|
963
963
|
|
|
964
964
|
Maps to: sourcecode version
|
|
965
|
+
Returns structured JSON: cli_version, mcp_schema_version, compatibility_schema_version.
|
|
966
|
+
cli_version and mcp_schema_version are always identical (released together).
|
|
965
967
|
"""
|
|
966
968
|
return _execute(["version"])
|
|
967
969
|
|
|
@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
|
|
|
627
627
|
"trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
|
|
628
628
|
}
|
|
629
629
|
|
|
630
|
+
# Generic words that add noise when used as symptom keywords in large repos.
|
|
631
|
+
# "token" and "user" are too ubiquitous in auth systems to be useful alone.
|
|
632
|
+
_SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
|
|
633
|
+
"fails", "fail", "failed", "failure",
|
|
634
|
+
"not", "for", "with", "when", "that", "the", "and", "but",
|
|
635
|
+
"are", "has", "had", "have", "was", "were",
|
|
636
|
+
"get", "set", "can", "does", "did", "should", "would", "could",
|
|
637
|
+
"null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
|
|
638
|
+
"error", "issue", "problem", "bug",
|
|
639
|
+
"from", "into", "via", "due", "also", "after", "before",
|
|
640
|
+
"slow", "fast", "new", "old",
|
|
641
|
+
})
|
|
642
|
+
|
|
643
|
+
# Repo-scale threshold: above this file count, use stricter injection logic.
|
|
644
|
+
_LARGE_REPO_THRESHOLD = 500
|
|
630
645
|
|
|
631
646
|
MAX_FILES_FAST = 2000 # above this threshold --fast uses git-index-only mode
|
|
632
647
|
|
|
@@ -1225,8 +1240,8 @@ class TaskContextBuilder:
|
|
|
1225
1240
|
# Distinguish: no_staged_changes (CI, no --since) vs no_diff (empty range)
|
|
1226
1241
|
if _pr_scope_source == "no_staged_changes":
|
|
1227
1242
|
_no_diff_msg = (
|
|
1228
|
-
"No --since ref provided and no staged changes found. "
|
|
1229
|
-
"
|
|
1243
|
+
"No --since ref provided and no staged/uncommitted changes found. "
|
|
1244
|
+
"Provide --since <ref> to specify the base commit for the diff."
|
|
1230
1245
|
)
|
|
1231
1246
|
return TaskOutput(
|
|
1232
1247
|
task="review-pr", goal=spec.goal,
|
|
@@ -1239,7 +1254,8 @@ class TaskContextBuilder:
|
|
|
1239
1254
|
error_message=_no_diff_msg,
|
|
1240
1255
|
error_hints=[
|
|
1241
1256
|
"Add --since <ref> to specify a base commit.",
|
|
1242
|
-
"
|
|
1257
|
+
"Common values: --since HEAD~1 (last commit) | --since origin/main | --since main",
|
|
1258
|
+
"If reviewing uncommitted changes: stage them first (git add), then run without --since.",
|
|
1243
1259
|
],
|
|
1244
1260
|
gaps=[_no_diff_msg],
|
|
1245
1261
|
ci_decision="no_diff_source",
|
|
@@ -1694,7 +1710,7 @@ class TaskContextBuilder:
|
|
|
1694
1710
|
_camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
|
|
1695
1711
|
symptom_keywords = [
|
|
1696
1712
|
w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
|
|
1697
|
-
if len(w) > 2
|
|
1713
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
1698
1714
|
]
|
|
1699
1715
|
if symptom_keywords:
|
|
1700
1716
|
# Pre-compile combined keyword pattern for fast content scanning
|
|
@@ -1758,14 +1774,27 @@ class TaskContextBuilder:
|
|
|
1758
1774
|
))
|
|
1759
1775
|
_existing_paths.add(_cp)
|
|
1760
1776
|
|
|
1761
|
-
#
|
|
1777
|
+
# Scale-awareness: large repos need wider scan and stricter injection.
|
|
1778
|
+
_is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
|
|
1779
|
+
|
|
1780
|
+
# Pass 4: inject files whose path matches symptom keywords.
|
|
1781
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" matches
|
|
1782
|
+
# the keyword "offline" even without an explicit directory separator.
|
|
1783
|
+
_p4_dirs_of_injected: set[str] = set() # directories of high-score injects
|
|
1762
1784
|
for _p in all_paths:
|
|
1763
1785
|
if _p in _existing_paths:
|
|
1764
1786
|
continue
|
|
1765
1787
|
if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
|
|
1766
1788
|
continue
|
|
1767
1789
|
_p_lower = _p.lower()
|
|
1768
|
-
|
|
1790
|
+
# CamelCase-expand the stem and append to the search string so
|
|
1791
|
+
# "OfflineSessionLoader" → "offline session loader" can match
|
|
1792
|
+
# individual keyword tokens beyond what substring search finds.
|
|
1793
|
+
_stem_raw = Path(_p).stem
|
|
1794
|
+
_stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
|
|
1795
|
+
_stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
|
|
1796
|
+
_p_search = _p_lower + " " + _stem_exp
|
|
1797
|
+
_matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
|
|
1769
1798
|
if not _matching_kws:
|
|
1770
1799
|
continue
|
|
1771
1800
|
_boost = 0.2 * len(_matching_kws)
|
|
@@ -1780,6 +1809,8 @@ class TaskContextBuilder:
|
|
|
1780
1809
|
))
|
|
1781
1810
|
_existing_paths.add(_p)
|
|
1782
1811
|
_sx_direct_path.append(_p)
|
|
1812
|
+
if _injected_score >= 0.7:
|
|
1813
|
+
_p4_dirs_of_injected.add(str(Path(_p).parent))
|
|
1783
1814
|
|
|
1784
1815
|
# Pass 4b: grep-based injection for frontend→backend synonym terms.
|
|
1785
1816
|
# Runs parallel grep for each backend term to find files not yet in
|
|
@@ -1827,9 +1858,41 @@ class TaskContextBuilder:
|
|
|
1827
1858
|
))
|
|
1828
1859
|
_existing_paths_now.add(_gf)
|
|
1829
1860
|
|
|
1830
|
-
#
|
|
1831
|
-
|
|
1832
|
-
|
|
1861
|
+
# Pass 4c: subsystem co-location — inject sibling files from the same
|
|
1862
|
+
# directories as high-score (≥0.7) path-matched files. This catches
|
|
1863
|
+
# architecturally adjacent classes that don't mention symptom keywords
|
|
1864
|
+
# in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
|
|
1865
|
+
# siblings in the same infinispan/ package).
|
|
1866
|
+
if _is_large_repo and _p4_dirs_of_injected:
|
|
1867
|
+
_coloc_existing = {rf.path for rf in relevant_files}
|
|
1868
|
+
for _cp in all_paths:
|
|
1869
|
+
if _cp in _coloc_existing:
|
|
1870
|
+
continue
|
|
1871
|
+
if Path(_cp).suffix.lower() not in _src_exts:
|
|
1872
|
+
continue
|
|
1873
|
+
if str(Path(_cp).parent) in _p4_dirs_of_injected:
|
|
1874
|
+
relevant_files.append(RelevantFile(
|
|
1875
|
+
path=_cp,
|
|
1876
|
+
role="symptom_match",
|
|
1877
|
+
score=0.55,
|
|
1878
|
+
reason="subsystem co-location: same directory as symptom-matched file",
|
|
1879
|
+
why="directory proximity injection",
|
|
1880
|
+
))
|
|
1881
|
+
_coloc_existing.add(_cp)
|
|
1882
|
+
|
|
1883
|
+
# Sort before content scan so top candidates get read first.
|
|
1884
|
+
# In large repos: prioritise symptom_match files within each score band
|
|
1885
|
+
# so that subsystem-relevant files are content-scanned before generic
|
|
1886
|
+
# structural files at the same score.
|
|
1887
|
+
if _is_large_repo:
|
|
1888
|
+
relevant_files = sorted(
|
|
1889
|
+
relevant_files,
|
|
1890
|
+
key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
|
|
1891
|
+
)
|
|
1892
|
+
_CONTENT_SCAN_LIMIT = 150
|
|
1893
|
+
else:
|
|
1894
|
+
relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
|
|
1895
|
+
_CONTENT_SCAN_LIMIT = 80
|
|
1833
1896
|
_scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
|
|
1834
1897
|
_no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
|
|
1835
1898
|
|
|
@@ -1904,15 +1967,31 @@ class TaskContextBuilder:
|
|
|
1904
1967
|
elif _extra_syn > 0:
|
|
1905
1968
|
_new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
|
|
1906
1969
|
|
|
1970
|
+
_final_score = round(min(_rf.score + _total_extra, 1.0), 2)
|
|
1907
1971
|
_boosted.append(RelevantFile(
|
|
1908
1972
|
path=_rf.path,
|
|
1909
1973
|
role=_rf.role,
|
|
1910
|
-
score=
|
|
1974
|
+
score=_final_score,
|
|
1911
1975
|
reason=_new_reason,
|
|
1912
1976
|
why=_rf.why,
|
|
1913
1977
|
))
|
|
1914
1978
|
|
|
1915
|
-
|
|
1979
|
+
# Use total boost as a secondary sort key so symptom-matched files
|
|
1980
|
+
# that were boosted from a lower base score rank above structural
|
|
1981
|
+
# files that coincidentally reach the same capped score of 1.0.
|
|
1982
|
+
# This prevents budget-trimming from discarding the most relevant files.
|
|
1983
|
+
_boost_totals: dict[str, float] = {}
|
|
1984
|
+
for _rf in _scan_candidates:
|
|
1985
|
+
pass # populated below
|
|
1986
|
+
_boost_totals = {}
|
|
1987
|
+
for _idx, _rf in enumerate(_scan_candidates):
|
|
1988
|
+
_b_rf = _boosted[_idx]
|
|
1989
|
+
_boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
|
|
1990
|
+
|
|
1991
|
+
relevant_files = sorted(
|
|
1992
|
+
_boosted + _no_scan_candidates,
|
|
1993
|
+
key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
|
|
1994
|
+
)
|
|
1916
1995
|
|
|
1917
1996
|
# Synonym note (only when synonyms actually fired)
|
|
1918
1997
|
if _frontend_kws and _sx_synonyms:
|
|
@@ -2389,7 +2468,8 @@ class TaskContextBuilder:
|
|
|
2389
2468
|
else:
|
|
2390
2469
|
_symptom_class_names.add(_tok)
|
|
2391
2470
|
_symptom_tokens = {
|
|
2392
|
-
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2471
|
+
w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
|
|
2472
|
+
if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
|
|
2393
2473
|
}
|
|
2394
2474
|
|
|
2395
2475
|
scored: list[tuple[float, str, RelevantFile]] = []
|
|
@@ -2486,9 +2566,16 @@ class TaskContextBuilder:
|
|
|
2486
2566
|
content_boost += 0.8
|
|
2487
2567
|
_why_parts.append("exception type in path (+0.8)")
|
|
2488
2568
|
|
|
2489
|
-
# AND-weighted token intersection — multiple matching tokens >> single
|
|
2569
|
+
# AND-weighted token intersection — multiple matching tokens >> single.
|
|
2570
|
+
# CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
|
|
2571
|
+
# "offline", "session", "loader" as individual tokens beyond what the raw
|
|
2572
|
+
# path splitting yields. This lets multi-word symptoms match class names.
|
|
2490
2573
|
if _symptom_tokens:
|
|
2491
2574
|
_path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
|
|
2575
|
+
_stem_cc = Path(path).stem
|
|
2576
|
+
_stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
|
|
2577
|
+
_stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
|
|
2578
|
+
_path_parts.update(_stem_cc_exp.split())
|
|
2492
2579
|
_intersection = _symptom_tokens & _path_parts
|
|
2493
2580
|
_n_match = len(_intersection)
|
|
2494
2581
|
if _n_match >= 3:
|
|
@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
|
|
|
888
888
|
# Phase 3 — Symbol relation graph
|
|
889
889
|
# ---------------------------------------------------------------------------
|
|
890
890
|
|
|
891
|
+
def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
    """Build a ``{package: {simple_name: FQN}}`` map from class/interface symbols.

    Used by build_repo_ir to resolve same-package types that need no explicit
    import: in Java, classes in the same package reference each other without
    import statements, so a per-file import map has no entry for them and this
    map provides the fallback resolution.

    Args:
        symbols: Symbol records; only ``.type`` and ``.symbol`` are read.

    Returns:
        Mapping of package name to ``{simple_name: fully_qualified_name}``.
        Symbols without a dot are filed under the empty package ``""``.
        When two records share a package and simple name, the later one wins.
    """
    result: dict[str, dict[str, str]] = {}
    for sym in symbols:
        # Only type declarations participate; a "#" marks a member-style
        # symbol (e.g. "Class#method"), which is never a resolvable type.
        if sym.type not in ("class", "interface") or "#" in sym.symbol:
            continue
        # One rpartition yields both halves: "a.b.C" -> ("a.b", ".", "C"),
        # and a dot-less "C" -> ("", "", "C"), i.e. the empty package.
        pkg, _, simple = sym.symbol.rpartition(".")
        if not simple:
            # Empty or dot-terminated symbol: there is no simple name to
            # register, and an "" key could never match a real type lookup.
            continue
        result.setdefault(pkg, {})[simple] = sym.symbol
    return result
|
|
906
|
+
|
|
907
|
+
|
|
891
908
|
def _build_relations(
|
|
892
909
|
symbols: list[SymbolRecord],
|
|
893
910
|
raw_imports: list[str],
|
|
894
911
|
source: str,
|
|
895
912
|
package: str,
|
|
896
913
|
rel_path: str,
|
|
914
|
+
same_pkg_types: dict[str, str] | None = None,
|
|
897
915
|
) -> list[RelationEdge]:
|
|
898
|
-
"""Phase 3: Build directed relation graph for symbols in one file.
|
|
916
|
+
"""Phase 3: Build directed relation graph for symbols in one file.
|
|
917
|
+
|
|
918
|
+
same_pkg_types: {simple_name → FQN} for classes in the same package.
|
|
919
|
+
Passed by build_repo_ir after a first pass that collects all symbols.
|
|
920
|
+
Enables resolving injection targets that share a package with the caller
|
|
921
|
+
and therefore need no explicit Java import statement.
|
|
922
|
+
"""
|
|
899
923
|
edges: list[RelationEdge] = []
|
|
924
|
+
_same_pkg: dict[str, str] = same_pkg_types or {}
|
|
900
925
|
|
|
901
926
|
import_map: dict[str, str] = {}
|
|
902
927
|
for fqn in raw_imports:
|
|
@@ -929,15 +954,27 @@ def _build_relations(
|
|
|
929
954
|
))
|
|
930
955
|
|
|
931
956
|
if sym.type == "field":
|
|
932
|
-
|
|
957
|
+
_inject_ann = next(
|
|
958
|
+
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
959
|
+
)
|
|
960
|
+
_field_targets: set[str] = set(sym.imports_used)
|
|
961
|
+
# Same-package field injection: imports_used is empty when the field type
|
|
962
|
+
# shares a package with the declaring class (no import needed in Java).
|
|
963
|
+
# Extract type from signature ("Type name") and resolve via same_pkg_types.
|
|
964
|
+
if not _field_targets and _same_pkg:
|
|
965
|
+
_sig_type = (sym.signature or "").split()[0] if sym.signature else ""
|
|
966
|
+
_sig_base = re.sub(r'<.*', '', _sig_type).strip()
|
|
967
|
+
if _sig_base and _sig_base[0].isupper():
|
|
968
|
+
_same_fqn = _same_pkg.get(_sig_base)
|
|
969
|
+
if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
|
|
970
|
+
_field_targets.add(_same_fqn)
|
|
971
|
+
for imp_fqn in _field_targets:
|
|
933
972
|
edges.append(RelationEdge(
|
|
934
973
|
from_symbol=sym_fqn,
|
|
935
974
|
to_symbol=imp_fqn,
|
|
936
975
|
type="injects",
|
|
937
976
|
confidence="high",
|
|
938
|
-
evidence={"type": "annotation", "value":
|
|
939
|
-
(a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
|
|
940
|
-
)},
|
|
977
|
+
evidence={"type": "annotation", "value": _inject_ann},
|
|
941
978
|
))
|
|
942
979
|
|
|
943
980
|
# ── Constructor injection ─────────────────────────────────────────────────
|
|
@@ -949,7 +986,7 @@ def _build_relations(
|
|
|
949
986
|
continue
|
|
950
987
|
for simple_type in sym.param_types:
|
|
951
988
|
base = re.sub(r'<.*', '', simple_type).strip()
|
|
952
|
-
fqn = import_map.get(base)
|
|
989
|
+
fqn = import_map.get(base) or _same_pkg.get(base)
|
|
953
990
|
if fqn:
|
|
954
991
|
edges.append(RelationEdge(
|
|
955
992
|
from_symbol=sym.symbol,
|
|
@@ -982,7 +1019,7 @@ def _build_relations(
|
|
|
982
1019
|
continue
|
|
983
1020
|
_ftype = fld.group("type").strip()
|
|
984
1021
|
_base = re.sub(r'<.*', '', _ftype).strip()
|
|
985
|
-
_fqn = import_map.get(_base)
|
|
1022
|
+
_fqn = import_map.get(_base) or _same_pkg.get(_base)
|
|
986
1023
|
if _fqn:
|
|
987
1024
|
edges.append(RelationEdge(
|
|
988
1025
|
from_symbol=sym.symbol,
|
|
@@ -2632,24 +2669,38 @@ def build_repo_ir(
|
|
|
2632
2669
|
if since:
|
|
2633
2670
|
_since_changed = _get_git_changed_files(root, since)
|
|
2634
2671
|
|
|
2672
|
+
# Pass 1: extract symbols from all files so we can build the same-package
|
|
2673
|
+
# type map before building relations. Java classes in the same package
|
|
2674
|
+
# reference each other without import statements, so import_map alone cannot
|
|
2675
|
+
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2676
|
+
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2635
2677
|
for rel_path in sorted(file_paths):
|
|
2636
2678
|
abs_path = root / rel_path
|
|
2637
2679
|
try:
|
|
2638
2680
|
source = abs_path.read_text(encoding="utf-8", errors="replace")
|
|
2639
2681
|
except OSError:
|
|
2640
2682
|
continue
|
|
2683
|
+
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2684
|
+
all_symbols.extend(symbols)
|
|
2685
|
+
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
2686
|
+
|
|
2687
|
+
# Build {package: {simple_name: FQN}} from every class/interface found.
|
|
2688
|
+
_same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
|
|
2689
|
+
|
|
2690
|
+
# Pass 2: build relations with same-package type resolution available.
|
|
2691
|
+
for rel_path, source, package, raw_imports, symbols in _per_file:
|
|
2692
|
+
same_pkg_types = _same_pkg_map.get(package, {})
|
|
2693
|
+
relations = _build_relations(
|
|
2694
|
+
symbols, raw_imports, source, package, rel_path,
|
|
2695
|
+
same_pkg_types=same_pkg_types,
|
|
2696
|
+
)
|
|
2641
2697
|
|
|
2642
2698
|
old_source: Optional[str] = None
|
|
2643
2699
|
if since:
|
|
2644
|
-
# Only fetch old content for files known to have changed.
|
|
2645
|
-
# Unchanged files have no diff entries — skip git show entirely.
|
|
2646
2700
|
_file_changed = _since_changed is None or rel_path in _since_changed
|
|
2647
2701
|
if _file_changed:
|
|
2648
2702
|
old_source = _get_git_old_content(root, rel_path, since)
|
|
2649
2703
|
|
|
2650
|
-
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2651
|
-
relations = _build_relations(symbols, raw_imports, source, package, rel_path)
|
|
2652
|
-
|
|
2653
2704
|
if old_source is not None:
|
|
2654
2705
|
_, old_symbols, _ = _extract_symbols(old_source, rel_path)
|
|
2655
2706
|
all_changed.extend(_diff_symbols(old_symbols, symbols))
|
|
@@ -2664,7 +2715,6 @@ def build_repo_ir(
|
|
|
2664
2715
|
confidence="high",
|
|
2665
2716
|
))
|
|
2666
2717
|
|
|
2667
|
-
all_symbols.extend(symbols)
|
|
2668
2718
|
all_relations.extend(relations)
|
|
2669
2719
|
|
|
2670
2720
|
spring_summary = _build_spring_summary(all_symbols)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|