sourcecode 1.33.10__tar.gz → 1.33.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {sourcecode-1.33.10 → sourcecode-1.33.12}/PKG-INFO +2 -2
  2. {sourcecode-1.33.10 → sourcecode-1.33.12}/README.md +1 -1
  3. {sourcecode-1.33.10 → sourcecode-1.33.12}/pyproject.toml +1 -1
  4. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/cli.py +92 -12
  6. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/server.py +3 -1
  7. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/prepare_context.py +100 -13
  8. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/repository_ir.py +63 -13
  9. {sourcecode-1.33.10 → sourcecode-1.33.12}/.github/workflows/build-windows.yml +0 -0
  10. {sourcecode-1.33.10 → sourcecode-1.33.12}/.gitignore +0 -0
  11. {sourcecode-1.33.10 → sourcecode-1.33.12}/.ruff.toml +0 -0
  12. {sourcecode-1.33.10 → sourcecode-1.33.12}/CHANGELOG.md +0 -0
  13. {sourcecode-1.33.10 → sourcecode-1.33.12}/CONTRIBUTING.md +0 -0
  14. {sourcecode-1.33.10 → sourcecode-1.33.12}/LICENSE +0 -0
  15. {sourcecode-1.33.10 → sourcecode-1.33.12}/SECURITY.md +0 -0
  16. {sourcecode-1.33.10 → sourcecode-1.33.12}/raw +0 -0
  17. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/adaptive_scanner.py +0 -0
  18. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/architecture_analyzer.py +0 -0
  19. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/architecture_summary.py +0 -0
  20. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ast_extractor.py +0 -0
  21. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/cache.py +0 -0
  22. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/canonical_ir.py +0 -0
  23. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/classifier.py +0 -0
  24. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/code_notes_analyzer.py +0 -0
  25. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/confidence_analyzer.py +0 -0
  26. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/context_scorer.py +0 -0
  27. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/context_summarizer.py +0 -0
  28. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/contract_model.py +0 -0
  29. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/contract_pipeline.py +0 -0
  30. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/coverage_parser.py +0 -0
  31. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/dependency_analyzer.py +0 -0
  32. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/__init__.py +0 -0
  33. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/base.py +0 -0
  34. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/csproj_parser.py +0 -0
  35. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/dart.py +0 -0
  36. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/dotnet.py +0 -0
  37. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/elixir.py +0 -0
  38. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/go.py +0 -0
  39. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/heuristic.py +0 -0
  40. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/hybrid.py +0 -0
  41. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/java.py +0 -0
  42. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/jvm_ext.py +0 -0
  43. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/nodejs.py +0 -0
  44. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/parsers.py +0 -0
  45. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/php.py +0 -0
  46. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/project.py +0 -0
  47. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/python.py +0 -0
  48. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/ruby.py +0 -0
  49. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/rust.py +0 -0
  50. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/systems.py +0 -0
  51. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/terraform.py +0 -0
  52. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/detectors/tooling.py +0 -0
  53. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/doc_analyzer.py +0 -0
  54. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/entrypoint_classifier.py +0 -0
  55. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/env_analyzer.py +0 -0
  56. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/error_schema.py +0 -0
  57. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/file_classifier.py +0 -0
  58. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/flow_analyzer.py +0 -0
  59. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/git_analyzer.py +0 -0
  60. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/graph_analyzer.py +0 -0
  61. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/license.py +0 -0
  62. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/__init__.py +0 -0
  63. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  64. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  65. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  66. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  67. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  68. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/orchestrator.py +0 -0
  69. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/registry.py +0 -0
  70. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp/runner.py +0 -0
  71. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/mcp_nudge.py +0 -0
  72. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/metrics_analyzer.py +0 -0
  73. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/output_budget.py +0 -0
  74. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/path_filters.py +0 -0
  75. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/pr_comment_renderer.py +0 -0
  76. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/progress.py +0 -0
  77. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ranking_engine.py +0 -0
  78. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/redactor.py +0 -0
  79. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/relevance_scorer.py +0 -0
  80. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/repo_classifier.py +0 -0
  81. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/ris.py +0 -0
  82. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/runtime_classifier.py +0 -0
  83. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/scanner.py +0 -0
  84. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/schema.py +0 -0
  85. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/semantic_analyzer.py +0 -0
  86. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/serializer.py +0 -0
  87. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/summarizer.py +0 -0
  88. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/__init__.py +0 -0
  89. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/config.py +0 -0
  90. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/consent.py +0 -0
  91. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/events.py +0 -0
  92. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/filters.py +0 -0
  93. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/telemetry/transport.py +0 -0
  94. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/tree_utils.py +0 -0
  95. {sourcecode-1.33.10 → sourcecode-1.33.12}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.10
3
+ Version: 1.33.12
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.10-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.33.12-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.10-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.33.12-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.10"
7
+ version = "1.33.12"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.10"
3
+ __version__ = "1.33.12"
@@ -2677,10 +2677,11 @@ def prepare_context_cmd(
2677
2677
  # relevant_files goal: untested SOURCE files. Test files belong in test_gaps.
2678
2678
  # Without this filter, high-churn test files rank above untested source files.
2679
2679
  _rfs = [f for f in _rfs if getattr(f, "role", None) != "test"]
2680
- out["relevant_files"] = [
2681
- _serialize_relevant_file(f)
2682
- for f in _rfs
2683
- ]
2680
+ _serialized_rfs = [_serialize_relevant_file(f) for f in _rfs]
2681
+ out["relevant_files"] = _serialized_rfs
2682
+ if task == "fix-bug":
2683
+ # ranked_files was the v1 name for this field — emit as backward-compat alias.
2684
+ out["ranked_files"] = _serialized_rfs
2684
2685
  if _task_include("key_dependencies") and output.key_dependencies:
2685
2686
  out["key_dependencies"] = output.key_dependencies
2686
2687
  if _task_include("gaps") and output.gaps:
@@ -3291,13 +3292,33 @@ def impact_cmd(
3291
3292
  )
3292
3293
  from sourcecode.output_budget import trim_to_budget as _trim, BUDGET_IMPACT
3293
3294
 
3295
+ import sys as _sys_ic
3296
+ # Legacy-compat: old syntax was `impact <path> <target>`.
3297
+ # Detect: target resolves to an existing directory (not a class name), and
3298
+ # the path arg is not a valid directory (looks like a class name).
3299
+ _target_as_path = Path(target)
3300
+ if _target_as_path.is_dir() and not path.resolve().is_dir():
3301
+ # Gate on isatty() — non-TTY (MCP, pipes) must not receive text mixed into JSON stdout.
3302
+ if getattr(_sys_ic.stderr, "isatty", lambda: False)():
3303
+ _sys_ic.stderr.write(
3304
+ f"[impact] Legacy argument order detected: '{target}' is a directory, not a class name.\n"
3305
+ f"[impact] Swapping: target='{path}', path='{target}'. "
3306
+ f"New syntax: sourcecode impact <target> [path]\n"
3307
+ )
3308
+ _sys_ic.stderr.flush()
3309
+ target, path = str(path), _target_as_path
3310
+
3294
3311
  root = path.resolve()
3295
3312
  if not root.is_dir():
3296
3313
  _emit_error_json(
3297
3314
  INVALID_INPUT_CODE,
3298
3315
  f"'{root}' is not a valid directory.",
3299
3316
  path=str(root),
3300
- hint="Pass an existing repository directory.",
3317
+ hint=(
3318
+ "Pass an existing repository directory as the second argument. "
3319
+ "New syntax: sourcecode impact <target> [path] — "
3320
+ "target is the class name, path is the repo root."
3321
+ ),
3301
3322
  expected="A directory path.",
3302
3323
  )
3303
3324
  raise typer.Exit(1)
@@ -3621,11 +3642,15 @@ def fix_bug_cmd(
3621
3642
  sourcecode onboard . — Full architecture context first
3622
3643
  """
3623
3644
  if not symptom:
3624
- typer.echo(
3625
- "[fix-bug] Results are significantly better with --symptom. "
3626
- "Example: --symptom 'NullPointerException in PaymentService'",
3627
- err=True,
3628
- )
3645
+ import sys as _sys_fb
3646
+ # Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
3647
+ # must never receive informational text mixed into JSON stdout.
3648
+ if getattr(_sys_fb.stderr, "isatty", lambda: False)():
3649
+ typer.echo(
3650
+ "[fix-bug] Results are significantly better with --symptom. "
3651
+ "Example: --symptom 'NullPointerException in PaymentService'",
3652
+ err=True,
3653
+ )
3629
3654
  ctx.invoke(
3630
3655
  prepare_context_cmd,
3631
3656
  task="fix-bug",
@@ -3904,8 +3929,23 @@ def activate_cmd(
3904
3929
 
3905
3930
  @app.command("version")
3906
3931
  def version_cmd() -> None:
3907
- """Show version and exit."""
3908
- typer.echo(f"sourcecode {__version__}")
3932
+ """Show version and exit.
3933
+
3934
+ Outputs human-readable text on interactive terminals.
3935
+ Outputs structured JSON on non-TTY (MCP, pipes, scripts):
3936
+ {"cli_version": "1.33.11", "mcp_schema_version": "1.33.11",
3937
+ "compatibility_schema_version": "1.0"}
3938
+ """
3939
+ import sys as _sys_ver
3940
+ if getattr(_sys_ver.stdout, "isatty", lambda: False)():
3941
+ typer.echo(f"sourcecode {__version__}")
3942
+ else:
3943
+ import json as _json_ver
3944
+ typer.echo(_json_ver.dumps({
3945
+ "cli_version": __version__,
3946
+ "mcp_schema_version": __version__,
3947
+ "compatibility_schema_version": "1.0",
3948
+ }, ensure_ascii=False))
3909
3949
 
3910
3950
 
3911
3951
  # ── config ────────────────────────────────────────────────────────────────────
@@ -4351,6 +4391,46 @@ def mcp_remove(
4351
4391
  typer.echo(" Re-add: sourcecode mcp init")
4352
4392
 
4353
4393
 
4394
+ @mcp_app.command("list-tools")
4395
+ def mcp_list_tools(
4396
+ json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
4397
+ ) -> None:
4398
+ """List all MCP tools exposed by the sourcecode server.
4399
+
4400
+ \b
4401
+ Shows each tool name, its description, and the CLI command it maps to.
4402
+ Useful for discovering capabilities when using sourcecode as an MCP server.
4403
+
4404
+ \b
4405
+ Examples:
4406
+ sourcecode mcp list-tools
4407
+ sourcecode mcp list-tools --json
4408
+ """
4409
+ import asyncio
4410
+ import json as _json
4411
+
4412
+ from sourcecode.mcp.server import mcp as _mcp
4413
+
4414
+ tools = asyncio.run(_mcp.list_tools())
4415
+ tools_sorted = sorted(tools, key=lambda t: t.name)
4416
+
4417
+ if json_output:
4418
+ payload = [
4419
+ {"name": t.name, "description": (t.description or "").strip()}
4420
+ for t in tools_sorted
4421
+ ]
4422
+ typer.echo(_json.dumps(payload, indent=2, ensure_ascii=False))
4423
+ return
4424
+
4425
+ typer.echo(f"sourcecode MCP tools ({len(tools_sorted)} available)\n")
4426
+ for t in tools_sorted:
4427
+ desc_first_line = (t.description or "").strip().splitlines()[0] if t.description else ""
4428
+ typer.echo(f" {t.name:<35} {desc_first_line}")
4429
+ typer.echo("")
4430
+ typer.echo("Use: sourcecode mcp serve — start MCP server on stdio")
4431
+ typer.echo("Use: sourcecode mcp init — configure MCP client")
4432
+
4433
+
4354
4434
  # ── Cache subcommands ─────────────────────────────────────────────────────────
4355
4435
 
4356
4436
 
@@ -959,9 +959,11 @@ _TELEMETRY_ACTIONS = frozenset({"status", "enable", "disable"})
959
959
 
960
960
  @mcp.tool()
961
961
  def version() -> dict:
962
- """Print sourcecode CLI version.
962
+ """Return sourcecode version and MCP compatibility metadata.
963
963
 
964
964
  Maps to: sourcecode version
965
+ Returns structured JSON: cli_version, mcp_schema_version, compatibility_schema_version.
966
+ cli_version and mcp_schema_version are always identical (released together).
965
967
  """
966
968
  return _execute(["version"])
967
969
 
@@ -627,6 +627,21 @@ _FRONTEND_SYMPTOM_MAP: dict[str, list[str]] = {
627
627
  "trabajador": ["trabajador", "empleado", "worker", "asignacion", "trabajadordao", "trabajadorservice"],
628
628
  }
629
629
 
630
+ # Generic words that add noise when used as symptom keywords in large repos.
631
+ # "token" and "user" are too ubiquitous in auth systems to be useful alone.
632
+ _SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
633
+ "fails", "fail", "failed", "failure",
634
+ "not", "for", "with", "when", "that", "the", "and", "but",
635
+ "are", "has", "had", "have", "was", "were",
636
+ "get", "set", "can", "does", "did", "should", "would", "could",
637
+ "null", "none", "empty", "invalid", "incorrect", "wrong", "missing",
638
+ "error", "issue", "problem", "bug",
639
+ "from", "into", "via", "due", "also", "after", "before",
640
+ "slow", "fast", "new", "old",
641
+ })
642
+
643
+ # Repo-scale threshold: above this file count, use stricter injection logic.
644
+ _LARGE_REPO_THRESHOLD = 500
630
645
 
631
646
  MAX_FILES_FAST = 2000 # above this threshold --fast uses git-index-only mode
632
647
 
@@ -1225,8 +1240,8 @@ class TaskContextBuilder:
1225
1240
  # Distinguish: no_staged_changes (CI, no --since) vs no_diff (empty range)
1226
1241
  if _pr_scope_source == "no_staged_changes":
1227
1242
  _no_diff_msg = (
1228
- "No --since ref provided and no staged changes found. "
1229
- "Use --since <ref>"
1243
+ "No --since ref provided and no staged/uncommitted changes found. "
1244
+ "Provide --since <ref> to specify the base commit for the diff."
1230
1245
  )
1231
1246
  return TaskOutput(
1232
1247
  task="review-pr", goal=spec.goal,
@@ -1239,7 +1254,8 @@ class TaskContextBuilder:
1239
1254
  error_message=_no_diff_msg,
1240
1255
  error_hints=[
1241
1256
  "Add --since <ref> to specify a base commit.",
1242
- "Examples: --since origin/main | --since HEAD~3 | --since main",
1257
+ "Common values: --since HEAD~1 (last commit) | --since origin/main | --since main",
1258
+ "If reviewing uncommitted changes: stage them first (git add), then run without --since.",
1243
1259
  ],
1244
1260
  gaps=[_no_diff_msg],
1245
1261
  ci_decision="no_diff_source",
@@ -1694,7 +1710,7 @@ class TaskContextBuilder:
1694
1710
  _camel_expanded = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _camel_expanded)
1695
1711
  symptom_keywords = [
1696
1712
  w.lower() for w in _re.split(r"[\s\W]+", _camel_expanded)
1697
- if len(w) > 2
1713
+ if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
1698
1714
  ]
1699
1715
  if symptom_keywords:
1700
1716
  # Pre-compile combined keyword pattern for fast content scanning
@@ -1758,14 +1774,27 @@ class TaskContextBuilder:
1758
1774
  ))
1759
1775
  _existing_paths.add(_cp)
1760
1776
 
1761
- # Pass 4: inject files whose path matches symptom keywords
1777
+ # Scale-awareness: large repos need wider scan and stricter injection.
1778
+ _is_large_repo = len(all_paths) > _LARGE_REPO_THRESHOLD
1779
+
1780
+ # Pass 4: inject files whose path matches symptom keywords.
1781
+ # CamelCase-expand the filename stem so "OfflineSessionLoader" matches
1782
+ # the keyword "offline" even without an explicit directory separator.
1783
+ _p4_dirs_of_injected: set[str] = set() # directories of high-score injects
1762
1784
  for _p in all_paths:
1763
1785
  if _p in _existing_paths:
1764
1786
  continue
1765
1787
  if Path(_p).suffix.lower() not in _ALL_EXTENSIONS:
1766
1788
  continue
1767
1789
  _p_lower = _p.lower()
1768
- _matching_kws = [kw for kw in symptom_keywords if kw in _p_lower]
1790
+ # CamelCase-expand the stem and append to the search string so
1791
+ # "OfflineSessionLoader" → "offline session loader" can match
1792
+ # individual keyword tokens beyond what substring search finds.
1793
+ _stem_raw = Path(_p).stem
1794
+ _stem_exp = _re.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_raw)
1795
+ _stem_exp = _re.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_exp).lower()
1796
+ _p_search = _p_lower + " " + _stem_exp
1797
+ _matching_kws = [kw for kw in symptom_keywords if kw in _p_search]
1769
1798
  if not _matching_kws:
1770
1799
  continue
1771
1800
  _boost = 0.2 * len(_matching_kws)
@@ -1780,6 +1809,8 @@ class TaskContextBuilder:
1780
1809
  ))
1781
1810
  _existing_paths.add(_p)
1782
1811
  _sx_direct_path.append(_p)
1812
+ if _injected_score >= 0.7:
1813
+ _p4_dirs_of_injected.add(str(Path(_p).parent))
1783
1814
 
1784
1815
  # Pass 4b: grep-based injection for frontend→backend synonym terms.
1785
1816
  # Runs parallel grep for each backend term to find files not yet in
@@ -1827,9 +1858,41 @@ class TaskContextBuilder:
1827
1858
  ))
1828
1859
  _existing_paths_now.add(_gf)
1829
1860
 
1830
- # Sort before content scan so top candidates get read first
1831
- relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
1832
- _CONTENT_SCAN_LIMIT = 80
1861
+ # Pass 4c: subsystem co-location inject sibling files from the same
1862
+ # directories as high-score (≥0.7) path-matched files. This catches
1863
+ # architecturally adjacent classes that don't mention symptom keywords
1864
+ # in their own name (e.g. InfinispanOfflineSessionCacheEntryLifespan…
1865
+ # siblings in the same infinispan/ package).
1866
+ if _is_large_repo and _p4_dirs_of_injected:
1867
+ _coloc_existing = {rf.path for rf in relevant_files}
1868
+ for _cp in all_paths:
1869
+ if _cp in _coloc_existing:
1870
+ continue
1871
+ if Path(_cp).suffix.lower() not in _src_exts:
1872
+ continue
1873
+ if str(Path(_cp).parent) in _p4_dirs_of_injected:
1874
+ relevant_files.append(RelevantFile(
1875
+ path=_cp,
1876
+ role="symptom_match",
1877
+ score=0.55,
1878
+ reason="subsystem co-location: same directory as symptom-matched file",
1879
+ why="directory proximity injection",
1880
+ ))
1881
+ _coloc_existing.add(_cp)
1882
+
1883
+ # Sort before content scan so top candidates get read first.
1884
+ # In large repos: prioritise symptom_match files within each score band
1885
+ # so that subsystem-relevant files are content-scanned before generic
1886
+ # structural files at the same score.
1887
+ if _is_large_repo:
1888
+ relevant_files = sorted(
1889
+ relevant_files,
1890
+ key=lambda rf: (-rf.score, 0 if rf.role == "symptom_match" else 1),
1891
+ )
1892
+ _CONTENT_SCAN_LIMIT = 150
1893
+ else:
1894
+ relevant_files = sorted(relevant_files, key=lambda rf: -rf.score)
1895
+ _CONTENT_SCAN_LIMIT = 80
1833
1896
  _scan_candidates = relevant_files[:_CONTENT_SCAN_LIMIT]
1834
1897
  _no_scan_candidates = relevant_files[_CONTENT_SCAN_LIMIT:]
1835
1898
 
@@ -1904,15 +1967,31 @@ class TaskContextBuilder:
1904
1967
  elif _extra_syn > 0:
1905
1968
  _new_reason = _rf.reason + f", synonym-match backend (+{_extra_syn:.2f})"
1906
1969
 
1970
+ _final_score = round(min(_rf.score + _total_extra, 1.0), 2)
1907
1971
  _boosted.append(RelevantFile(
1908
1972
  path=_rf.path,
1909
1973
  role=_rf.role,
1910
- score=round(min(_rf.score + _total_extra, 1.0), 2),
1974
+ score=_final_score,
1911
1975
  reason=_new_reason,
1912
1976
  why=_rf.why,
1913
1977
  ))
1914
1978
 
1915
- relevant_files = sorted(_boosted + _no_scan_candidates, key=lambda rf: -rf.score)
1979
+ # Use total boost as a secondary sort key so symptom-matched files
1980
+ # that were boosted from a lower base score rank above structural
1981
+ # files that coincidentally reach the same capped score of 1.0.
1982
+ # This prevents budget-trimming from discarding the most relevant files.
1983
+ _boost_totals: dict[str, float] = {}
1984
+ for _rf in _scan_candidates:
1985
+ pass # populated below
1986
+ _boost_totals = {}
1987
+ for _idx, _rf in enumerate(_scan_candidates):
1988
+ _b_rf = _boosted[_idx]
1989
+ _boost_totals[_b_rf.path] = round(_b_rf.score - _rf.score, 4)
1990
+
1991
+ relevant_files = sorted(
1992
+ _boosted + _no_scan_candidates,
1993
+ key=lambda rf: (-rf.score, -_boost_totals.get(rf.path, 0)),
1994
+ )
1916
1995
 
1917
1996
  # Synonym note (only when synonyms actually fired)
1918
1997
  if _frontend_kws and _sx_synonyms:
@@ -2389,7 +2468,8 @@ class TaskContextBuilder:
2389
2468
  else:
2390
2469
  _symptom_class_names.add(_tok)
2391
2470
  _symptom_tokens = {
2392
- w.lower() for w in _re_bug.split(r'[\s\W]+', symptom) if len(w) > 2
2471
+ w.lower() for w in _re_bug.split(r'[\s\W]+', symptom)
2472
+ if len(w) > 2 and w.lower() not in _SYMPTOM_STOP_WORDS
2393
2473
  }
2394
2474
 
2395
2475
  scored: list[tuple[float, str, RelevantFile]] = []
@@ -2486,9 +2566,16 @@ class TaskContextBuilder:
2486
2566
  content_boost += 0.8
2487
2567
  _why_parts.append("exception type in path (+0.8)")
2488
2568
 
2489
- # AND-weighted token intersection — multiple matching tokens >> single
2569
+ # AND-weighted token intersection — multiple matching tokens >> single.
2570
+ # CamelCase-expand the filename stem so "OfflineSessionLoader" contributes
2571
+ # "offline", "session", "loader" as individual tokens beyond what the raw
2572
+ # path splitting yields. This lets multi-word symptoms match class names.
2490
2573
  if _symptom_tokens:
2491
2574
  _path_parts = set(path_lower.replace("/", " ").replace(".", " ").replace("_", " ").split())
2575
+ _stem_cc = Path(path).stem
2576
+ _stem_cc_exp = _re_bug.sub(r'([a-z])([A-Z])', r'\1 \2', _stem_cc)
2577
+ _stem_cc_exp = _re_bug.sub(r'([A-Z]+)([A-Z][a-z])', r'\1 \2', _stem_cc_exp).lower()
2578
+ _path_parts.update(_stem_cc_exp.split())
2492
2579
  _intersection = _symptom_tokens & _path_parts
2493
2580
  _n_match = len(_intersection)
2494
2581
  if _n_match >= 3:
@@ -888,15 +888,40 @@ def _extract_mapped_paths(source: str, class_fqn: str) -> dict[str, str]:
888
888
  # Phase 3 — Symbol relation graph
889
889
  # ---------------------------------------------------------------------------
890
890
 
891
+ def _build_same_package_map(symbols: list[SymbolRecord]) -> dict[str, dict[str, str]]:
892
+ """Build {package: {simple_name: FQN}} map from all class/interface symbols.
893
+
894
+ Used by build_repo_ir to resolve same-package types that need no explicit import.
895
+ In Java, classes in the same package reference each other without import statements,
896
+ so import_map is empty for them — this map provides the fallback resolution.
897
+ """
898
+ result: dict[str, dict[str, str]] = {}
899
+ for sym in symbols:
900
+ if sym.type not in ("class", "interface") or "#" in sym.symbol:
901
+ continue
902
+ pkg = sym.symbol.rsplit(".", 1)[0] if "." in sym.symbol else ""
903
+ simple = sym.symbol.split(".")[-1]
904
+ result.setdefault(pkg, {})[simple] = sym.symbol
905
+ return result
906
+
907
+
891
908
  def _build_relations(
892
909
  symbols: list[SymbolRecord],
893
910
  raw_imports: list[str],
894
911
  source: str,
895
912
  package: str,
896
913
  rel_path: str,
914
+ same_pkg_types: dict[str, str] | None = None,
897
915
  ) -> list[RelationEdge]:
898
- """Phase 3: Build directed relation graph for symbols in one file."""
916
+ """Phase 3: Build directed relation graph for symbols in one file.
917
+
918
+ same_pkg_types: {simple_name → FQN} for classes in the same package.
919
+ Passed by build_repo_ir after a first pass that collects all symbols.
920
+ Enables resolving injection targets that share a package with the caller
921
+ and therefore need no explicit Java import statement.
922
+ """
899
923
  edges: list[RelationEdge] = []
924
+ _same_pkg: dict[str, str] = same_pkg_types or {}
900
925
 
901
926
  import_map: dict[str, str] = {}
902
927
  for fqn in raw_imports:
@@ -929,15 +954,27 @@ def _build_relations(
929
954
  ))
930
955
 
931
956
  if sym.type == "field":
932
- for imp_fqn in sym.imports_used:
957
+ _inject_ann = next(
958
+ (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
959
+ )
960
+ _field_targets: set[str] = set(sym.imports_used)
961
+ # Same-package field injection: imports_used is empty when the field type
962
+ # shares a package with the declaring class (no import needed in Java).
963
+ # Extract type from signature ("Type name") and resolve via same_pkg_types.
964
+ if not _field_targets and _same_pkg:
965
+ _sig_type = (sym.signature or "").split()[0] if sym.signature else ""
966
+ _sig_base = re.sub(r'<.*', '', _sig_type).strip()
967
+ if _sig_base and _sig_base[0].isupper():
968
+ _same_fqn = _same_pkg.get(_sig_base)
969
+ if _same_fqn and _same_fqn != _enclosing_class(sym_fqn):
970
+ _field_targets.add(_same_fqn)
971
+ for imp_fqn in _field_targets:
933
972
  edges.append(RelationEdge(
934
973
  from_symbol=sym_fqn,
935
974
  to_symbol=imp_fqn,
936
975
  type="injects",
937
976
  confidence="high",
938
- evidence={"type": "annotation", "value": next(
939
- (a for a in sym.annotations if a in _INJECT_ANNOTATIONS), "@Autowired"
940
- )},
977
+ evidence={"type": "annotation", "value": _inject_ann},
941
978
  ))
942
979
 
943
980
  # ── Constructor injection ─────────────────────────────────────────────────
@@ -949,7 +986,7 @@ def _build_relations(
949
986
  continue
950
987
  for simple_type in sym.param_types:
951
988
  base = re.sub(r'<.*', '', simple_type).strip()
952
- fqn = import_map.get(base)
989
+ fqn = import_map.get(base) or _same_pkg.get(base)
953
990
  if fqn:
954
991
  edges.append(RelationEdge(
955
992
  from_symbol=sym.symbol,
@@ -982,7 +1019,7 @@ def _build_relations(
982
1019
  continue
983
1020
  _ftype = fld.group("type").strip()
984
1021
  _base = re.sub(r'<.*', '', _ftype).strip()
985
- _fqn = import_map.get(_base)
1022
+ _fqn = import_map.get(_base) or _same_pkg.get(_base)
986
1023
  if _fqn:
987
1024
  edges.append(RelationEdge(
988
1025
  from_symbol=sym.symbol,
@@ -2632,24 +2669,38 @@ def build_repo_ir(
2632
2669
  if since:
2633
2670
  _since_changed = _get_git_changed_files(root, since)
2634
2671
 
2672
+ # Pass 1: extract symbols from all files so we can build the same-package
2673
+ # type map before building relations. Java classes in the same package
2674
+ # reference each other without import statements, so import_map alone cannot
2675
+ # resolve them — _build_same_package_map provides the cross-file fallback.
2676
+ _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
2635
2677
  for rel_path in sorted(file_paths):
2636
2678
  abs_path = root / rel_path
2637
2679
  try:
2638
2680
  source = abs_path.read_text(encoding="utf-8", errors="replace")
2639
2681
  except OSError:
2640
2682
  continue
2683
+ package, symbols, raw_imports = _extract_symbols(source, rel_path)
2684
+ all_symbols.extend(symbols)
2685
+ _per_file.append((rel_path, source, package, raw_imports, symbols))
2686
+
2687
+ # Build {package: {simple_name: FQN}} from every class/interface found.
2688
+ _same_pkg_map: dict[str, dict[str, str]] = _build_same_package_map(all_symbols)
2689
+
2690
+ # Pass 2: build relations with same-package type resolution available.
2691
+ for rel_path, source, package, raw_imports, symbols in _per_file:
2692
+ same_pkg_types = _same_pkg_map.get(package, {})
2693
+ relations = _build_relations(
2694
+ symbols, raw_imports, source, package, rel_path,
2695
+ same_pkg_types=same_pkg_types,
2696
+ )
2641
2697
 
2642
2698
  old_source: Optional[str] = None
2643
2699
  if since:
2644
- # Only fetch old content for files known to have changed.
2645
- # Unchanged files have no diff entries — skip git show entirely.
2646
2700
  _file_changed = _since_changed is None or rel_path in _since_changed
2647
2701
  if _file_changed:
2648
2702
  old_source = _get_git_old_content(root, rel_path, since)
2649
2703
 
2650
- package, symbols, raw_imports = _extract_symbols(source, rel_path)
2651
- relations = _build_relations(symbols, raw_imports, source, package, rel_path)
2652
-
2653
2704
  if old_source is not None:
2654
2705
  _, old_symbols, _ = _extract_symbols(old_source, rel_path)
2655
2706
  all_changed.extend(_diff_symbols(old_symbols, symbols))
@@ -2664,7 +2715,6 @@ def build_repo_ir(
2664
2715
  confidence="high",
2665
2716
  ))
2666
2717
 
2667
- all_symbols.extend(symbols)
2668
2718
  all_relations.extend(relations)
2669
2719
 
2670
2720
  spring_summary = _build_spring_summary(all_symbols)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes