sourcecode 1.35.28__tar.gz → 1.35.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. {sourcecode-1.35.28 → sourcecode-1.35.30}/PKG-INFO +3 -3
  2. {sourcecode-1.35.28 → sourcecode-1.35.30}/README.md +2 -2
  3. {sourcecode-1.35.28 → sourcecode-1.35.30}/pyproject.toml +1 -1
  4. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/canonical_ir.py +1 -1
  6. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cli.py +103 -19
  7. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/java.py +1 -7
  8. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/server.py +1 -1
  9. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/prepare_context.py +1 -1
  10. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/repository_ir.py +161 -61
  11. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/semantic_analyzer.py +2 -10
  12. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/serializer.py +12 -6
  13. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_tx_analyzer.py +3 -1
  14. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/summarizer.py +17 -0
  15. {sourcecode-1.35.28 → sourcecode-1.35.30}/.github/workflows/build-windows.yml +0 -0
  16. {sourcecode-1.35.28 → sourcecode-1.35.30}/.gitignore +0 -0
  17. {sourcecode-1.35.28 → sourcecode-1.35.30}/.ruff.toml +0 -0
  18. {sourcecode-1.35.28 → sourcecode-1.35.30}/CHANGELOG.md +0 -0
  19. {sourcecode-1.35.28 → sourcecode-1.35.30}/CONTRIBUTING.md +0 -0
  20. {sourcecode-1.35.28 → sourcecode-1.35.30}/LICENSE +0 -0
  21. {sourcecode-1.35.28 → sourcecode-1.35.30}/SECURITY.md +0 -0
  22. {sourcecode-1.35.28 → sourcecode-1.35.30}/raw +0 -0
  23. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/adaptive_scanner.py +0 -0
  24. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/architecture_analyzer.py +0 -0
  25. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/architecture_summary.py +0 -0
  26. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ast_extractor.py +0 -0
  27. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cache.py +0 -0
  28. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cir_graphs.py +0 -0
  29. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/classifier.py +0 -0
  30. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/code_notes_analyzer.py +0 -0
  31. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/confidence_analyzer.py +0 -0
  32. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/context_scorer.py +0 -0
  33. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/context_summarizer.py +0 -0
  34. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/contract_model.py +0 -0
  35. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/contract_pipeline.py +0 -0
  36. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/coverage_parser.py +0 -0
  37. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/dependency_analyzer.py +0 -0
  38. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/__init__.py +0 -0
  39. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/base.py +0 -0
  40. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/csproj_parser.py +0 -0
  41. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/dart.py +0 -0
  42. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/dotnet.py +0 -0
  43. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/elixir.py +0 -0
  44. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/go.py +0 -0
  45. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/heuristic.py +0 -0
  46. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/hybrid.py +0 -0
  47. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/jvm_ext.py +0 -0
  48. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/nodejs.py +0 -0
  49. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/parsers.py +0 -0
  50. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/php.py +0 -0
  51. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/project.py +0 -0
  52. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/python.py +0 -0
  53. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/ruby.py +0 -0
  54. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/rust.py +0 -0
  55. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/systems.py +0 -0
  56. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/terraform.py +0 -0
  57. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/tooling.py +0 -0
  58. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/doc_analyzer.py +0 -0
  59. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/entrypoint_classifier.py +0 -0
  60. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/env_analyzer.py +0 -0
  61. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/error_schema.py +0 -0
  62. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/explain.py +0 -0
  63. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/file_chunker.py +0 -0
  64. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/file_classifier.py +0 -0
  65. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/flow_analyzer.py +0 -0
  66. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/fqn_utils.py +0 -0
  67. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/git_analyzer.py +0 -0
  68. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/graph_analyzer.py +0 -0
  69. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/license.py +0 -0
  70. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/__init__.py +0 -0
  71. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  72. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  73. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  74. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  75. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  76. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/orchestrator.py +0 -0
  77. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/registry.py +0 -0
  78. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/runner.py +0 -0
  79. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp_nudge.py +0 -0
  80. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/metrics_analyzer.py +0 -0
  81. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/migrate_check.py +0 -0
  82. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/output_budget.py +0 -0
  83. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/path_filters.py +0 -0
  84. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/pr_comment_renderer.py +0 -0
  85. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/pr_impact.py +0 -0
  86. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/progress.py +0 -0
  87. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ranking_engine.py +0 -0
  88. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/redactor.py +0 -0
  89. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/relevance_scorer.py +0 -0
  90. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/rename_refactor.py +0 -0
  91. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/repo_classifier.py +0 -0
  92. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ris.py +0 -0
  93. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/runtime_classifier.py +0 -0
  94. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/scanner.py +0 -0
  95. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/schema.py +0 -0
  96. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_event_topology.py +0 -0
  97. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_findings.py +0 -0
  98. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_impact.py +0 -0
  99. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_model.py +0 -0
  100. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_security_audit.py +0 -0
  101. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_semantic.py +0 -0
  102. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/__init__.py +0 -0
  103. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/config.py +0 -0
  104. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/consent.py +0 -0
  105. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/events.py +0 -0
  106. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/filters.py +0 -0
  107. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/transport.py +0 -0
  108. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/tree_utils.py +0 -0
  109. {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.35.28
3
+ Version: 1.35.30
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
42
42
 
43
- ![Version](https://img.shields.io/badge/version-1.35.28-blue)
43
+ ![Version](https://img.shields.io/badge/version-1.35.30-blue)
44
44
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
45
45
 
46
46
  ---
@@ -114,7 +114,7 @@ pipx install sourcecode
114
114
 
115
115
  ```bash
116
116
  sourcecode version
117
- # sourcecode 1.35.28
117
+ # sourcecode 1.35.30
118
118
 
119
119
  **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
120
120
  ```
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.35.28-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.35.30-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -76,7 +76,7 @@ pipx install sourcecode
76
76
 
77
77
  ```bash
78
78
  sourcecode version
79
- # sourcecode 1.35.28
79
+ # sourcecode 1.35.30
80
80
 
81
81
  **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
82
82
  ```
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.35.28"
7
+ version = "1.35.30"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.35.28"
3
+ __version__ = "1.35.30"
@@ -58,7 +58,7 @@ class CanonicalSecurity:
58
58
  source_scope: str # method|class|inherited
59
59
  effective_roles: list[str] = field(default_factory=list)
60
60
  expression: str = "" # SpEL for @PreAuthorize/@PostAuthorize
61
- required_permission: str = "" # for @M3FiltroSeguridad
61
+ required_permission: str = "" # for custom permission annotations
62
62
  raw: dict = field(default_factory=dict) # full original policy dict
63
63
 
64
64
  def to_dict(self) -> dict:
@@ -178,10 +178,10 @@ Cold scan: 2–10s depending on repo size. Warm cache: 0.3–0.6s.
178
178
  cache clear [dim]# clear all cached results for this repo[/dim]
179
179
 
180
180
  [bold]Examples:[/bold]
181
- sourcecode saint-server --compact
181
+ sourcecode my-project --compact
182
182
  sourcecode . --compact --git-context --copy
183
183
  sourcecode . --changed-only --git-context
184
- sourcecode prepare-context onboard saint-server
184
+ sourcecode prepare-context onboard my-project
185
185
  sourcecode prepare-context delta . --since main
186
186
 
187
187
  [bold]Subcommands:[/bold]
@@ -629,7 +629,7 @@ def main(
629
629
  help=(
630
630
  "High-signal summary (typically 1000–3000 tokens depending on repo size): "
631
631
  "stacks, entry points, dependency summary, confidence, and gaps. "
632
- "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
632
+ "Includes security_surface (when custom security annotations detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
633
633
  "Use --agent for maximum signal."
634
634
  ),
635
635
  ),
@@ -3311,6 +3311,11 @@ def repo_ir_cmd(
3311
3311
  "--force",
3312
3312
  help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
3313
3313
  ),
3314
+ gzip_output: bool = typer.Option(
3315
+ False,
3316
+ "--gzip",
3317
+ help="Compress output with gzip. Requires --output. Reduces large IR files by ~70-80%.",
3318
+ ),
3314
3319
  ) -> None:
3315
3320
  """Deterministic symbol-level IR for Java repositories.
3316
3321
 
@@ -3323,6 +3328,7 @@ def repo_ir_cmd(
3323
3328
  --summary-only Omit full graph; keep analysis + impact (smallest output)
3324
3329
  --max-nodes N Keep top N nodes by score
3325
3330
  --max-edges N Keep top N edges (priority: both endpoints kept)
3331
+ --gzip Compress output file (~70-80% smaller; requires --output)
3326
3332
 
3327
3333
  \b
3328
3334
  Examples:
@@ -3332,6 +3338,7 @@ def repo_ir_cmd(
3332
3338
  sourcecode repo-ir --since main --output ir.json
3333
3339
  sourcecode repo-ir --since HEAD~3 --summary-only --output ir-small.json
3334
3340
  sourcecode repo-ir --max-nodes 200 --max-edges 500
3341
+ sourcecode repo-ir --output ir.json.gz --gzip
3335
3342
  """
3336
3343
  import json as _json
3337
3344
 
@@ -3392,22 +3399,52 @@ def repo_ir_cmd(
3392
3399
  output = _serialize_dict(ir, format)
3393
3400
 
3394
3401
  if output_path:
3395
- output_path.write_text(output, encoding="utf-8")
3396
- size_kb = len(output.encode("utf-8")) // 1024
3397
- if summary_only:
3402
+ if gzip_output and not str(output_path).endswith(".gz"):
3403
+ output_path = output_path.with_suffix(output_path.suffix + ".gz")
3404
+ raw_bytes = output.encode("utf-8")
3405
+ size_bytes = len(raw_bytes)
3406
+ _SIZE_WARN_BYTES = 10 * 1024 * 1024 # 10MB
3407
+ if size_bytes > _SIZE_WARN_BYTES and not gzip_output:
3398
3408
  typer.echo(
3399
- f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
3409
+ f"[repo-ir] Output is {size_bytes // (1024 * 1024)}MB "
3410
+ "consider --summary-only, --max-nodes N --max-edges N, or --gzip to compress.",
3400
3411
  err=True,
3401
3412
  )
3402
- else:
3403
- n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
3404
- n_edges = len((ir.get("graph") or {}).get("edges") or [])
3413
+ if gzip_output:
3414
+ import gzip as _gzip
3415
+ with _gzip.open(output_path, "wb") as _gz:
3416
+ _gz.write(raw_bytes)
3417
+ compressed_kb = output_path.stat().st_size // 1024
3418
+ size_kb = size_bytes // 1024
3405
3419
  typer.echo(
3406
- f"IR written to {output_path} "
3407
- f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
3420
+ f"IR written to {output_path} ({compressed_kb}KB gzip, {size_kb}KB uncompressed)",
3408
3421
  err=True,
3409
3422
  )
3423
+ else:
3424
+ output_path.write_bytes(raw_bytes)
3425
+ size_kb = size_bytes // 1024
3426
+ if summary_only:
3427
+ typer.echo(
3428
+ f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
3429
+ err=True,
3430
+ )
3431
+ else:
3432
+ n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
3433
+ n_edges = len((ir.get("graph") or {}).get("edges") or [])
3434
+ typer.echo(
3435
+ f"IR written to {output_path} "
3436
+ f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
3437
+ err=True,
3438
+ )
3410
3439
  else:
3440
+ if gzip_output:
3441
+ _emit_error_json(
3442
+ INVALID_INPUT_CODE,
3443
+ "--gzip requires --output FILE.",
3444
+ hint="Add --output ir.json.gz to write compressed output to a file.",
3445
+ expected="--output path when --gzip is used.",
3446
+ )
3447
+ raise typer.Exit(1)
3411
3448
  _ir_size = len(output.encode("utf-8"))
3412
3449
  _ir_tokens_est = _ir_size // 4
3413
3450
  # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
@@ -4376,10 +4413,13 @@ def pr_impact_cmd(
4376
4413
  if not files.exists():
4377
4414
  _emit_error_json(
4378
4415
  INVALID_INPUT_CODE,
4379
- f"--files path '{files}' does not exist.",
4416
+ f"--files '{files}' does not exist. Expected a text file listing changed file paths (one per line), not a directory or class name.",
4380
4417
  path=str(files),
4381
- hint="Pass a file containing one Java file path per line.",
4382
- expected="An existing file path.",
4418
+ hint=(
4419
+ "Create a file with one changed Java file path per line, then pass it with --files. "
4420
+ "Example: git diff --name-only HEAD~1 > changed.txt && sourcecode pr-impact . --files changed.txt"
4421
+ ),
4422
+ expected="A text file containing one Java file path per line.",
4383
4423
  )
4384
4424
  raise typer.Exit(code=1)
4385
4425
 
@@ -4749,6 +4789,21 @@ def fix_bug_cmd(
4749
4789
  sourcecode impact <target> — Propagate impact from a specific class
4750
4790
  sourcecode onboard . — Full architecture context first
4751
4791
  """
4792
+ # Detect misuse: `fix-bug "symptom text" /path` — path arg looks like a symptom.
4793
+ _path_str = str(path)
4794
+ _path_looks_like_symptom = (
4795
+ not Path(_path_str).exists()
4796
+ and (" " in _path_str or any(c.isupper() for c in _path_str))
4797
+ )
4798
+ if _path_looks_like_symptom and not symptom:
4799
+ _emit_error_json(
4800
+ INVALID_INPUT_CODE,
4801
+ f"'{_path_str}' is not a valid directory. Did you mean to use --symptom?",
4802
+ hint=f"Use: sourcecode fix-bug . --symptom {_path_str!r}",
4803
+ expected="A repository directory path as first argument.",
4804
+ )
4805
+ raise typer.Exit(code=1)
4806
+
4752
4807
  if not symptom:
4753
4808
  # Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
4754
4809
  # must never receive informational text mixed into JSON stdout.
@@ -5380,6 +5435,12 @@ def cold_start_cmd(
5380
5435
  "--compact",
5381
5436
  help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
5382
5437
  ),
5438
+ output_path: Optional[Path] = typer.Option(
5439
+ None,
5440
+ "--output",
5441
+ "-o",
5442
+ help="Write output to file instead of stdout.",
5443
+ ),
5383
5444
  ) -> None:
5384
5445
  """Output Repository Intelligence Snapshot bootstrap context as JSON.
5385
5446
 
@@ -5419,7 +5480,12 @@ def cold_start_cmd(
5419
5480
  "Use --compact for a ~10K token subset, or --output FILE to save.\n"
5420
5481
  )
5421
5482
  sys.stderr.flush()
5422
- typer.echo(_out)
5483
+ if output_path:
5484
+ output_path.write_text(_out, encoding="utf-8")
5485
+ sys.stderr.write(f"Saved {len(_out.encode('utf-8'))} bytes to {output_path}\n")
5486
+ sys.stderr.flush()
5487
+ else:
5488
+ typer.echo(_out)
5423
5489
 
5424
5490
 
5425
5491
  # ── MCP server ────────────────────────────────────────────────────────────────
@@ -5872,6 +5938,24 @@ def mcp_list_tools(
5872
5938
  # ── Cache subcommands ─────────────────────────────────────────────────────────
5873
5939
 
5874
5940
 
5941
+ def _resolve_repo_root(path: Path) -> Path:
5942
+ """Resolve *path* to a repo root by walking up to find a .git directory.
5943
+
5944
+ If *path* is already a git root (has .git), returns it directly.
5945
+ If *path* is a subdirectory of a git repo, returns the git root.
5946
+ Falls back to *path* itself if no git repo found.
5947
+ """
5948
+ candidate = path.resolve()
5949
+ while True:
5950
+ if (candidate / ".git").exists():
5951
+ return candidate
5952
+ parent = candidate.parent
5953
+ if parent == candidate:
5954
+ break
5955
+ candidate = parent
5956
+ return path.resolve()
5957
+
5958
+
5875
5959
  @cache_app.command("status")
5876
5960
  def cache_status_cmd(
5877
5961
  path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
@@ -5879,7 +5963,7 @@ def cache_status_cmd(
5879
5963
  ) -> None:
5880
5964
  """Show cache statistics for a repository."""
5881
5965
  from sourcecode import cache as _cm
5882
- target = Path(path).resolve()
5966
+ target = _resolve_repo_root(Path(path))
5883
5967
  stats = _cm.status(target)
5884
5968
  if json_output:
5885
5969
  import json as _j
@@ -5913,7 +5997,7 @@ def cache_clear_cmd(
5913
5997
  index used for cold-start bootstrapping. Use --all to also clear it.
5914
5998
  """
5915
5999
  from sourcecode import cache as _cm
5916
- target = Path(path).resolve()
6000
+ target = _resolve_repo_root(Path(path))
5917
6001
  _clear_ris = include_ris or all_
5918
6002
  if not yes:
5919
6003
  _ris_note = " (including RIS)" if _clear_ris else " (RIS preserved — use --all to also clear it)"
@@ -5935,7 +6019,7 @@ def cache_warm_cmd(
5935
6019
  """
5936
6020
  import shutil as _shutil
5937
6021
  import subprocess as _sub
5938
- target = Path(path).resolve()
6022
+ target = _resolve_repo_root(Path(path))
5939
6023
  typer.echo(f"Warming cache for {target} …", err=True)
5940
6024
  _sc_bin = _shutil.which("sourcecode") or sys.argv[0]
5941
6025
  cmd = [_sc_bin, str(target)]
@@ -38,13 +38,7 @@ _REQUEST_METHOD_VERB_RE = re.compile(
38
38
  # Custom security annotation registry — extend here for project-specific annotations.
39
39
  # Each entry: annotation_simple_name → compiled params regex.
40
40
  # Groups: (1) resource string literal, (2) resource constant ref, (3) level integer.
41
- _CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {
42
- "M3FiltroSeguridad": re.compile(
43
- r'@M3FiltroSeguridad\s*\(\s*'
44
- r'(?:nombreRecurso\s*=\s*(?:"([^"]*)"|([\w.]+)))?'
45
- r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
46
- ),
47
- }
41
+ _CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {}
48
42
 
49
43
  # Security config detection
50
44
  _WEB_SECURITY_CONFIGURER_RE = re.compile(r'WebSecurityConfigurerAdapter\b')
@@ -639,7 +639,7 @@ def get_endpoints(repo_path: str = ".") -> dict:
639
639
  "unknown" (no security signals detected).
640
640
  Supports Spring MVC (@GetMapping etc.) and JAX-RS (@GET/@POST etc.).
641
641
  Security annotations detected: @RolesAllowed, @PermitAll, @DenyAll,
642
- @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement, @M3FiltroSeguridad.
642
+ @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement.
643
643
  repo_path: absolute path to the Java repository (default: current working directory).
644
644
  """
645
645
  _raw = repo_path
@@ -2003,7 +2003,7 @@ class TaskContextBuilder:
2003
2003
  for _cf in (_cr.files_changed or []):
2004
2004
  _cf_norm = _cf.replace("\\", "/")
2005
2005
  # Git reports paths relative to the git root, which may be
2006
- # a parent of the analyzed directory (e.g. MSAS/saint-server/).
2006
+ # a parent of the analyzed directory (e.g. a monorepo root).
2007
2007
  # Strip the analyzed-dir prefix so paths match all_paths.
2008
2008
  if _cf_norm.startswith(_rn_prefix):
2009
2009
  _cf_norm = _cf_norm[len(_rn_prefix):]
@@ -22,6 +22,8 @@ from dataclasses import dataclass, field
22
22
  from pathlib import Path
23
23
  from typing import Any, Optional
24
24
 
25
+ from sourcecode.fqn_utils import normalize_owner_fqn as _normalize_owner_fqn
26
+
25
27
  # ---------------------------------------------------------------------------
26
28
  # Data classes — Phases 1–4
27
29
  # ---------------------------------------------------------------------------
@@ -171,8 +173,6 @@ _PATH_ANNOTATIONS: frozenset[str] = frozenset({"@Path"})
171
173
  # Security / authorization annotations whose args must be captured.
172
174
  # Includes standard Jakarta EE, JAX-RS, Quarkus/MicroProfile, and custom patterns.
173
175
  _PERMISSION_ANNOTATIONS: frozenset[str] = frozenset({
174
- # Custom (kept for backward compat)
175
- "@M3FiltroSeguridad",
176
176
  # Jakarta EE / JAX-RS standard
177
177
  "@RolesAllowed",
178
178
  "@PermitAll",
@@ -361,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
361
361
  source = _LINE_COMMENT_RE.sub(' ', source)
362
362
  return source
363
363
 
364
+
365
+ def _parse_annotation_line(line: str) -> tuple[str, str]:
366
+ """Parse annotation name and args from a line starting with '@'.
367
+
368
+ Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
369
+ Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
370
+ on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
371
+ containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
372
+ """
373
+ if not line.startswith('@'):
374
+ return "", ""
375
+ i = 1
376
+ while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
377
+ i += 1
378
+ ann_name = line[:i]
379
+ while i < len(line) and line[i] in (' ', '\t'):
380
+ i += 1
381
+ if i >= len(line) or line[i] != '(':
382
+ return ann_name, ""
383
+ depth = 0
384
+ in_string = False
385
+ string_char = ''
386
+ start = i + 1
387
+ i += 1
388
+ while i < len(line):
389
+ c = line[i]
390
+ if in_string:
391
+ if c == '\\':
392
+ i += 2
393
+ continue
394
+ if c == string_char:
395
+ in_string = False
396
+ elif c in ('"', "'"):
397
+ in_string = True
398
+ string_char = c
399
+ elif c == '(':
400
+ depth += 1
401
+ elif c == ')':
402
+ if depth == 0:
403
+ return ann_name, line[start:i]
404
+ depth -= 1
405
+ i += 1
406
+ return ann_name, line[start:]
407
+
364
408
  # Edge types used for subsystem grouping — semantic hierarchy only, not imports
365
409
  _SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
366
410
  "extends", "implements", "injects", "contained_in",
@@ -410,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
410
454
  # Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
411
455
  _ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
412
456
 
457
+ # Used by _count_net_braces fast path: strip string/char literals before counting braces.
458
+ # Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
459
+ _STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
460
+
461
+ # Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
462
+ _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
463
+
413
464
 
414
465
  # ---------------------------------------------------------------------------
415
466
  # Stable ID helpers
416
467
  # ---------------------------------------------------------------------------
417
468
 
418
- def _normalize_type_name(raw: str) -> str:
419
- """Strip annotations, final modifier, and param name; return only type.
469
+ _FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
470
+ _TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
420
471
 
421
- "(Long id)" -> strip after parsing → "Long"
422
- "@NotNull User user" "User"
423
- "List<String>" "List<String>"
424
- """
472
+
473
+ def _normalize_type_name(raw: str) -> str:
474
+ """Strip annotations, final modifier, and param name; return only type."""
425
475
  raw = _ANN_PREFIX_RE.sub("", raw).strip()
426
- raw = re.sub(r'\bfinal\s+', "", raw).strip()
427
- # "Type name" → extract Type (rightmost word is the param name)
428
- m = re.match(r'^([\w<>\[\].,? ]+?)\s+\w+$', raw)
476
+ raw = _FINAL_STRIP_RE.sub("", raw).strip()
477
+ m = _TYPE_PARAM_RE.match(raw)
429
478
  if m:
430
479
  return m.group(1).strip()
431
480
  return raw.strip()
@@ -503,26 +552,15 @@ def _compute_stable_id(
503
552
  # ---------------------------------------------------------------------------
504
553
 
505
554
  def _count_net_braces(line: str) -> int:
506
- depth = 0
507
- in_str = False
508
- in_char = False
509
- i = 0
510
- while i < len(line):
511
- ch = line[i]
512
- if ch == '\\' and (in_str or in_char):
513
- i += 2
514
- continue
515
- if ch == '"' and not in_char:
516
- in_str = not in_str
517
- elif ch == "'" and not in_str:
518
- in_char = not in_char
519
- elif not in_str and not in_char:
520
- if ch == '{':
521
- depth += 1
522
- elif ch == '}':
523
- depth -= 1
524
- i += 1
525
- return depth
555
+ # Fast exit: no braces on this line at all
556
+ if '{' not in line and '}' not in line:
557
+ return 0
558
+ # Fast path: no string/char literals — count directly (C-speed)
559
+ if '"' not in line and "'" not in line:
560
+ return line.count('{') - line.count('}')
561
+ # Slow path: strip string/char literals first so quoted braces don't count
562
+ clean = _STRING_LITERAL_RE.sub('', line)
563
+ return clean.count('{') - clean.count('}')
526
564
 
527
565
 
528
566
  def _extract_modifiers(text: str) -> list[str]:
@@ -591,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
591
629
  _raw_lines = source.splitlines()
592
630
  _joined: list[str] = []
593
631
  _i = 0
594
- _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
595
632
  while _i < len(_raw_lines):
596
633
  _line = _raw_lines[_i]
597
634
  _stripped = _line.strip()
@@ -633,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
633
670
  net = _count_net_braces(stripped)
634
671
 
635
672
  if stripped.startswith("@"):
636
- ann_m = _ANN_WITH_ARGS_RE.match(stripped)
637
- if ann_m:
638
- ann = ann_m.group(1)
639
- ann_args = ann_m.group(2) or ""
673
+ ann, ann_args = _parse_annotation_line(stripped)
674
+ if ann:
640
675
  if ann not in pending_anns:
641
676
  pending_anns.append(ann)
642
677
  if ann_args and ann in _CAPTURE_ANN_ARGS:
@@ -1141,17 +1176,26 @@ def _build_relations(
1141
1176
  evidence={"type": "signature", "value": f"implements {iface}"},
1142
1177
  ))
1143
1178
 
1144
- for m_path, class_fqn in _extract_mapped_paths(source, "").items():
1145
- for sym in symbols:
1146
- if sym.type in ("class", "interface") and (
1147
- "@RestController" in sym.annotations or "@Controller" in sym.annotations
1148
- ):
1179
+ # mapped_to edges: controller class → class-level @RequestMapping path prefix.
1180
+ # O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
1181
+ # _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
1182
+ # O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
1183
+ for sym in symbols:
1184
+ if sym.type not in ("class", "interface"):
1185
+ continue
1186
+ if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
1187
+ continue
1188
+ if "@RequestMapping" not in sym.annotations:
1189
+ continue
1190
+ _rm_args = sym.annotation_values.get("@RequestMapping", "")
1191
+ for _m_path in _parse_route_paths(_rm_args):
1192
+ if _m_path:
1149
1193
  edges.append(RelationEdge(
1150
1194
  from_symbol=sym.symbol,
1151
- to_symbol=m_path,
1195
+ to_symbol=_m_path,
1152
1196
  type="mapped_to",
1153
1197
  confidence="high",
1154
- evidence={"type": "annotation", "value": f"@RequestMapping(\"{m_path}\")"},
1198
+ evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
1155
1199
  ))
1156
1200
 
1157
1201
  # contained_in edges: method/field → enclosing class (structural membership)
@@ -1419,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
1419
1463
  Returns {simple_name: value} covering all classes in the file.
1420
1464
  Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
1421
1465
  """
1466
+ # Fast path: skip entirely when no declarations present (C-speed string scan)
1467
+ if 'static final String' not in source:
1468
+ return {}
1469
+ # Scan only candidate lines (skips full-source regex over 100KB files).
1470
+ # Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
1471
+ # optional modifier group backtracking; per-line match is far cheaper.
1422
1472
  constants: dict[str, str] = {}
1423
- for m in _STATIC_FINAL_STR_RE.finditer(source):
1424
- constants[m.group(1)] = m.group(2)
1473
+ for line in source.splitlines():
1474
+ if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
1475
+ m = _STATIC_FINAL_STR_RE.search(line)
1476
+ if m:
1477
+ constants[m.group(1)] = m.group(2)
1425
1478
  return constants
1426
1479
 
1427
1480
 
@@ -2205,11 +2258,19 @@ def _assemble(
2205
2258
 
2206
2259
  all_fqns_set = {s.symbol for s in sorted_syms}
2207
2260
 
2208
- # Bounded BFS reachability per node (graph-only)
2209
- bfs_reach: dict[str, int] = {
2210
- s.symbol: _bfs_reachability(s.symbol, adjacency)
2211
- for s in sorted_syms
2212
- }
2261
+ # Bounded BFS reachability per node (graph-only).
2262
+ # Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
2263
+ # hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
2264
+ # bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
2265
+ # repos causes no accuracy loss for spring-audit/endpoints/security analysis.
2266
+ _BFS_SYMBOL_THRESHOLD: int = 5000
2267
+ if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
2268
+ bfs_reach: dict[str, int] = {
2269
+ s.symbol: _bfs_reachability(s.symbol, adjacency)
2270
+ for s in sorted_syms
2271
+ }
2272
+ else:
2273
+ bfs_reach = {}
2213
2274
 
2214
2275
  # Normalize centrality across all nodes
2215
2276
  max_raw = max(
@@ -2495,7 +2556,6 @@ def _route_security_from_sym(
2495
2556
  @RequiresRoles → {policy: requiresroles, roles: [...]}
2496
2557
  @RequiresPermissions → {policy: requirespermissions, roles: [...]}
2497
2558
  @SecurityRequirement → {policy: openapi_security, spec: ...}
2498
- @M3FiltroSeguridad → {policy: custom_permission, required_permission: ...}
2499
2559
 
2500
2560
  Falls back to class-level annotations if no method-level security found.
2501
2561
  Returns None if no security signal detected at either level.
@@ -2534,15 +2594,6 @@ def _route_security_from_sym(
2534
2594
  if "@SecurityRequirement" in anns:
2535
2595
  raw = vals.get("@SecurityRequirement", "")
2536
2596
  return {"policy": "openapi_security", "spec": raw.strip()}
2537
- # Custom legacy annotation
2538
- if "@M3FiltroSeguridad" in anns:
2539
- import re as _re2
2540
- raw = vals.get("@M3FiltroSeguridad", "")
2541
- m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
2542
- if m:
2543
- return {"policy": "custom_permission", "required_permission": m.group(1)}
2544
- # Value is a constant reference or empty — still flag the annotation
2545
- return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
2546
2597
  return None
2547
2598
 
2548
2599
  # Method-level first, then class-level fallback
@@ -2829,6 +2880,29 @@ def build_repo_ir(
2829
2880
  # type map before building relations. Java classes in the same package
2830
2881
  # reference each other without import statements, so import_map alone cannot
2831
2882
  # resolve them — _build_same_package_map provides the cross-file fallback.
2883
+ #
2884
+ # Pre-scan filter: skip full symbol extraction for files that have no
2885
+ # Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
2886
+ # SPI interfaces) contribute no endpoints, transactions, or security findings
2887
+ # to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
2888
+ # Non-annotated files still register their package+class via a lightweight
2889
+ # regex scan so same-package type resolution remains correct.
2890
+ _ANNOTATION_MARKERS: tuple[str, ...] = (
2891
+ '@Controller', '@RestController', '@Service', '@Repository',
2892
+ '@Component', '@Configuration', '@Bean', '@Transactional',
2893
+ '@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
2894
+ '@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
2895
+ '@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
2896
+ '@RequiredArgsConstructor', '@AllArgsConstructor',
2897
+ '@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
2898
+ '@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
2899
+ # JPA / persistence (needed for stereotype detection in all commands)
2900
+ '@Entity', '@MappedSuperclass', '@Embeddable',
2901
+ # AOP / messaging / event sourcing
2902
+ '@Aspect', '@Aggregate', '@Document',
2903
+ # Spring Data
2904
+ '@Query', '@NamedQuery',
2905
+ )
2832
2906
  _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
2833
2907
  for rel_path in sorted(file_paths):
2834
2908
  abs_path = root / rel_path
@@ -2839,6 +2913,23 @@ def build_repo_ir(
2839
2913
  _meta_files_read += 1
2840
2914
  _meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
2841
2915
  _meta_chars_read += len(source)
2916
+ # Fast pre-scan: if file has no relevant annotations skip full extraction.
2917
+ # Still register package/class name for same-package resolution.
2918
+ if not any(marker in source for marker in _ANNOTATION_MARKERS):
2919
+ pkg_m = _PKG_RE.search(source)
2920
+ _pkg = pkg_m.group(1) if pkg_m else ""
2921
+ # Minimal class-name symbols for same-package map (no methods/fields)
2922
+ _min_syms: list[SymbolRecord] = []
2923
+ for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
2924
+ _cls_name = _cm.group(1)
2925
+ _fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
2926
+ _min_syms.append(SymbolRecord(
2927
+ symbol=_fqn, type="class", confidence="medium",
2928
+ declaring_file=rel_path,
2929
+ ))
2930
+ all_symbols.extend(_min_syms)
2931
+ # No relations needed for non-annotated files
2932
+ continue
2842
2933
  package, symbols, raw_imports = _extract_symbols(source, rel_path)
2843
2934
  all_symbols.extend(symbols)
2844
2935
  _per_file.append((rel_path, source, package, raw_imports, symbols))
@@ -4147,13 +4238,22 @@ def _all_callers_from_rg(fqn: str, reverse_graph: dict[str, dict[str, list[str]]
4147
4238
  BUG-01 fix: skip 'contained_in' edges — those represent structural membership
4148
4239
  (method→enclosing class), not actual callers. Without this, an Impl class
4149
4240
  with 91 own methods would show 91 "direct callers" and inflate risk to HIGH.
4241
+
4242
+ CH-002 fix: for 'injects' edges, normalize field/constructor FQNs to their
4243
+ enclosing class. e.g. pkg.ConsolidacionService.calcularField → pkg.ConsolidacionService
4244
+ so BFS can continue through DI injection chains and find controllers.
4150
4245
  """
4151
4246
  entry = reverse_graph.get(fqn) or {}
4152
4247
  callers: list[str] = []
4248
+ seen: set[str] = set()
4153
4249
  for edge_type, fqn_list in entry.items():
4154
4250
  if edge_type == "contained_in":
4155
4251
  continue # structural membership, not a caller
4156
- callers.extend(fqn_list)
4252
+ for c in fqn_list:
4253
+ normalized = _normalize_owner_fqn(c) if edge_type == "injects" else c
4254
+ if normalized not in seen:
4255
+ seen.add(normalized)
4256
+ callers.append(normalized)
4157
4257
  return callers
4158
4258
 
4159
4259
 
@@ -57,15 +57,7 @@ _EXTENDS_RE = re.compile(
57
57
  # Custom AOP annotation registry — extend here for project-specific security/AOP annotations.
58
58
  # Each entry: (method_regex, impl_symbol_name).
59
59
  # method_regex must capture the annotated method name in group 1.
60
- _CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = [
61
- (
62
- re.compile(
63
- r'@M3FiltroSeguridad(?:\([^)]*\))?\s+(?:@[^\s]+\s+)*'
64
- r'(?:public|private|protected)\s+\w[\w<>\[\]]*\s+([a-z][A-Za-z0-9_]*)\s*\('
65
- ),
66
- "M3FiltroSeguridadImpl",
67
- ),
68
- ]
60
+ _CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = []
69
61
  _LOMBOK_CLASS_RE = re.compile(
70
62
  r'(@(?:Data|Slf4j|Builder|AllArgsConstructor|NoArgsConstructor)(?:\([^)]*\))?\s+)*'
71
63
  r'(?:public\s+)?(?:class|interface)\s+([A-Z][A-Za-z0-9_]*)',
@@ -925,7 +917,7 @@ class SemanticAnalyzer:
925
917
 
926
918
  method="heuristic", confidence="low" para todos los edges Java.
927
919
  Includes: Lombok synthetic symbols, @Autowired field edges,
928
- @Mapper interface detection, inheritance chains, @M3FiltroSeguridad AOP edges.
920
+ @Mapper interface detection, inheritance chains, custom AOP annotation edges.
929
921
  """
930
922
  _JAVA_KEYWORDS: frozenset[str] = frozenset({
931
923
  "if", "for", "while", "switch", "catch", "super", "this", "new",
@@ -504,7 +504,9 @@ def _transactional_summary(sm: "SourceMap", *, full: bool = False) -> "Optional[
504
504
  classes = getattr(s, "transactional_classes", [])
505
505
  if classes:
506
506
  total = len(classes)
507
- result: dict[str, Any] = {"count": total, "classes": classes}
507
+ # class_count = unique classes with @Transactional anywhere (file-level scan).
508
+ # spring-audit metadata.tx_stats has method-level annotation breakdown.
509
+ result: dict[str, Any] = {"class_count": total, "classes": classes}
508
510
  if total > 10 and not full:
509
511
  result["classes"] = classes[:10]
510
512
  result["truncated"] = True
@@ -549,9 +551,13 @@ def _security_surface_from_eps(
549
551
  root: "Optional[Path]" = None,
550
552
  file_paths: "Optional[list[str]]" = None,
551
553
  ) -> "Optional[dict[str, Any]]":
552
- """Extract @M3FiltroSeguridad resource names from entry point evidence strings."""
554
+ """Extract permission resource names from entry point evidence strings.
555
+
556
+ Looks for resource=VALUE or nombreRecurso=VALUE patterns in evidence
557
+ produced by custom security annotations on REST controller methods.
558
+ """
553
559
  import re as _re
554
- _NOMBRE_RE = _re.compile(r"nombreRecurso=[\"']([^\"']+)[\"']")
560
+ _RESOURCE_RE = _re.compile(r"(?:resource|nombreRecurso)=[\"']([^\"']+)[\"']")
555
561
  _CONST_SYMBOL_RE = _re.compile(r'^[\w]+\.[\w]+$')
556
562
  resource_names: list[str] = []
557
563
  unresolved: list[str] = []
@@ -560,7 +566,7 @@ def _security_surface_from_eps(
560
566
  evidence = getattr(ep, "evidence", None)
561
567
  if not evidence:
562
568
  continue
563
- for m in _NOMBRE_RE.finditer(evidence):
569
+ for m in _RESOURCE_RE.finditer(evidence):
564
570
  nm = m.group(1)
565
571
  if not nm or nm in seen:
566
572
  continue
@@ -578,8 +584,8 @@ def _security_surface_from_eps(
578
584
  return None
579
585
  result: dict[str, Any] = {
580
586
  "schema": (
581
- "Values used in @M3FiltroSeguridad(nombreRecurso=VALUE) on REST controller "
582
- "methods. Each value names a permission resource checked at runtime."
587
+ "Permission resource identifiers found on REST controller methods. "
588
+ "Each value names a resource checked at runtime by a security annotation."
583
589
  ),
584
590
  "resource_names": resource_names,
585
591
  }
@@ -739,7 +739,9 @@ def run_tx_audit(
739
739
  limitations=_tx_limitations,
740
740
  metadata={
741
741
  "symbols_analyzed": len(getattr(cir, "symbols", [])),
742
- "tx_boundaries_found": tx_index.stats()["total"],
742
+ # tx_annotation_count = total @Transactional symbols (class-level + method-level).
743
+ # tx_stats.class_level matches compact transactional_boundaries.class_count.
744
+ "tx_annotation_count": tx_index.stats()["total"],
743
745
  "tx_stats": tx_index.stats(),
744
746
  "analysis_time_ms": elapsed_ms,
745
747
  },
@@ -223,6 +223,20 @@ class ProjectSummarizer:
223
223
  __import__("re").IGNORECASE,
224
224
  )
225
225
 
226
+ # Patterns that indicate security scanner / tool output, not project description.
227
+ # Trivy, OWASP, Snyk, etc. produce structured vulnerability reports.
228
+ _TOOL_OUTPUT_RE = __import__("re").compile(
229
+ r"CVE-\d{4}-\d{4,}" # CVE identifiers
230
+ r"|UNKNOWN:\s*\d+.*LOW:\s*\d+" # Trivy severity summary line
231
+ r"|(CRITICAL|HIGH|MEDIUM|LOW):\s*\d+" # severity: count pattern
232
+ r"|\bTotal:\s*\d+\s*\(" # "Total: 45 (UNKNOWN: 0, ..." Trivy header
233
+ r"|\bvulnerabilit(?:y|ies)\s+found\b" # "N vulnerabilities found"
234
+ r"|\bscan(?:ned|ning)\s+\d+\s+(?:file|package|image)\b" # scanner progress
235
+ r"|\bpkg:(?:npm|pypi|maven|cargo|golang)/" # PURL package identifiers
236
+ r"|\b(?:trivy|snyk|grype|syft|cosign)\b", # well-known scanner names
237
+ __import__("re").IGNORECASE,
238
+ )
239
+
226
240
  def _extract_first_useful_paragraph(self, content: str) -> str | None:
227
241
  """Extract the first paragraph that describes the project architecture, not its license or marketing."""
228
242
  import re as _re
@@ -268,6 +282,9 @@ class ProjectSummarizer:
268
282
  # Reject license notices and user-facing marketing text
269
283
  if self._LICENSE_MARKETING_RE.search(paragraph):
270
284
  continue
285
+ # Reject security scanner / tool output (Trivy, Snyk, OWASP, CVE lists)
286
+ if self._TOOL_OUTPUT_RE.search(paragraph):
287
+ continue
271
288
  # Reject link-list paragraphs (docs/navigation sections):
272
289
  # if more than 2 markdown links dominate the paragraph, it's a nav section
273
290
  _link_count = len(_MD_LINK_RE.findall(paragraph))
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes