sourcecode 1.33.24__tar.gz → 1.35.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. {sourcecode-1.33.24 → sourcecode-1.35.0}/PKG-INFO +2 -2
  2. {sourcecode-1.33.24 → sourcecode-1.35.0}/README.md +1 -1
  3. {sourcecode-1.33.24 → sourcecode-1.35.0}/pyproject.toml +1 -1
  4. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/__init__.py +1 -1
  5. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/cli.py +170 -0
  6. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/server.py +35 -0
  7. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/prepare_context.py +39 -0
  8. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/relevance_scorer.py +26 -1
  9. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/repository_ir.py +12 -1
  10. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/ris.py +53 -1
  11. sourcecode-1.35.0/src/sourcecode/spring_findings.py +130 -0
  12. sourcecode-1.35.0/src/sourcecode/spring_security_audit.py +462 -0
  13. sourcecode-1.35.0/src/sourcecode/spring_semantic.py +340 -0
  14. sourcecode-1.35.0/src/sourcecode/spring_tx_analyzer.py +629 -0
  15. {sourcecode-1.33.24 → sourcecode-1.35.0}/.github/workflows/build-windows.yml +0 -0
  16. {sourcecode-1.33.24 → sourcecode-1.35.0}/.gitignore +0 -0
  17. {sourcecode-1.33.24 → sourcecode-1.35.0}/.ruff.toml +0 -0
  18. {sourcecode-1.33.24 → sourcecode-1.35.0}/CHANGELOG.md +0 -0
  19. {sourcecode-1.33.24 → sourcecode-1.35.0}/CONTRIBUTING.md +0 -0
  20. {sourcecode-1.33.24 → sourcecode-1.35.0}/LICENSE +0 -0
  21. {sourcecode-1.33.24 → sourcecode-1.35.0}/SECURITY.md +0 -0
  22. {sourcecode-1.33.24 → sourcecode-1.35.0}/raw +0 -0
  23. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/adaptive_scanner.py +0 -0
  24. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/architecture_analyzer.py +0 -0
  25. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/architecture_summary.py +0 -0
  26. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/ast_extractor.py +0 -0
  27. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/cache.py +0 -0
  28. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/canonical_ir.py +0 -0
  29. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/classifier.py +0 -0
  30. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  31. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/confidence_analyzer.py +0 -0
  32. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/context_scorer.py +0 -0
  33. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/context_summarizer.py +0 -0
  34. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/contract_model.py +0 -0
  35. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/contract_pipeline.py +0 -0
  36. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/coverage_parser.py +0 -0
  37. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/dependency_analyzer.py +0 -0
  38. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/__init__.py +0 -0
  39. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/base.py +0 -0
  40. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  41. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/dart.py +0 -0
  42. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/dotnet.py +0 -0
  43. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/elixir.py +0 -0
  44. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/go.py +0 -0
  45. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/heuristic.py +0 -0
  46. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/hybrid.py +0 -0
  47. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/java.py +0 -0
  48. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  49. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/nodejs.py +0 -0
  50. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/parsers.py +0 -0
  51. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/php.py +0 -0
  52. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/project.py +0 -0
  53. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/python.py +0 -0
  54. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/ruby.py +0 -0
  55. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/rust.py +0 -0
  56. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/systems.py +0 -0
  57. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/terraform.py +0 -0
  58. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/detectors/tooling.py +0 -0
  59. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/doc_analyzer.py +0 -0
  60. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  61. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/env_analyzer.py +0 -0
  62. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/error_schema.py +0 -0
  63. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/file_classifier.py +0 -0
  64. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/flow_analyzer.py +0 -0
  65. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/git_analyzer.py +0 -0
  66. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/graph_analyzer.py +0 -0
  67. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/license.py +0 -0
  68. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/__init__.py +0 -0
  69. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
  70. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/onboarding/applier.py +0 -0
  71. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/onboarding/backup.py +0 -0
  72. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/onboarding/detector.py +0 -0
  73. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/onboarding/planner.py +0 -0
  74. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/orchestrator.py +0 -0
  75. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/registry.py +0 -0
  76. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp/runner.py +0 -0
  77. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/mcp_nudge.py +0 -0
  78. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/metrics_analyzer.py +0 -0
  79. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/output_budget.py +0 -0
  80. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/path_filters.py +0 -0
  81. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/pr_comment_renderer.py +0 -0
  82. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/progress.py +0 -0
  83. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/ranking_engine.py +0 -0
  84. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/redactor.py +0 -0
  85. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/repo_classifier.py +0 -0
  86. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/runtime_classifier.py +0 -0
  87. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/scanner.py +0 -0
  88. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/schema.py +0 -0
  89. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/semantic_analyzer.py +0 -0
  90. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/serializer.py +0 -0
  91. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/summarizer.py +0 -0
  92. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/__init__.py +0 -0
  93. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/config.py +0 -0
  94. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/consent.py +0 -0
  95. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/events.py +0 -0
  96. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/filters.py +0 -0
  97. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/telemetry/transport.py +0 -0
  98. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/tree_utils.py +0 -0
  99. {sourcecode-1.33.24 → sourcecode-1.35.0}/src/sourcecode/workspace.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.33.24
3
+ Version: 1.35.0
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -39,7 +39,7 @@ Description-Content-Type: text/markdown
39
39
 
40
40
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
41
41
 
42
- ![Version](https://img.shields.io/badge/version-1.33.24-blue)
42
+ ![Version](https://img.shields.io/badge/version-1.35.0-blue)
43
43
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
44
44
 
45
45
  ---
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
4
4
 
5
- ![Version](https://img.shields.io/badge/version-1.33.24-blue)
5
+ ![Version](https://img.shields.io/badge/version-1.35.0-blue)
6
6
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
7
7
 
8
8
  ---
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "1.33.24"
7
+ version = "1.35.0"
8
8
  description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.33.24"
3
+ __version__ = "1.35.0"
@@ -223,6 +223,8 @@ _SUBCOMMANDS: frozenset[str] = frozenset(
223
223
  "cache",
224
224
  # RIS bootstrap
225
225
  "cold-start",
226
+ # Spring semantic audit
227
+ "spring-audit",
226
228
  }
227
229
  )
228
230
 
@@ -3697,6 +3699,174 @@ def endpoints_cmd(
3697
3699
  _nudge()
3698
3700
 
3699
3701
 
3702
+ # ── Spring Semantic Audit ─────────────────────────────────────────────────────
3703
+
3704
@app.command("spring-audit")
def spring_audit_cmd(
    path: Path = typer.Argument(
        Path("."),
        help="Repository path to audit (default: current directory)",
    ),
    output_path: Optional[Path] = typer.Option(
        None, "--output", "-o",
        help="Write output to a file instead of stdout.",
    ),
    format: str = typer.Option(
        "json",
        "--format",
        "-f",
        help="Output format: json (default) or yaml.",
        show_default=True,
    ),
    copy: bool = typer.Option(
        False,
        "--copy",
        "-c",
        help="Copy output to system clipboard after a successful run.",
    ),
    scope: str = typer.Option(
        "all",
        "--scope",
        "-s",
        help="Audit scope: all (default), tx, or security.",
        show_default=True,
    ),
    min_severity: str = typer.Option(
        "low",
        "--min-severity",
        help="Minimum severity to include: critical, high, medium, or low (default).",
        show_default=True,
    ),
) -> None:
    """Spring semantic audit: TX anomalies (TX-001..005) + security surface (SEC-001..003).

    \b
    Detects:
      TX-001 @Transactional on private/final method (CGLIB proxy bypass)
      TX-002 REQUIRES_NEW nested in REQUIRED call chain
      TX-003 readOnly=true boundary propagating to write operation
      TX-004 NOT_SUPPORTED/NEVER within active TX chain
      TX-005 Exception swallowing inside @Transactional
      SEC-001 Unsecured endpoint in annotation_based security model
      SEC-002 CVE-2025-41248: @PreAuthorize on inherited method from generic supertype
      SEC-003 @Transactional on @Controller/@RestController (TX in wrong layer)

    \b
    Examples:
      sourcecode spring-audit .
      sourcecode spring-audit /path/to/repo
      sourcecode spring-audit . --scope security
      sourcecode spring-audit . --min-severity high
      sourcecode spring-audit . --output audit.json
    """
    # NOTE: the docstring above is the CLI help text, so implementation notes
    # live in comments instead.
    #
    # Flow: validate the arguments, collect Java files, build the canonical IR,
    # run the audits selected by --scope, merge their results when both ran,
    # persist a summary into the RIS (best effort) and emit the serialized
    # result. Invalid arguments emit a JSON error and exit with code 1.
    from sourcecode.repository_ir import find_java_files
    from sourcecode.canonical_ir import build_canonical_ir
    from sourcecode.spring_findings import SpringAuditResult, SpringFinding
    from sourcecode.spring_tx_analyzer import run_tx_audit
    from sourcecode.spring_security_audit import run_security_audit
    from sourcecode.spring_semantic import build_tx_index

    def _emit(payload: str, written_note: str) -> None:
        """Write *payload* to --output or stdout; honor --copy on the stdout path."""
        if output_path is not None:
            output_path.write_text(payload, encoding="utf-8")
            typer.echo(written_note, err=True)
            return
        # Write bytes directly so the output is UTF-8 regardless of the
        # console encoding.
        sys.stdout.buffer.write(payload.encode("utf-8"))
        sys.stdout.buffer.write(b"\n")
        sys.stdout.buffer.flush()
        if copy and _copy_to_clipboard(payload):
            typer.echo("✓ copied to clipboard", err=True)

    target = path.resolve()
    # is_dir() is already False for a missing path, so one check covers both.
    if not target.is_dir():
        _emit_error_json(
            INVALID_INPUT_CODE,
            f"'{target}' is not a valid directory.",
            path=str(target),
            hint="Pass an existing repository directory.",
            expected="A directory path.",
        )
        raise typer.Exit(code=1)

    if scope not in ("all", "tx", "security"):
        _emit_error_json(
            INVALID_INPUT_CODE,
            f"Invalid scope '{scope}'.",
            hint="scope must be one of: all, tx, security.",
            expected="all | tx | security",
        )
        raise typer.Exit(code=1)

    if min_severity not in ("critical", "high", "medium", "low"):
        _emit_error_json(
            INVALID_INPUT_CODE,
            f"Invalid min-severity '{min_severity}'.",
            hint="min-severity must be one of: critical, high, medium, low.",
            expected="critical | high | medium | low",
        )
        raise typer.Exit(code=1)

    file_list = find_java_files(target)
    if not file_list:
        # Nothing to analyze: still return a well-formed (empty) envelope.
        data = SpringAuditResult(
            spring_detected=False,
            scope=scope,
            limitations=["No Java files found in repository — Spring audit requires Java source."],
            metadata={"java_files_found": 0},
        ).finalize().to_dict()
        # Same emit path as a normal run, so --copy is honored here too.
        _emit(_serialize_dict(data, format), "Spring audit written to " + str(output_path))
        return

    cir = build_canonical_ir(file_list, target)

    results: list[SpringAuditResult] = []
    if scope in ("all", "tx"):
        results.append(run_tx_audit(cir, root=target, min_severity=min_severity))
    if scope in ("all", "security"):
        # Only the security audit consumes the TX index, so build it lazily.
        tx_idx = build_tx_index(cir)
        results.append(run_security_audit(cir, root=target, min_severity=min_severity, tx_index=tx_idx))

    if len(results) == 1:
        combined = results[0]
    else:
        # Both audits ran: concatenate findings/limitations and overlay the
        # metadata (later results win on key clashes), then recompute the summary.
        all_findings: list[SpringFinding] = []
        all_limitations: list[str] = []
        merged_meta: dict = {}
        for r in results:
            all_findings.extend(r.findings)
            all_limitations.extend(r.limitations)
            merged_meta.update(r.metadata)
        combined = SpringAuditResult(
            repo_id=results[0].repo_id,
            spring_detected=any(r.spring_detected for r in results),
            scope="all",
            findings=all_findings,
            limitations=all_limitations,
            metadata=merged_meta,
        ).finalize()

    data = combined.to_dict()

    # Non-fatal RIS side-effect — persist summary only (not full findings).
    # Deliberately best-effort: a cache failure must not fail the audit.
    try:
        from sourcecode.ris import update_ris_spring_audit as _ris_sa
        _ris_sa(target, data)
    except Exception:
        pass

    total = combined.summary.get("total_findings", 0)
    _emit(
        _serialize_dict(data, format),
        f"Spring audit written to {output_path} ({total} findings)",
    )
3868
+
3869
+
3700
3870
  # ── Enterprise Workflow Commands ──────────────────────────────────────────────
3701
3871
  #
3702
3872
  # These are the five canonical enterprise workflows. Each is a thin wrapper
@@ -614,6 +614,41 @@ def get_endpoints(repo_path: str = ".") -> dict:
614
614
  )
615
615
 
616
616
 
617
@mcp.tool()
def get_spring_audit(repo_path: str = ".", scope: str = "all") -> dict:
    """Spring semantic audit: TX anomalies + security surface findings. JAVA/SPRING ONLY.

    Do NOT call this on non-Java repositories — it will return spring_detected=false.

    Maps to: sourcecode spring-audit <repo_path> --scope <scope>
    Returns: SpringAuditResult with schema_version, spring_detected, scope, summary,
             findings list (id, pattern_id, category, severity, confidence, title,
             symbol, source_file, evidence, explanation, fix_hint), limitations, metadata.
    Patterns: TX-001..005 (transaction anomalies), SEC-001..003 (security surface).
    scope: "all" (default) | "tx" | "security".
    repo_path: absolute path to the Java repository (default: current working directory).
    """
    # NOTE(review): the docstring is presumably surfaced as the tool
    # description, so its wording is left unchanged — confirm before editing.

    # Remember the value exactly as received so the internal-error report can
    # show it even after normalization has rebound the working copy.
    received = repo_path
    allowed_scopes = ("all", "tx", "security")
    try:
        if not isinstance(repo_path, str):
            return _err("repo_path must be a string", "INVALID_ARGUMENT")
        if scope not in allowed_scopes:
            return _err(
                f"Invalid scope '{scope}' — must be one of: all, tx, security",
                "INVALID_ARGUMENT",
            )

        normalized = _normalize_repo_path(repo_path)
        path_problem = _check_repo_path(normalized)
        if path_problem is None:
            # Delegate to the CLI subcommand with the validated arguments.
            return _execute(["spring-audit", normalized, "--scope", scope])
        return path_problem
    except Exception as exc:
        # Tool boundary: report any unexpected failure as a structured error.
        detail = f"Internal error: {type(exc).__name__}: {exc} — repo_path recibido: {received}"
        return _err(detail, "INTERNAL_ERROR")
650
+
651
+
617
652
  @mcp.tool()
618
653
  def get_module_context(repo_path: str = ".", module: str = "") -> dict:
619
654
  """Compact analysis of a specific module or subdirectory within a repository.
@@ -864,6 +864,10 @@ _SYMPTOM_STOP_WORDS: frozenset[str] = frozenset({
864
864
  "error", "issue", "problem", "bug",
865
865
  "from", "into", "via", "due", "also", "after", "before",
866
866
  "slow", "fast", "new", "old",
867
+ # Diagnostic abbreviations — too ubiquitous in comments/javadoc to be useful
868
+ # as path/content signals (e.g. "npe" matches "NPE" in thousands of comments).
869
+ "npe", "oom", "oob", "iae", "ise", "npe", "cce", "aioobe",
870
+ "exception", "throwable", "stacktrace",
867
871
  })
868
872
 
869
873
  # Repo-scale threshold: above this file count, use stricter injection logic.
@@ -2363,6 +2367,26 @@ class TaskContextBuilder:
2363
2367
  if _is_large_repo and len(relevant_files) > 40:
2364
2368
  relevant_files = relevant_files[:40]
2365
2369
 
2370
+ # Score normalization: prevent all-1.0 saturation when many files
2371
+ # accumulate enough boost to hit the cap. Normalize display scores
2372
+ # relative to max raw signal so the ranked gradient is preserved.
2373
+ _norm_max = max(
2374
+ (_raw_signals.get(rf.path, rf.score) for rf in relevant_files),
2375
+ default=1.0,
2376
+ ) or 1.0
2377
+ if _norm_max > 1.0:
2378
+ relevant_files = [
2379
+ RelevantFile(
2380
+ path=rf.path,
2381
+ role=rf.role,
2382
+ score=round(min(_raw_signals.get(rf.path, rf.score) / _norm_max, 1.0), 3),
2383
+ reason=rf.reason,
2384
+ why=rf.why,
2385
+ tier=rf.tier,
2386
+ )
2387
+ for rf in relevant_files
2388
+ ]
2389
+
2366
2390
  # Synonym note (only when synonyms actually fired)
2367
2391
  if _frontend_kws and _sx_synonyms:
2368
2392
  symptom_note = (
@@ -2491,12 +2515,27 @@ class TaskContextBuilder:
2491
2515
  key=lambda x: -(x["public_method_count"] * (1.5 if x["has_framework_annotations"] else 1.0))
2492
2516
  )
2493
2517
  _top = _java_candidates if all_gaps else _java_candidates[:20]
2518
+ _max_rank = max(
2519
+ (c["public_method_count"] * (1.5 if c["has_framework_annotations"] else 1.0)
2520
+ for c in _java_candidates),
2521
+ default=1.0,
2522
+ ) or 1.0
2494
2523
  test_gaps = [
2495
2524
  {
2496
2525
  "path": c["path"],
2497
2526
  "public_method_count": c["public_method_count"],
2498
2527
  "has_framework_annotations": c["has_framework_annotations"],
2499
2528
  "rank_score": round(c["public_method_count"] * (1.5 if c["has_framework_annotations"] else 1.0), 1),
2529
+ "score": round(min(
2530
+ c["public_method_count"] * (1.5 if c["has_framework_annotations"] else 1.0) / _max_rank,
2531
+ 1.0,
2532
+ ), 3),
2533
+ "reason": (
2534
+ (f"{c['public_method_count']} public method{'s' if c['public_method_count'] != 1 else ''}; "
2535
+ if c["public_method_count"] else "")
2536
+ + ("has Spring/Jakarta framework annotations" if c["has_framework_annotations"]
2537
+ else "no test coverage detected")
2538
+ ).strip("; ") or "untested source file",
2500
2539
  }
2501
2540
  for c in _top
2502
2541
  ]
@@ -88,6 +88,31 @@ _AUXILIARY_DIR_PATTERNS: list[re.Pattern[str]] = [
88
88
  re.compile(r"(?:^|/)stories(?:/|$)"),
89
89
  ]
90
90
 
91
+ # JVM source roots: path components below these are package namespaces, not
92
+ # functional directories. "com/example/foo" must not match the "example"
93
+ # auxiliary-dir pattern just because "example" is a package name component.
94
+ _JVM_SRC_ROOTS: tuple[str, ...] = (
95
+ "src/main/java/", "src/test/java/",
96
+ "src/main/kotlin/", "src/test/kotlin/",
97
+ "src/main/scala/", "src/test/scala/",
98
+ "src/integration-test/java/", "src/integrationTest/java/",
99
+ )
100
+
101
+
102
+ def _jvm_strip_path(path: str) -> str:
103
+ """Return the path segment used for auxiliary-dir pattern matching.
104
+
105
+ For JVM source trees the package path below the source root is a namespace,
106
+ not a functional directory hierarchy. Truncate at the source root so that
107
+ package components like 'example' in com.example.* don't false-match
108
+ _AUXILIARY_DIR_PATTERNS entries.
109
+ """
110
+ for root in _JVM_SRC_ROOTS:
111
+ idx = path.find(root)
112
+ if idx >= 0:
113
+ return path[: idx + len(root)]
114
+ return path
115
+
91
116
  # Test file patterns — scored low, excluded from default contract output
92
117
  _TEST_FILE_PATTERNS: tuple[str, ...] = (
93
118
  "_test.", ".test.", ".spec.", "test_", "conftest", "_spec.",
@@ -221,7 +246,7 @@ class RelevanceScorer:
221
246
  return self._is_auxiliary(path.replace("\\", "/"))
222
247
 
223
248
  def _is_auxiliary(self, norm: str) -> bool:
224
- return any(p.search(norm) for p in _AUXILIARY_DIR_PATTERNS)
249
+ return any(p.search(_jvm_strip_path(norm)) for p in _AUXILIARY_DIR_PATTERNS)
225
250
 
226
251
  def package_role(self, path: str) -> str:
227
252
  """Return the monorepo package role for this path, or empty string."""
@@ -238,6 +238,14 @@ _LOMBOK_CTOR_ANNOTATIONS: frozenset[str] = frozenset({
238
238
  "@AllArgsConstructor", # injects all non-static fields
239
239
  })
240
240
 
241
+ # Transaction annotations whose args must be captured for semantic analysis.
242
+ _TX_ANNOTATIONS: frozenset[str] = frozenset({"@Transactional"})
243
+
244
+ # Combined set used in _extract_symbols annotation-value capture.
245
+ _CAPTURE_ANN_ARGS: frozenset[str] = (
246
+ _ENDPOINT_ANNOTATIONS | _PERMISSION_ANNOTATIONS | _PATH_ANNOTATIONS | _TX_ANNOTATIONS
247
+ )
248
+
241
249
  _JAVA_ROLE_MAP: dict[str, str] = {
242
250
  # Spring MVC / Spring Boot
243
251
  "@RestController": "controller",
@@ -563,7 +571,7 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
563
571
  ann_args = ann_m.group(2) or ""
564
572
  if ann not in pending_anns:
565
573
  pending_anns.append(ann)
566
- if ann_args and (ann in _ENDPOINT_ANNOTATIONS or ann in _PERMISSION_ANNOTATIONS or ann in _PATH_ANNOTATIONS):
574
+ if ann_args and ann in _CAPTURE_ANN_ARGS:
567
575
  # P1 fix: attempt to resolve constant expressions before storing.
568
576
  # Transforms '"/" + SECTION_KEY' → '"/category"' when constant
569
577
  # is defined in this file. Falls back to original if unresolvable.
@@ -2234,6 +2242,9 @@ def _assemble(
2234
2242
  "role": spring_role_map.get(s.symbol, "other"),
2235
2243
  "in_degree": in_deg.get(s.symbol, 0),
2236
2244
  "out_degree": out_deg.get(s.symbol, 0),
2245
+ "annotations": list(s.annotations),
2246
+ "annotation_values": dict(s.annotation_values),
2247
+ "modifiers": list(s.modifiers),
2237
2248
  }
2238
2249
  for s in sorted_syms
2239
2250
  ]
@@ -275,6 +275,53 @@ def maybe_update_ris(repo_root: Path, core_dict: dict, git_head: str) -> None:
275
275
  pass
276
276
 
277
277
 
278
def update_ris_spring_audit(repo_root: Path, audit_result: dict) -> None:
    """Record a summary of a spring-audit run in the RIS metadata.

    Only aggregate numbers are stored, under ``metadata["spring_audit"]``; the
    findings themselves are not persisted. If no snapshot can be loaded for
    ``repo_root``, a partial one with empty sections is created first.

    Best effort by contract: non-dict input is ignored and any exception is
    swallowed, so this function never raises.
    """
    try:
        if not isinstance(audit_result, dict):
            return

        stats = audit_result.get("summary") or {}
        snapshot = load_ris(repo_root)
        if snapshot is None:
            from sourcecode.cache import repo_id as _repo_id_fn

            # No snapshot yet: start from an empty one flagged as partial.
            stamp = _now_iso()
            empty_sections = {
                section: {}
                for section in (
                    "structural_map",
                    "api_surface",
                    "dependency_graph",
                    "compact_summary",
                    "agent_index",
                    "git_context_snapshot",
                )
            }
            snapshot = RepositoryIntelligenceSnapshot(
                repo_id=_repo_id_fn(repo_root),
                created_at=stamp,
                last_updated_at=stamp,
                git_head="",
                version=RIS_SCHEMA_VERSION,
                metadata={"snapshot_source": "existing_snapshot_system", "confidence": 0.0, "partial": True},
                **empty_sections,
            )

        audit_cache = {
            "total_findings": stats.get("total_findings", 0),
            "by_severity": stats.get("by_severity", {}),
            "by_category": stats.get("by_category", {}),
            "confidence_level": stats.get("confidence_level", ""),
            "scope": audit_result.get("scope", "all"),
            "spring_detected": audit_result.get("spring_detected", False),
            "last_run_at": audit_result.get("generated_at", _now_iso()),
        }

        # Rebuild the snapshot from its current attributes, replacing only the
        # metadata and the update timestamp.
        attrs = dict(snapshot.__dict__)
        attrs["metadata"] = {**snapshot.metadata, "spring_audit": audit_cache}
        attrs["last_updated_at"] = _now_iso()
        save_ris(repo_root, RepositoryIntelligenceSnapshot(**attrs))
    except Exception:
        # Deliberate: persisting the summary must never break the caller.
        pass
323
+
324
+
278
325
  def update_ris_api_surface(repo_root: Path, endpoints_data: dict) -> None:
279
326
  """Update the api_surface section from an ``endpoints`` command output.
280
327
 
@@ -422,7 +469,9 @@ def get_cold_start_context(repo_root: Path) -> dict:
422
469
  if _validation_status == "incomplete_snapshot" and not stale:
423
470
  _status_base = "cold_start_incomplete"
424
471
 
472
+ _compact = ris.compact_summary or {}
425
473
  result: dict = {
474
+ "schema_version": "1.1",
426
475
  "status": _status_base,
427
476
  "repo_id": ris.repo_id,
428
477
  "git_head": ris.git_head,
@@ -433,7 +482,10 @@ def get_cold_start_context(repo_root: Path) -> dict:
433
482
  "cache_source": "RIS",
434
483
  "data_scope": "RIS_BOOTSTRAP",
435
484
  "api_surface_complete": _api_complete,
436
- "summary": ris.compact_summary,
485
+ # Backward-compat alias: old consumers read cold-start.project_type directly.
486
+ # New consumers should use cold-start.summary.project_type.
487
+ "project_type": _compact.get("project_type"),
488
+ "summary": _compact,
437
489
  "entrypoints": ris.structural_map.get("entrypoints", []),
438
490
  "endpoints": endpoints,
439
491
  "hotspots": ris.git_context_snapshot.get("hotspots", []),
@@ -0,0 +1,130 @@
1
+ """spring_findings.py — Shared finding schema for Spring semantic audit.
2
+
3
+ SpringFinding is the canonical output unit.
4
+ SpringAuditResult is the top-level envelope returned by CLI and MCP.
5
+
6
+ IDs are deterministic: same symbol + pattern → same ID across runs.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import hashlib
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime, timezone
14
+ from typing import Optional
15
+
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # SpringFinding
19
+ # ---------------------------------------------------------------------------
20
+
21
@dataclass
class SpringFinding:
    """One audit finding: a single detected pattern on a single symbol."""

    id: str  # deterministic: "{pattern_id}-{symbol_hash[:12]}"
    pattern_id: str  # "TX-001", "SEC-001", ...
    category: str  # "tx" | "security"
    severity: str  # "critical" | "high" | "medium" | "low"
    confidence: str  # "high" | "medium" | "low"
    title: str
    symbol: str  # FQN of affected symbol
    source_file: str
    evidence: dict  # pattern-specific structured evidence
    explanation: str  # 2-3 sentences: what + why it matters
    fix_hint: str  # one actionable sentence
    limitations: list[str] = field(default_factory=list)
    related_symbols: list[str] = field(default_factory=list)

    @staticmethod
    def make_id(pattern_id: str, symbol: str) -> str:
        """Build the finding ID: the pattern ID plus 12 hex chars of a SHA-256.

        The digest covers ``"{pattern_id}:{symbol}"``, so the same pattern on
        the same symbol always yields the same ID.
        """
        seed = f"{pattern_id}:{symbol}".encode()
        short_digest = hashlib.sha256(seed).hexdigest()[:12]
        return f"{pattern_id}-{short_digest}"

    def to_dict(self) -> dict:
        """Return the finding as a plain dict.

        The eleven core fields are always present; ``limitations`` and
        ``related_symbols`` are included only when non-empty.
        """
        always_present = (
            "id",
            "pattern_id",
            "category",
            "severity",
            "confidence",
            "title",
            "symbol",
            "source_file",
            "evidence",
            "explanation",
            "fix_hint",
        )
        payload: dict = {key: getattr(self, key) for key in always_present}
        for optional_key in ("limitations", "related_symbols"):
            values = getattr(self, optional_key)
            if values:
                payload[optional_key] = values
        return payload
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # SpringAuditResult
67
+ # ---------------------------------------------------------------------------
68
+
69
@dataclass
class SpringAuditResult:
    """Envelope for a spring-audit run: findings plus run-level information.

    ``summary`` starts empty and is filled in by :meth:`finalize`.
    """

    schema_version: str = "1.0"
    repo_id: str = ""
    git_head: str = ""
    generated_at: str = ""
    spring_detected: bool = False
    scope: str = "all"  # "all" | "tx" | "security"
    findings: list[SpringFinding] = field(default_factory=list)
    limitations: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)

    # Populated by finalize()
    summary: dict = field(default_factory=dict)

    def finalize(self) -> "SpringAuditResult":
        """Compute ``summary`` from the current findings and return ``self``.

        Also stamps ``generated_at`` with the current UTC time (ISO 8601) when
        it has not been set. Call once all findings have been added.
        """
        self.generated_at = self.generated_at or datetime.now(timezone.utc).isoformat()

        # The four known severities are always reported, even at zero.
        severity_counts = dict.fromkeys(("critical", "high", "medium", "low"), 0)
        category_counts: dict[str, int] = {}
        for finding in self.findings:
            severity_counts.setdefault(finding.severity, 0)
            severity_counts[finding.severity] += 1
            category_counts.setdefault(finding.category, 0)
            category_counts[finding.category] += 1

        # Overall confidence is driven only by high/critical findings:
        # none, or all "high" -> "high"; any "low" -> "low"; otherwise "medium".
        severe_confidences = {
            finding.confidence
            for finding in self.findings
            if finding.severity in ("high", "critical")
        }
        if severe_confidences <= {"high"}:
            overall = "high"
        elif "low" in severe_confidences:
            overall = "low"
        else:
            overall = "medium"

        self.summary = {
            "total_findings": len(self.findings),
            "by_severity": severity_counts,
            "by_category": category_counts,
            "confidence_level": overall,
        }
        return self

    def to_dict(self) -> dict:
        """Return the envelope as a plain dict, with each finding converted via its ``to_dict()``."""
        scalar_keys = (
            "schema_version",
            "repo_id",
            "git_head",
            "generated_at",
            "spring_detected",
            "scope",
            "summary",
        )
        payload: dict = {key: getattr(self, key) for key in scalar_keys}
        payload["findings"] = [finding.to_dict() for finding in self.findings]
        payload["limitations"] = self.limitations
        payload["metadata"] = self.metadata
        return payload