sourcecode 1.35.28__tar.gz → 1.35.30__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-1.35.28 → sourcecode-1.35.30}/PKG-INFO +3 -3
- {sourcecode-1.35.28 → sourcecode-1.35.30}/README.md +2 -2
- {sourcecode-1.35.28 → sourcecode-1.35.30}/pyproject.toml +1 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/__init__.py +1 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/canonical_ir.py +1 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cli.py +103 -19
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/java.py +1 -7
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/server.py +1 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/prepare_context.py +1 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/repository_ir.py +161 -61
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/semantic_analyzer.py +2 -10
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/serializer.py +12 -6
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_tx_analyzer.py +3 -1
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/summarizer.py +17 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/.github/workflows/build-windows.yml +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/.gitignore +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/.ruff.toml +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/CHANGELOG.md +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/CONTRIBUTING.md +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/LICENSE +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/SECURITY.md +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/raw +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cache.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cir_graphs.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/classifier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/context_scorer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/contract_model.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/contract_pipeline.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/doc_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/error_schema.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/explain.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/file_chunker.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/flow_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/fqn_utils.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/git_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/license.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/__init__.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/__init__.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/applier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/backup.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/detector.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/onboarding/planner.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/orchestrator.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/registry.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/runner.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp_nudge.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/migrate_check.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/output_budget.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/path_filters.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/pr_comment_renderer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/pr_impact.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/progress.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ranking_engine.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/redactor.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/relevance_scorer.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/rename_refactor.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/ris.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/scanner.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/schema.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_event_topology.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_findings.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_impact.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_model.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_security_audit.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_semantic.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/workspace.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sourcecode
|
|
3
|
-
Version: 1.35.28
|
|
3
|
+
Version: 1.35.30
|
|
4
4
|
Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
|
|
5
5
|
License-File: LICENSE
|
|
6
6
|
Keywords: agents,ai,codebase,context,developer-tools,llm
|
|
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
|
|
|
40
40
|
|
|
41
41
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
42
42
|
|
|
43
|
-

|
|
44
44
|

|
|
45
45
|
|
|
46
46
|
---
|
|
@@ -114,7 +114,7 @@ pipx install sourcecode
|
|
|
114
114
|
|
|
115
115
|
```bash
|
|
116
116
|
sourcecode version
|
|
117
|
-
# sourcecode 1.35.28
|
|
117
|
+
# sourcecode 1.35.30
|
|
118
118
|
|
|
119
119
|
**v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
|
|
120
120
|
```
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
|
|
4
4
|
|
|
5
|
-

|
|
6
6
|

|
|
7
7
|
|
|
8
8
|
---
|
|
@@ -76,7 +76,7 @@ pipx install sourcecode
|
|
|
76
76
|
|
|
77
77
|
```bash
|
|
78
78
|
sourcecode version
|
|
79
|
-
# sourcecode 1.35.28
|
|
79
|
+
# sourcecode 1.35.30
|
|
80
80
|
|
|
81
81
|
**v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
|
|
82
82
|
```
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sourcecode"
|
|
7
|
-
version = "1.35.28"
|
|
7
|
+
version = "1.35.30"
|
|
8
8
|
description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
@@ -58,7 +58,7 @@ class CanonicalSecurity:
|
|
|
58
58
|
source_scope: str # method|class|inherited
|
|
59
59
|
effective_roles: list[str] = field(default_factory=list)
|
|
60
60
|
expression: str = "" # SpEL for @PreAuthorize/@PostAuthorize
|
|
61
|
-
required_permission: str = "" # for
|
|
61
|
+
required_permission: str = "" # for custom permission annotations
|
|
62
62
|
raw: dict = field(default_factory=dict) # full original policy dict
|
|
63
63
|
|
|
64
64
|
def to_dict(self) -> dict:
|
|
@@ -178,10 +178,10 @@ Cold scan: 2–10s depending on repo size. Warm cache: 0.3–0.6s.
|
|
|
178
178
|
cache clear [dim]# clear all cached results for this repo[/dim]
|
|
179
179
|
|
|
180
180
|
[bold]Examples:[/bold]
|
|
181
|
-
sourcecode
|
|
181
|
+
sourcecode my-project --compact
|
|
182
182
|
sourcecode . --compact --git-context --copy
|
|
183
183
|
sourcecode . --changed-only --git-context
|
|
184
|
-
sourcecode prepare-context onboard
|
|
184
|
+
sourcecode prepare-context onboard my-project
|
|
185
185
|
sourcecode prepare-context delta . --since main
|
|
186
186
|
|
|
187
187
|
[bold]Subcommands:[/bold]
|
|
@@ -629,7 +629,7 @@ def main(
|
|
|
629
629
|
help=(
|
|
630
630
|
"High-signal summary (typically 1000–3000 tokens depending on repo size): "
|
|
631
631
|
"stacks, entry points, dependency summary, confidence, and gaps. "
|
|
632
|
-
"Includes security_surface (when
|
|
632
|
+
"Includes security_surface (when custom security annotations detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
|
|
633
633
|
"Use --agent for maximum signal."
|
|
634
634
|
),
|
|
635
635
|
),
|
|
@@ -3311,6 +3311,11 @@ def repo_ir_cmd(
|
|
|
3311
3311
|
"--force",
|
|
3312
3312
|
help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
|
|
3313
3313
|
),
|
|
3314
|
+
gzip_output: bool = typer.Option(
|
|
3315
|
+
False,
|
|
3316
|
+
"--gzip",
|
|
3317
|
+
help="Compress output with gzip. Requires --output. Reduces large IR files by ~70-80%.",
|
|
3318
|
+
),
|
|
3314
3319
|
) -> None:
|
|
3315
3320
|
"""Deterministic symbol-level IR for Java repositories.
|
|
3316
3321
|
|
|
@@ -3323,6 +3328,7 @@ def repo_ir_cmd(
|
|
|
3323
3328
|
--summary-only Omit full graph; keep analysis + impact (smallest output)
|
|
3324
3329
|
--max-nodes N Keep top N nodes by score
|
|
3325
3330
|
--max-edges N Keep top N edges (priority: both endpoints kept)
|
|
3331
|
+
--gzip Compress output file (~70-80% smaller; requires --output)
|
|
3326
3332
|
|
|
3327
3333
|
\b
|
|
3328
3334
|
Examples:
|
|
@@ -3332,6 +3338,7 @@ def repo_ir_cmd(
|
|
|
3332
3338
|
sourcecode repo-ir --since main --output ir.json
|
|
3333
3339
|
sourcecode repo-ir --since HEAD~3 --summary-only --output ir-small.json
|
|
3334
3340
|
sourcecode repo-ir --max-nodes 200 --max-edges 500
|
|
3341
|
+
sourcecode repo-ir --output ir.json.gz --gzip
|
|
3335
3342
|
"""
|
|
3336
3343
|
import json as _json
|
|
3337
3344
|
|
|
@@ -3392,22 +3399,52 @@ def repo_ir_cmd(
|
|
|
3392
3399
|
output = _serialize_dict(ir, format)
|
|
3393
3400
|
|
|
3394
3401
|
if output_path:
|
|
3395
|
-
output_path.
|
|
3396
|
-
|
|
3397
|
-
|
|
3402
|
+
if gzip_output and not str(output_path).endswith(".gz"):
|
|
3403
|
+
output_path = output_path.with_suffix(output_path.suffix + ".gz")
|
|
3404
|
+
raw_bytes = output.encode("utf-8")
|
|
3405
|
+
size_bytes = len(raw_bytes)
|
|
3406
|
+
_SIZE_WARN_BYTES = 10 * 1024 * 1024 # 10MB
|
|
3407
|
+
if size_bytes > _SIZE_WARN_BYTES and not gzip_output:
|
|
3398
3408
|
typer.echo(
|
|
3399
|
-
f"
|
|
3409
|
+
f"[repo-ir] Output is {size_bytes // (1024 * 1024)}MB — "
|
|
3410
|
+
"consider --summary-only, --max-nodes N --max-edges N, or --gzip to compress.",
|
|
3400
3411
|
err=True,
|
|
3401
3412
|
)
|
|
3402
|
-
|
|
3403
|
-
|
|
3404
|
-
|
|
3413
|
+
if gzip_output:
|
|
3414
|
+
import gzip as _gzip
|
|
3415
|
+
with _gzip.open(output_path, "wb") as _gz:
|
|
3416
|
+
_gz.write(raw_bytes)
|
|
3417
|
+
compressed_kb = output_path.stat().st_size // 1024
|
|
3418
|
+
size_kb = size_bytes // 1024
|
|
3405
3419
|
typer.echo(
|
|
3406
|
-
f"IR written to {output_path} "
|
|
3407
|
-
f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
|
|
3420
|
+
f"IR written to {output_path} ({compressed_kb}KB gzip, {size_kb}KB uncompressed)",
|
|
3408
3421
|
err=True,
|
|
3409
3422
|
)
|
|
3423
|
+
else:
|
|
3424
|
+
output_path.write_bytes(raw_bytes)
|
|
3425
|
+
size_kb = size_bytes // 1024
|
|
3426
|
+
if summary_only:
|
|
3427
|
+
typer.echo(
|
|
3428
|
+
f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
|
|
3429
|
+
err=True,
|
|
3430
|
+
)
|
|
3431
|
+
else:
|
|
3432
|
+
n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
|
|
3433
|
+
n_edges = len((ir.get("graph") or {}).get("edges") or [])
|
|
3434
|
+
typer.echo(
|
|
3435
|
+
f"IR written to {output_path} "
|
|
3436
|
+
f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
|
|
3437
|
+
err=True,
|
|
3438
|
+
)
|
|
3410
3439
|
else:
|
|
3440
|
+
if gzip_output:
|
|
3441
|
+
_emit_error_json(
|
|
3442
|
+
INVALID_INPUT_CODE,
|
|
3443
|
+
"--gzip requires --output FILE.",
|
|
3444
|
+
hint="Add --output ir.json.gz to write compressed output to a file.",
|
|
3445
|
+
expected="--output path when --gzip is used.",
|
|
3446
|
+
)
|
|
3447
|
+
raise typer.Exit(1)
|
|
3411
3448
|
_ir_size = len(output.encode("utf-8"))
|
|
3412
3449
|
_ir_tokens_est = _ir_size // 4
|
|
3413
3450
|
# P1-C: abort when estimated tokens > 50K unless --force or --output is given.
|
|
@@ -4376,10 +4413,13 @@ def pr_impact_cmd(
|
|
|
4376
4413
|
if not files.exists():
|
|
4377
4414
|
_emit_error_json(
|
|
4378
4415
|
INVALID_INPUT_CODE,
|
|
4379
|
-
f"--files
|
|
4416
|
+
f"--files '{files}' does not exist. Expected a text file listing changed file paths (one per line), not a directory or class name.",
|
|
4380
4417
|
path=str(files),
|
|
4381
|
-
hint=
|
|
4382
|
-
|
|
4418
|
+
hint=(
|
|
4419
|
+
"Create a file with one changed Java file path per line, then pass it with --files. "
|
|
4420
|
+
"Example: git diff --name-only HEAD~1 > changed.txt && sourcecode pr-impact . --files changed.txt"
|
|
4421
|
+
),
|
|
4422
|
+
expected="A text file containing one Java file path per line.",
|
|
4383
4423
|
)
|
|
4384
4424
|
raise typer.Exit(code=1)
|
|
4385
4425
|
|
|
@@ -4749,6 +4789,21 @@ def fix_bug_cmd(
|
|
|
4749
4789
|
sourcecode impact <target> — Propagate impact from a specific class
|
|
4750
4790
|
sourcecode onboard . — Full architecture context first
|
|
4751
4791
|
"""
|
|
4792
|
+
# Detect misuse: `fix-bug "symptom text" /path` — path arg looks like a symptom.
|
|
4793
|
+
_path_str = str(path)
|
|
4794
|
+
_path_looks_like_symptom = (
|
|
4795
|
+
not Path(_path_str).exists()
|
|
4796
|
+
and (" " in _path_str or any(c.isupper() for c in _path_str))
|
|
4797
|
+
)
|
|
4798
|
+
if _path_looks_like_symptom and not symptom:
|
|
4799
|
+
_emit_error_json(
|
|
4800
|
+
INVALID_INPUT_CODE,
|
|
4801
|
+
f"'{_path_str}' is not a valid directory. Did you mean to use --symptom?",
|
|
4802
|
+
hint=f"Use: sourcecode fix-bug . --symptom {_path_str!r}",
|
|
4803
|
+
expected="A repository directory path as first argument.",
|
|
4804
|
+
)
|
|
4805
|
+
raise typer.Exit(code=1)
|
|
4806
|
+
|
|
4752
4807
|
if not symptom:
|
|
4753
4808
|
# Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
|
|
4754
4809
|
# must never receive informational text mixed into JSON stdout.
|
|
@@ -5380,6 +5435,12 @@ def cold_start_cmd(
|
|
|
5380
5435
|
"--compact",
|
|
5381
5436
|
help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
|
|
5382
5437
|
),
|
|
5438
|
+
output_path: Optional[Path] = typer.Option(
|
|
5439
|
+
None,
|
|
5440
|
+
"--output",
|
|
5441
|
+
"-o",
|
|
5442
|
+
help="Write output to file instead of stdout.",
|
|
5443
|
+
),
|
|
5383
5444
|
) -> None:
|
|
5384
5445
|
"""Output Repository Intelligence Snapshot bootstrap context as JSON.
|
|
5385
5446
|
|
|
@@ -5419,7 +5480,12 @@ def cold_start_cmd(
|
|
|
5419
5480
|
"Use --compact for a ~10K token subset, or --output FILE to save.\n"
|
|
5420
5481
|
)
|
|
5421
5482
|
sys.stderr.flush()
|
|
5422
|
-
|
|
5483
|
+
if output_path:
|
|
5484
|
+
output_path.write_text(_out, encoding="utf-8")
|
|
5485
|
+
sys.stderr.write(f"Saved {len(_out.encode('utf-8'))} bytes to {output_path}\n")
|
|
5486
|
+
sys.stderr.flush()
|
|
5487
|
+
else:
|
|
5488
|
+
typer.echo(_out)
|
|
5423
5489
|
|
|
5424
5490
|
|
|
5425
5491
|
# ── MCP server ────────────────────────────────────────────────────────────────
|
|
@@ -5872,6 +5938,24 @@ def mcp_list_tools(
|
|
|
5872
5938
|
# ── Cache subcommands ─────────────────────────────────────────────────────────
|
|
5873
5939
|
|
|
5874
5940
|
|
|
5941
|
+
def _resolve_repo_root(path: Path) -> Path:
|
|
5942
|
+
"""Resolve *path* to a repo root by walking up to find a .git directory.
|
|
5943
|
+
|
|
5944
|
+
If *path* is already a git root (has .git), returns it directly.
|
|
5945
|
+
If *path* is a subdirectory of a git repo, returns the git root.
|
|
5946
|
+
Falls back to *path* itself if no git repo found.
|
|
5947
|
+
"""
|
|
5948
|
+
candidate = path.resolve()
|
|
5949
|
+
while True:
|
|
5950
|
+
if (candidate / ".git").exists():
|
|
5951
|
+
return candidate
|
|
5952
|
+
parent = candidate.parent
|
|
5953
|
+
if parent == candidate:
|
|
5954
|
+
break
|
|
5955
|
+
candidate = parent
|
|
5956
|
+
return path.resolve()
|
|
5957
|
+
|
|
5958
|
+
|
|
5875
5959
|
@cache_app.command("status")
|
|
5876
5960
|
def cache_status_cmd(
|
|
5877
5961
|
path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
|
|
@@ -5879,7 +5963,7 @@ def cache_status_cmd(
|
|
|
5879
5963
|
) -> None:
|
|
5880
5964
|
"""Show cache statistics for a repository."""
|
|
5881
5965
|
from sourcecode import cache as _cm
|
|
5882
|
-
target = Path(path)
|
|
5966
|
+
target = _resolve_repo_root(Path(path))
|
|
5883
5967
|
stats = _cm.status(target)
|
|
5884
5968
|
if json_output:
|
|
5885
5969
|
import json as _j
|
|
@@ -5913,7 +5997,7 @@ def cache_clear_cmd(
|
|
|
5913
5997
|
index used for cold-start bootstrapping. Use --all to also clear it.
|
|
5914
5998
|
"""
|
|
5915
5999
|
from sourcecode import cache as _cm
|
|
5916
|
-
target = Path(path)
|
|
6000
|
+
target = _resolve_repo_root(Path(path))
|
|
5917
6001
|
_clear_ris = include_ris or all_
|
|
5918
6002
|
if not yes:
|
|
5919
6003
|
_ris_note = " (including RIS)" if _clear_ris else " (RIS preserved — use --all to also clear it)"
|
|
@@ -5935,7 +6019,7 @@ def cache_warm_cmd(
|
|
|
5935
6019
|
"""
|
|
5936
6020
|
import shutil as _shutil
|
|
5937
6021
|
import subprocess as _sub
|
|
5938
|
-
target = Path(path)
|
|
6022
|
+
target = _resolve_repo_root(Path(path))
|
|
5939
6023
|
typer.echo(f"Warming cache for {target} …", err=True)
|
|
5940
6024
|
_sc_bin = _shutil.which("sourcecode") or sys.argv[0]
|
|
5941
6025
|
cmd = [_sc_bin, str(target)]
|
|
@@ -38,13 +38,7 @@ _REQUEST_METHOD_VERB_RE = re.compile(
|
|
|
38
38
|
# Custom security annotation registry — extend here for project-specific annotations.
|
|
39
39
|
# Each entry: annotation_simple_name → compiled params regex.
|
|
40
40
|
# Groups: (1) resource string literal, (2) resource constant ref, (3) level integer.
|
|
41
|
-
_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {
|
|
42
|
-
"M3FiltroSeguridad": re.compile(
|
|
43
|
-
r'@M3FiltroSeguridad\s*\(\s*'
|
|
44
|
-
r'(?:nombreRecurso\s*=\s*(?:"([^"]*)"|([\w.]+)))?'
|
|
45
|
-
r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
|
|
46
|
-
),
|
|
47
|
-
}
|
|
41
|
+
_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {}
|
|
48
42
|
|
|
49
43
|
# Security config detection
|
|
50
44
|
_WEB_SECURITY_CONFIGURER_RE = re.compile(r'WebSecurityConfigurerAdapter\b')
|
|
@@ -639,7 +639,7 @@ def get_endpoints(repo_path: str = ".") -> dict:
|
|
|
639
639
|
"unknown" (no security signals detected).
|
|
640
640
|
Supports Spring MVC (@GetMapping etc.) and JAX-RS (@GET/@POST etc.).
|
|
641
641
|
Security annotations detected: @RolesAllowed, @PermitAll, @DenyAll,
|
|
642
|
-
@Authenticated, @PreAuthorize, @Secured, @SecurityRequirement
|
|
642
|
+
@Authenticated, @PreAuthorize, @Secured, @SecurityRequirement.
|
|
643
643
|
repo_path: absolute path to the Java repository (default: current working directory).
|
|
644
644
|
"""
|
|
645
645
|
_raw = repo_path
|
|
@@ -2003,7 +2003,7 @@ class TaskContextBuilder:
|
|
|
2003
2003
|
for _cf in (_cr.files_changed or []):
|
|
2004
2004
|
_cf_norm = _cf.replace("\\", "/")
|
|
2005
2005
|
# Git reports paths relative to the git root, which may be
|
|
2006
|
-
# a parent of the analyzed directory (e.g.
|
|
2006
|
+
# a parent of the analyzed directory (e.g. a monorepo root).
|
|
2007
2007
|
# Strip the analyzed-dir prefix so paths match all_paths.
|
|
2008
2008
|
if _cf_norm.startswith(_rn_prefix):
|
|
2009
2009
|
_cf_norm = _cf_norm[len(_rn_prefix):]
|
|
@@ -22,6 +22,8 @@ from dataclasses import dataclass, field
|
|
|
22
22
|
from pathlib import Path
|
|
23
23
|
from typing import Any, Optional
|
|
24
24
|
|
|
25
|
+
from sourcecode.fqn_utils import normalize_owner_fqn as _normalize_owner_fqn
|
|
26
|
+
|
|
25
27
|
# ---------------------------------------------------------------------------
|
|
26
28
|
# Data classes — Phases 1–4
|
|
27
29
|
# ---------------------------------------------------------------------------
|
|
@@ -171,8 +173,6 @@ _PATH_ANNOTATIONS: frozenset[str] = frozenset({"@Path"})
|
|
|
171
173
|
# Security / authorization annotations whose args must be captured.
|
|
172
174
|
# Includes standard Jakarta EE, JAX-RS, Quarkus/MicroProfile, and custom patterns.
|
|
173
175
|
_PERMISSION_ANNOTATIONS: frozenset[str] = frozenset({
|
|
174
|
-
# Custom (kept for backward compat)
|
|
175
|
-
"@M3FiltroSeguridad",
|
|
176
176
|
# Jakarta EE / JAX-RS standard
|
|
177
177
|
"@RolesAllowed",
|
|
178
178
|
"@PermitAll",
|
|
@@ -361,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
|
|
|
361
361
|
source = _LINE_COMMENT_RE.sub(' ', source)
|
|
362
362
|
return source
|
|
363
363
|
|
|
364
|
+
|
|
365
|
+
def _parse_annotation_line(line: str) -> tuple[str, str]:
|
|
366
|
+
"""Parse annotation name and args from a line starting with '@'.
|
|
367
|
+
|
|
368
|
+
Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
|
|
369
|
+
Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
|
|
370
|
+
on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
|
|
371
|
+
containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
|
|
372
|
+
"""
|
|
373
|
+
if not line.startswith('@'):
|
|
374
|
+
return "", ""
|
|
375
|
+
i = 1
|
|
376
|
+
while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
|
|
377
|
+
i += 1
|
|
378
|
+
ann_name = line[:i]
|
|
379
|
+
while i < len(line) and line[i] in (' ', '\t'):
|
|
380
|
+
i += 1
|
|
381
|
+
if i >= len(line) or line[i] != '(':
|
|
382
|
+
return ann_name, ""
|
|
383
|
+
depth = 0
|
|
384
|
+
in_string = False
|
|
385
|
+
string_char = ''
|
|
386
|
+
start = i + 1
|
|
387
|
+
i += 1
|
|
388
|
+
while i < len(line):
|
|
389
|
+
c = line[i]
|
|
390
|
+
if in_string:
|
|
391
|
+
if c == '\\':
|
|
392
|
+
i += 2
|
|
393
|
+
continue
|
|
394
|
+
if c == string_char:
|
|
395
|
+
in_string = False
|
|
396
|
+
elif c in ('"', "'"):
|
|
397
|
+
in_string = True
|
|
398
|
+
string_char = c
|
|
399
|
+
elif c == '(':
|
|
400
|
+
depth += 1
|
|
401
|
+
elif c == ')':
|
|
402
|
+
if depth == 0:
|
|
403
|
+
return ann_name, line[start:i]
|
|
404
|
+
depth -= 1
|
|
405
|
+
i += 1
|
|
406
|
+
return ann_name, line[start:]
|
|
407
|
+
|
|
364
408
|
# Edge types used for subsystem grouping — semantic hierarchy only, not imports
|
|
365
409
|
_SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
|
|
366
410
|
"extends", "implements", "injects", "contained_in",
|
|
@@ -410,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
|
|
|
410
454
|
# Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
|
|
411
455
|
_ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
|
|
412
456
|
|
|
457
|
+
# Used by _count_net_braces fast path: strip string/char literals before counting braces.
|
|
458
|
+
# Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
|
|
459
|
+
_STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
|
|
460
|
+
|
|
461
|
+
# Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
|
|
462
|
+
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
463
|
+
|
|
413
464
|
|
|
414
465
|
# ---------------------------------------------------------------------------
|
|
415
466
|
# Stable ID helpers
|
|
416
467
|
# ---------------------------------------------------------------------------
|
|
417
468
|
|
|
418
|
-
|
|
419
|
-
|
|
469
|
+
_FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
|
|
470
|
+
_TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
|
|
420
471
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
"
|
|
424
|
-
"""
|
|
472
|
+
|
|
473
|
+
def _normalize_type_name(raw: str) -> str:
|
|
474
|
+
"""Strip annotations, final modifier, and param name; return only type."""
|
|
425
475
|
raw = _ANN_PREFIX_RE.sub("", raw).strip()
|
|
426
|
-
raw =
|
|
427
|
-
|
|
428
|
-
m = re.match(r'^([\w<>\[\].,? ]+?)\s+\w+$', raw)
|
|
476
|
+
raw = _FINAL_STRIP_RE.sub("", raw).strip()
|
|
477
|
+
m = _TYPE_PARAM_RE.match(raw)
|
|
429
478
|
if m:
|
|
430
479
|
return m.group(1).strip()
|
|
431
480
|
return raw.strip()
|
|
@@ -503,26 +552,15 @@ def _compute_stable_id(
|
|
|
503
552
|
# ---------------------------------------------------------------------------
|
|
504
553
|
|
|
505
554
|
def _count_net_braces(line: str) -> int:
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
if ch == '"' and not in_char:
|
|
516
|
-
in_str = not in_str
|
|
517
|
-
elif ch == "'" and not in_str:
|
|
518
|
-
in_char = not in_char
|
|
519
|
-
elif not in_str and not in_char:
|
|
520
|
-
if ch == '{':
|
|
521
|
-
depth += 1
|
|
522
|
-
elif ch == '}':
|
|
523
|
-
depth -= 1
|
|
524
|
-
i += 1
|
|
525
|
-
return depth
|
|
555
|
+
# Fast exit: no braces on this line at all
|
|
556
|
+
if '{' not in line and '}' not in line:
|
|
557
|
+
return 0
|
|
558
|
+
# Fast path: no string/char literals — count directly (C-speed)
|
|
559
|
+
if '"' not in line and "'" not in line:
|
|
560
|
+
return line.count('{') - line.count('}')
|
|
561
|
+
# Slow path: strip string/char literals first so quoted braces don't count
|
|
562
|
+
clean = _STRING_LITERAL_RE.sub('', line)
|
|
563
|
+
return clean.count('{') - clean.count('}')
|
|
526
564
|
|
|
527
565
|
|
|
528
566
|
def _extract_modifiers(text: str) -> list[str]:
|
|
@@ -591,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
591
629
|
_raw_lines = source.splitlines()
|
|
592
630
|
_joined: list[str] = []
|
|
593
631
|
_i = 0
|
|
594
|
-
_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
|
|
595
632
|
while _i < len(_raw_lines):
|
|
596
633
|
_line = _raw_lines[_i]
|
|
597
634
|
_stripped = _line.strip()
|
|
@@ -633,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
|
|
|
633
670
|
net = _count_net_braces(stripped)
|
|
634
671
|
|
|
635
672
|
if stripped.startswith("@"):
|
|
636
|
-
|
|
637
|
-
if
|
|
638
|
-
ann = ann_m.group(1)
|
|
639
|
-
ann_args = ann_m.group(2) or ""
|
|
673
|
+
ann, ann_args = _parse_annotation_line(stripped)
|
|
674
|
+
if ann:
|
|
640
675
|
if ann not in pending_anns:
|
|
641
676
|
pending_anns.append(ann)
|
|
642
677
|
if ann_args and ann in _CAPTURE_ANN_ARGS:
|
|
@@ -1141,17 +1176,26 @@ def _build_relations(
|
|
|
1141
1176
|
evidence={"type": "signature", "value": f"implements {iface}"},
|
|
1142
1177
|
))
|
|
1143
1178
|
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1179
|
+
# mapped_to edges: controller class → class-level @RequestMapping path prefix.
|
|
1180
|
+
# O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
|
|
1181
|
+
# _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
|
|
1182
|
+
# O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
|
|
1183
|
+
for sym in symbols:
|
|
1184
|
+
if sym.type not in ("class", "interface"):
|
|
1185
|
+
continue
|
|
1186
|
+
if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
|
|
1187
|
+
continue
|
|
1188
|
+
if "@RequestMapping" not in sym.annotations:
|
|
1189
|
+
continue
|
|
1190
|
+
_rm_args = sym.annotation_values.get("@RequestMapping", "")
|
|
1191
|
+
for _m_path in _parse_route_paths(_rm_args):
|
|
1192
|
+
if _m_path:
|
|
1149
1193
|
edges.append(RelationEdge(
|
|
1150
1194
|
from_symbol=sym.symbol,
|
|
1151
|
-
to_symbol=
|
|
1195
|
+
to_symbol=_m_path,
|
|
1152
1196
|
type="mapped_to",
|
|
1153
1197
|
confidence="high",
|
|
1154
|
-
evidence={"type": "annotation", "value": f"@RequestMapping(\"{
|
|
1198
|
+
evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
|
|
1155
1199
|
))
|
|
1156
1200
|
|
|
1157
1201
|
# contained_in edges: method/field → enclosing class (structural membership)
|
|
@@ -1419,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
|
|
|
1419
1463
|
Returns {simple_name: value} covering all classes in the file.
|
|
1420
1464
|
Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
|
|
1421
1465
|
"""
|
|
1466
|
+
# Fast path: skip entirely when no declarations present (C-speed string scan)
|
|
1467
|
+
if 'static final String' not in source:
|
|
1468
|
+
return {}
|
|
1469
|
+
# Scan only candidate lines (skips full-source regex over 100KB files).
|
|
1470
|
+
# Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
|
|
1471
|
+
# optional modifier group backtracking; per-line match is far cheaper.
|
|
1422
1472
|
constants: dict[str, str] = {}
|
|
1423
|
-
for
|
|
1424
|
-
|
|
1473
|
+
for line in source.splitlines():
|
|
1474
|
+
if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
|
|
1475
|
+
m = _STATIC_FINAL_STR_RE.search(line)
|
|
1476
|
+
if m:
|
|
1477
|
+
constants[m.group(1)] = m.group(2)
|
|
1425
1478
|
return constants
|
|
1426
1479
|
|
|
1427
1480
|
|
|
@@ -2205,11 +2258,19 @@ def _assemble(
|
|
|
2205
2258
|
|
|
2206
2259
|
all_fqns_set = {s.symbol for s in sorted_syms}
|
|
2207
2260
|
|
|
2208
|
-
# Bounded BFS reachability per node (graph-only)
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2261
|
+
# Bounded BFS reachability per node (graph-only).
|
|
2262
|
+
# Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
|
|
2263
|
+
# hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
|
|
2264
|
+
# bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
|
|
2265
|
+
# repos causes no accuracy loss for spring-audit/endpoints/security analysis.
|
|
2266
|
+
_BFS_SYMBOL_THRESHOLD: int = 5000
|
|
2267
|
+
if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
|
|
2268
|
+
bfs_reach: dict[str, int] = {
|
|
2269
|
+
s.symbol: _bfs_reachability(s.symbol, adjacency)
|
|
2270
|
+
for s in sorted_syms
|
|
2271
|
+
}
|
|
2272
|
+
else:
|
|
2273
|
+
bfs_reach = {}
|
|
2213
2274
|
|
|
2214
2275
|
# Normalize centrality across all nodes
|
|
2215
2276
|
max_raw = max(
|
|
@@ -2495,7 +2556,6 @@ def _route_security_from_sym(
|
|
|
2495
2556
|
@RequiresRoles → {policy: requiresroles, roles: [...]}
|
|
2496
2557
|
@RequiresPermissions → {policy: requirespermissions, roles: [...]}
|
|
2497
2558
|
@SecurityRequirement → {policy: openapi_security, spec: ...}
|
|
2498
|
-
@M3FiltroSeguridad → {policy: custom_permission, required_permission: ...}
|
|
2499
2559
|
|
|
2500
2560
|
Falls back to class-level annotations if no method-level security found.
|
|
2501
2561
|
Returns None if no security signal detected at either level.
|
|
@@ -2534,15 +2594,6 @@ def _route_security_from_sym(
|
|
|
2534
2594
|
if "@SecurityRequirement" in anns:
|
|
2535
2595
|
raw = vals.get("@SecurityRequirement", "")
|
|
2536
2596
|
return {"policy": "openapi_security", "spec": raw.strip()}
|
|
2537
|
-
# Custom legacy annotation
|
|
2538
|
-
if "@M3FiltroSeguridad" in anns:
|
|
2539
|
-
import re as _re2
|
|
2540
|
-
raw = vals.get("@M3FiltroSeguridad", "")
|
|
2541
|
-
m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
|
|
2542
|
-
if m:
|
|
2543
|
-
return {"policy": "custom_permission", "required_permission": m.group(1)}
|
|
2544
|
-
# Value is a constant reference or empty — still flag the annotation
|
|
2545
|
-
return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
|
|
2546
2597
|
return None
|
|
2547
2598
|
|
|
2548
2599
|
# Method-level first, then class-level fallback
|
|
@@ -2829,6 +2880,29 @@ def build_repo_ir(
|
|
|
2829
2880
|
# type map before building relations. Java classes in the same package
|
|
2830
2881
|
# reference each other without import statements, so import_map alone cannot
|
|
2831
2882
|
# resolve them — _build_same_package_map provides the cross-file fallback.
|
|
2883
|
+
#
|
|
2884
|
+
# Pre-scan filter: skip full symbol extraction for files that have no
|
|
2885
|
+
# Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
|
|
2886
|
+
# SPI interfaces) contribute no endpoints, transactions, or security findings
|
|
2887
|
+
# to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
|
|
2888
|
+
# Non-annotated files still register their package+class via a lightweight
|
|
2889
|
+
# regex scan so same-package type resolution remains correct.
|
|
2890
|
+
_ANNOTATION_MARKERS: tuple[str, ...] = (
|
|
2891
|
+
'@Controller', '@RestController', '@Service', '@Repository',
|
|
2892
|
+
'@Component', '@Configuration', '@Bean', '@Transactional',
|
|
2893
|
+
'@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
|
|
2894
|
+
'@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
|
|
2895
|
+
'@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
|
|
2896
|
+
'@RequiredArgsConstructor', '@AllArgsConstructor',
|
|
2897
|
+
'@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
|
|
2898
|
+
'@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
|
|
2899
|
+
# JPA / persistence (needed for stereotype detection in all commands)
|
|
2900
|
+
'@Entity', '@MappedSuperclass', '@Embeddable',
|
|
2901
|
+
# AOP / messaging / event sourcing
|
|
2902
|
+
'@Aspect', '@Aggregate', '@Document',
|
|
2903
|
+
# Spring Data
|
|
2904
|
+
'@Query', '@NamedQuery',
|
|
2905
|
+
)
|
|
2832
2906
|
_per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
|
|
2833
2907
|
for rel_path in sorted(file_paths):
|
|
2834
2908
|
abs_path = root / rel_path
|
|
@@ -2839,6 +2913,23 @@ def build_repo_ir(
|
|
|
2839
2913
|
_meta_files_read += 1
|
|
2840
2914
|
_meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
|
|
2841
2915
|
_meta_chars_read += len(source)
|
|
2916
|
+
# Fast pre-scan: if file has no relevant annotations skip full extraction.
|
|
2917
|
+
# Still register package/class name for same-package resolution.
|
|
2918
|
+
if not any(marker in source for marker in _ANNOTATION_MARKERS):
|
|
2919
|
+
pkg_m = _PKG_RE.search(source)
|
|
2920
|
+
_pkg = pkg_m.group(1) if pkg_m else ""
|
|
2921
|
+
# Minimal class-name symbols for same-package map (no methods/fields)
|
|
2922
|
+
_min_syms: list[SymbolRecord] = []
|
|
2923
|
+
for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
|
|
2924
|
+
_cls_name = _cm.group(1)
|
|
2925
|
+
_fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
|
|
2926
|
+
_min_syms.append(SymbolRecord(
|
|
2927
|
+
symbol=_fqn, type="class", confidence="medium",
|
|
2928
|
+
declaring_file=rel_path,
|
|
2929
|
+
))
|
|
2930
|
+
all_symbols.extend(_min_syms)
|
|
2931
|
+
# No relations needed for non-annotated files
|
|
2932
|
+
continue
|
|
2842
2933
|
package, symbols, raw_imports = _extract_symbols(source, rel_path)
|
|
2843
2934
|
all_symbols.extend(symbols)
|
|
2844
2935
|
_per_file.append((rel_path, source, package, raw_imports, symbols))
|
|
@@ -4147,13 +4238,22 @@ def _all_callers_from_rg(fqn: str, reverse_graph: dict[str, dict[str, list[str]]
|
|
|
4147
4238
|
BUG-01 fix: skip 'contained_in' edges — those represent structural membership
|
|
4148
4239
|
(method→enclosing class), not actual callers. Without this, an Impl class
|
|
4149
4240
|
with 91 own methods would show 91 "direct callers" and inflate risk to HIGH.
|
|
4241
|
+
|
|
4242
|
+
CH-002 fix: for 'injects' edges, normalize field/constructor FQNs to their
|
|
4243
|
+
enclosing class. e.g. pkg.ConsolidacionService.calcularField → pkg.ConsolidacionService
|
|
4244
|
+
so BFS can continue through DI injection chains and find controllers.
|
|
4150
4245
|
"""
|
|
4151
4246
|
entry = reverse_graph.get(fqn) or {}
|
|
4152
4247
|
callers: list[str] = []
|
|
4248
|
+
seen: set[str] = set()
|
|
4153
4249
|
for edge_type, fqn_list in entry.items():
|
|
4154
4250
|
if edge_type == "contained_in":
|
|
4155
4251
|
continue # structural membership, not a caller
|
|
4156
|
-
|
|
4252
|
+
for c in fqn_list:
|
|
4253
|
+
normalized = _normalize_owner_fqn(c) if edge_type == "injects" else c
|
|
4254
|
+
if normalized not in seen:
|
|
4255
|
+
seen.add(normalized)
|
|
4256
|
+
callers.append(normalized)
|
|
4157
4257
|
return callers
|
|
4158
4258
|
|
|
4159
4259
|
|
|
@@ -57,15 +57,7 @@ _EXTENDS_RE = re.compile(
|
|
|
57
57
|
# Custom AOP annotation registry — extend here for project-specific security/AOP annotations.
|
|
58
58
|
# Each entry: (method_regex, impl_symbol_name).
|
|
59
59
|
# method_regex must capture the annotated method name in group 1.
|
|
60
|
-
_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = [
|
|
61
|
-
(
|
|
62
|
-
re.compile(
|
|
63
|
-
r'@M3FiltroSeguridad(?:\([^)]*\))?\s+(?:@[^\s]+\s+)*'
|
|
64
|
-
r'(?:public|private|protected)\s+\w[\w<>\[\]]*\s+([a-z][A-Za-z0-9_]*)\s*\('
|
|
65
|
-
),
|
|
66
|
-
"M3FiltroSeguridadImpl",
|
|
67
|
-
),
|
|
68
|
-
]
|
|
60
|
+
_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = []
|
|
69
61
|
_LOMBOK_CLASS_RE = re.compile(
|
|
70
62
|
r'(@(?:Data|Slf4j|Builder|AllArgsConstructor|NoArgsConstructor)(?:\([^)]*\))?\s+)*'
|
|
71
63
|
r'(?:public\s+)?(?:class|interface)\s+([A-Z][A-Za-z0-9_]*)',
|
|
@@ -925,7 +917,7 @@ class SemanticAnalyzer:
|
|
|
925
917
|
|
|
926
918
|
method="heuristic", confidence="low" para todos los edges Java.
|
|
927
919
|
Includes: Lombok synthetic symbols, @Autowired field edges,
|
|
928
|
-
@Mapper interface detection, inheritance chains,
|
|
920
|
+
@Mapper interface detection, inheritance chains, custom AOP annotation edges.
|
|
929
921
|
"""
|
|
930
922
|
_JAVA_KEYWORDS: frozenset[str] = frozenset({
|
|
931
923
|
"if", "for", "while", "switch", "catch", "super", "this", "new",
|
|
@@ -504,7 +504,9 @@ def _transactional_summary(sm: "SourceMap", *, full: bool = False) -> "Optional[
|
|
|
504
504
|
classes = getattr(s, "transactional_classes", [])
|
|
505
505
|
if classes:
|
|
506
506
|
total = len(classes)
|
|
507
|
-
|
|
507
|
+
# class_count = unique classes with @Transactional anywhere (file-level scan).
|
|
508
|
+
# spring-audit metadata.tx_stats has method-level annotation breakdown.
|
|
509
|
+
result: dict[str, Any] = {"class_count": total, "classes": classes}
|
|
508
510
|
if total > 10 and not full:
|
|
509
511
|
result["classes"] = classes[:10]
|
|
510
512
|
result["truncated"] = True
|
|
@@ -549,9 +551,13 @@ def _security_surface_from_eps(
|
|
|
549
551
|
root: "Optional[Path]" = None,
|
|
550
552
|
file_paths: "Optional[list[str]]" = None,
|
|
551
553
|
) -> "Optional[dict[str, Any]]":
|
|
552
|
-
"""Extract
|
|
554
|
+
"""Extract permission resource names from entry point evidence strings.
|
|
555
|
+
|
|
556
|
+
Looks for resource=VALUE or nombreRecurso=VALUE patterns in evidence
|
|
557
|
+
produced by custom security annotations on REST controller methods.
|
|
558
|
+
"""
|
|
553
559
|
import re as _re
|
|
554
|
-
|
|
560
|
+
_RESOURCE_RE = _re.compile(r"(?:resource|nombreRecurso)=[\"']([^\"']+)[\"']")
|
|
555
561
|
_CONST_SYMBOL_RE = _re.compile(r'^[\w]+\.[\w]+$')
|
|
556
562
|
resource_names: list[str] = []
|
|
557
563
|
unresolved: list[str] = []
|
|
@@ -560,7 +566,7 @@ def _security_surface_from_eps(
|
|
|
560
566
|
evidence = getattr(ep, "evidence", None)
|
|
561
567
|
if not evidence:
|
|
562
568
|
continue
|
|
563
|
-
for m in
|
|
569
|
+
for m in _RESOURCE_RE.finditer(evidence):
|
|
564
570
|
nm = m.group(1)
|
|
565
571
|
if not nm or nm in seen:
|
|
566
572
|
continue
|
|
@@ -578,8 +584,8 @@ def _security_surface_from_eps(
|
|
|
578
584
|
return None
|
|
579
585
|
result: dict[str, Any] = {
|
|
580
586
|
"schema": (
|
|
581
|
-
"
|
|
582
|
-
"
|
|
587
|
+
"Permission resource identifiers found on REST controller methods. "
|
|
588
|
+
"Each value names a resource checked at runtime by a security annotation."
|
|
583
589
|
),
|
|
584
590
|
"resource_names": resource_names,
|
|
585
591
|
}
|
|
@@ -739,7 +739,9 @@ def run_tx_audit(
|
|
|
739
739
|
limitations=_tx_limitations,
|
|
740
740
|
metadata={
|
|
741
741
|
"symbols_analyzed": len(getattr(cir, "symbols", [])),
|
|
742
|
-
|
|
742
|
+
# tx_annotation_count = total @Transactional symbols (class-level + method-level).
|
|
743
|
+
# tx_stats.class_level matches compact transactional_boundaries.class_count.
|
|
744
|
+
"tx_annotation_count": tx_index.stats()["total"],
|
|
743
745
|
"tx_stats": tx_index.stats(),
|
|
744
746
|
"analysis_time_ms": elapsed_ms,
|
|
745
747
|
},
|
|
@@ -223,6 +223,20 @@ class ProjectSummarizer:
|
|
|
223
223
|
__import__("re").IGNORECASE,
|
|
224
224
|
)
|
|
225
225
|
|
|
226
|
+
# Patterns that indicate security scanner / tool output, not project description.
|
|
227
|
+
# Trivy, OWASP, Snyk, etc. produce structured vulnerability reports.
|
|
228
|
+
_TOOL_OUTPUT_RE = __import__("re").compile(
|
|
229
|
+
r"CVE-\d{4}-\d{4,}" # CVE identifiers
|
|
230
|
+
r"|UNKNOWN:\s*\d+.*LOW:\s*\d+" # Trivy severity summary line
|
|
231
|
+
r"|(CRITICAL|HIGH|MEDIUM|LOW):\s*\d+" # severity: count pattern
|
|
232
|
+
r"|\bTotal:\s*\d+\s*\(" # "Total: 45 (UNKNOWN: 0, ..." Trivy header
|
|
233
|
+
r"|\bvulnerabilit(?:y|ies)\s+found\b" # "N vulnerabilities found"
|
|
234
|
+
r"|\bscan(?:ned|ning)\s+\d+\s+(?:file|package|image)\b" # scanner progress
|
|
235
|
+
r"|\bpkg:(?:npm|pypi|maven|cargo|golang)/" # PURL package identifiers
|
|
236
|
+
r"|\b(?:trivy|snyk|grype|syft|cosign)\b", # well-known scanner names
|
|
237
|
+
__import__("re").IGNORECASE,
|
|
238
|
+
)
|
|
239
|
+
|
|
226
240
|
def _extract_first_useful_paragraph(self, content: str) -> str | None:
|
|
227
241
|
"""Extract the first paragraph that describes the project architecture, not its license or marketing."""
|
|
228
242
|
import re as _re
|
|
@@ -268,6 +282,9 @@ class ProjectSummarizer:
|
|
|
268
282
|
# Reject license notices and user-facing marketing text
|
|
269
283
|
if self._LICENSE_MARKETING_RE.search(paragraph):
|
|
270
284
|
continue
|
|
285
|
+
# Reject security scanner / tool output (Trivy, Snyk, OWASP, CVE lists)
|
|
286
|
+
if self._TOOL_OUTPUT_RE.search(paragraph):
|
|
287
|
+
continue
|
|
271
288
|
# Reject link-list paragraphs (docs/navigation sections):
|
|
272
289
|
# if more than 2 markdown links dominate the paragraph, it's a nav section
|
|
273
290
|
_link_count = len(_MD_LINK_RE.findall(paragraph))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|