PyPI - sourcecode - Versions diffs - 1.35.28__tar.gz → 1.35.30__tar.gz - Mend

sourcecode 1.35.28.tar.gz → 1.35.30.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

{sourcecode-1.35.28 → sourcecode-1.35.30}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.35.28
+Version: 1.35.30
 Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
 License-File: LICENSE
 Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.35.28-blue)
+![Version](https://img.shields.io/badge/version-1.35.30-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -114,7 +114,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.35.28
+# sourcecode 1.35.30
 **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
 ```

{sourcecode-1.35.28 → sourcecode-1.35.30}/README.md RENAMED Viewed

@@ -2,7 +2,7 @@
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.35.28-blue)
+![Version](https://img.shields.io/badge/version-1.35.30-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -76,7 +76,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.35.28
+# sourcecode 1.35.30
 **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
 ```

{sourcecode-1.35.28 → sourcecode-1.35.30}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "1.35.28"
+version = "1.35.30"
 description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.35.28"
+__version__ = "1.35.30"

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/canonical_ir.py RENAMED Viewed

@@ -58,7 +58,7 @@ class CanonicalSecurity:
     source_scope: str                        # method|class|inherited
     effective_roles: list[str] = field(default_factory=list)
     expression: str = ""                     # SpEL for @PreAuthorize/@PostAuthorize
-    required_permission: str = ""            # for @M3FiltroSeguridad
+    required_permission: str = ""            # for custom permission annotations
     raw: dict = field(default_factory=dict)  # full original policy dict
     def to_dict(self) -> dict:

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/cli.py RENAMED Viewed

@@ -178,10 +178,10 @@ Cold scan: 2–10s depending on repo size. Warm cache: 0.3–0.6s.
   cache clear                  [dim]# clear all cached results for this repo[/dim]
 [bold]Examples:[/bold]
-  sourcecode saint-server --compact
+  sourcecode my-project --compact
   sourcecode . --compact --git-context --copy
   sourcecode . --changed-only --git-context
-  sourcecode prepare-context onboard saint-server
+  sourcecode prepare-context onboard my-project
   sourcecode prepare-context delta . --since main
 [bold]Subcommands:[/bold]
@@ -629,7 +629,7 @@ def main(
         help=(
             "High-signal summary (typically 1000–3000 tokens depending on repo size): "
             "stacks, entry points, dependency summary, confidence, and gaps. "
-            "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
+            "Includes security_surface (when custom security annotations detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
             "Use --agent for maximum signal."
         ),
     ),
@@ -3311,6 +3311,11 @@ def repo_ir_cmd(
         "--force",
         help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
     ),
+    gzip_output: bool = typer.Option(
+        False,
+        "--gzip",
+        help="Compress output with gzip. Requires --output. Reduces large IR files by ~70-80%.",
+    ),
 ) -> None:
     """Deterministic symbol-level IR for Java repositories.
@@ -3323,6 +3328,7 @@ def repo_ir_cmd(
       --summary-only          Omit full graph; keep analysis + impact (smallest output)
       --max-nodes N           Keep top N nodes by score
       --max-edges N           Keep top N edges (priority: both endpoints kept)
+      --gzip                  Compress output file (~70-80% smaller; requires --output)
     \b
     Examples:
@@ -3332,6 +3338,7 @@ def repo_ir_cmd(
       sourcecode repo-ir --since main --output ir.json
       sourcecode repo-ir --since HEAD~3 --summary-only --output ir-small.json
       sourcecode repo-ir --max-nodes 200 --max-edges 500
+      sourcecode repo-ir --output ir.json.gz --gzip
     """
     import json as _json
@@ -3392,22 +3399,52 @@ def repo_ir_cmd(
     output = _serialize_dict(ir, format)
     if output_path:
-        output_path.write_text(output, encoding="utf-8")
-        size_kb = len(output.encode("utf-8")) // 1024
-        if summary_only:
+        if gzip_output and not str(output_path).endswith(".gz"):
+            output_path = output_path.with_suffix(output_path.suffix + ".gz")
+        raw_bytes = output.encode("utf-8")
+        size_bytes = len(raw_bytes)
+        _SIZE_WARN_BYTES = 10 * 1024 * 1024  # 10MB
+        if size_bytes > _SIZE_WARN_BYTES and not gzip_output:
             typer.echo(
-                f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
+                f"[repo-ir] Output is {size_bytes // (1024 * 1024)}MB — "
+                "consider --summary-only, --max-nodes N --max-edges N, or --gzip to compress.",
                 err=True,
             )
-        else:
-            n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
-            n_edges = len((ir.get("graph") or {}).get("edges") or [])
+        if gzip_output:
+            import gzip as _gzip
+            with _gzip.open(output_path, "wb") as _gz:
+                _gz.write(raw_bytes)
+            compressed_kb = output_path.stat().st_size // 1024
+            size_kb = size_bytes // 1024
             typer.echo(
-                f"IR written to {output_path} "
-                f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
+                f"IR written to {output_path} ({compressed_kb}KB gzip, {size_kb}KB uncompressed)",
                 err=True,
             )
+        else:
+            output_path.write_bytes(raw_bytes)
+            size_kb = size_bytes // 1024
+            if summary_only:
+                typer.echo(
+                    f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
+                    err=True,
+                )
+            else:
+                n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
+                n_edges = len((ir.get("graph") or {}).get("edges") or [])
+                typer.echo(
+                    f"IR written to {output_path} "
+                    f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
+                    err=True,
+                )
     else:
+        if gzip_output:
+            _emit_error_json(
+                INVALID_INPUT_CODE,
+                "--gzip requires --output FILE.",
+                hint="Add --output ir.json.gz to write compressed output to a file.",
+                expected="--output path when --gzip is used.",
+            )
+            raise typer.Exit(1)
         _ir_size = len(output.encode("utf-8"))
         _ir_tokens_est = _ir_size // 4
         # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
@@ -4376,10 +4413,13 @@ def pr_impact_cmd(
     if not files.exists():
         _emit_error_json(
             INVALID_INPUT_CODE,
-            f"--files path '{files}' does not exist.",
+            f"--files '{files}' does not exist. Expected a text file listing changed file paths (one per line), not a directory or class name.",
             path=str(files),
-            hint="Pass a file containing one Java file path per line.",
-            expected="An existing file path.",
+            hint=(
+                "Create a file with one changed Java file path per line, then pass it with --files. "
+                "Example: git diff --name-only HEAD~1 > changed.txt && sourcecode pr-impact . --files changed.txt"
+            ),
+            expected="A text file containing one Java file path per line.",
         )
         raise typer.Exit(code=1)
@@ -4749,6 +4789,21 @@ def fix_bug_cmd(
       sourcecode impact <target>   — Propagate impact from a specific class
       sourcecode onboard .         — Full architecture context first
     """
+    # Detect misuse: `fix-bug "symptom text" /path` — path arg looks like a symptom.
+    _path_str = str(path)
+    _path_looks_like_symptom = (
+        not Path(_path_str).exists()
+        and (" " in _path_str or any(c.isupper() for c in _path_str))
+    )
+    if _path_looks_like_symptom and not symptom:
+        _emit_error_json(
+            INVALID_INPUT_CODE,
+            f"'{_path_str}' is not a valid directory. Did you mean to use --symptom?",
+            hint=f"Use: sourcecode fix-bug . --symptom {_path_str!r}",
+            expected="A repository directory path as first argument.",
+        )
+        raise typer.Exit(code=1)
     if not symptom:
         # Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
         # must never receive informational text mixed into JSON stdout.
@@ -5380,6 +5435,12 @@ def cold_start_cmd(
         "--compact",
         help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
     ),
+    output_path: Optional[Path] = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Write output to file instead of stdout.",
+    ),
 ) -> None:
     """Output Repository Intelligence Snapshot bootstrap context as JSON.
@@ -5419,7 +5480,12 @@ def cold_start_cmd(
             "Use --compact for a ~10K token subset, or --output FILE to save.\n"
         )
         sys.stderr.flush()
-    typer.echo(_out)
+    if output_path:
+        output_path.write_text(_out, encoding="utf-8")
+        sys.stderr.write(f"Saved {len(_out.encode('utf-8'))} bytes to {output_path}\n")
+        sys.stderr.flush()
+    else:
+        typer.echo(_out)
 # ── MCP server ────────────────────────────────────────────────────────────────
@@ -5872,6 +5938,24 @@ def mcp_list_tools(
 # ── Cache subcommands ─────────────────────────────────────────────────────────
+def _resolve_repo_root(path: Path) -> Path:
+    """Resolve *path* to a repo root by walking up to find a .git directory.
+    If *path* is already a git root (has .git), returns it directly.
+    If *path* is a subdirectory of a git repo, returns the git root.
+    Falls back to *path* itself if no git repo found.
+    """
+    candidate = path.resolve()
+    while True:
+        if (candidate / ".git").exists():
+            return candidate
+        parent = candidate.parent
+        if parent == candidate:
+            break
+        candidate = parent
+    return path.resolve()
 @cache_app.command("status")
 def cache_status_cmd(
     path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
@@ -5879,7 +5963,7 @@ def cache_status_cmd(
 ) -> None:
     """Show cache statistics for a repository."""
     from sourcecode import cache as _cm
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     stats = _cm.status(target)
     if json_output:
         import json as _j
@@ -5913,7 +5997,7 @@ def cache_clear_cmd(
     index used for cold-start bootstrapping.  Use --all to also clear it.
     """
     from sourcecode import cache as _cm
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     _clear_ris = include_ris or all_
     if not yes:
         _ris_note = " (including RIS)" if _clear_ris else " (RIS preserved — use --all to also clear it)"
@@ -5935,7 +6019,7 @@ def cache_warm_cmd(
     """
     import shutil as _shutil
     import subprocess as _sub
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     typer.echo(f"Warming cache for {target} …", err=True)
     _sc_bin = _shutil.which("sourcecode") or sys.argv[0]
     cmd = [_sc_bin, str(target)]

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/detectors/java.py RENAMED Viewed

@@ -38,13 +38,7 @@ _REQUEST_METHOD_VERB_RE = re.compile(
 # Custom security annotation registry — extend here for project-specific annotations.
 # Each entry: annotation_simple_name → compiled params regex.
 # Groups: (1) resource string literal, (2) resource constant ref, (3) level integer.
-_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {
-    "M3FiltroSeguridad": re.compile(
-        r'@M3FiltroSeguridad\s*\(\s*'
-        r'(?:nombreRecurso\s*=\s*(?:"([^"]*)"|([\w.]+)))?'
-        r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
-    ),
-}
+_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {}
 # Security config detection
 _WEB_SECURITY_CONFIGURER_RE = re.compile(r'WebSecurityConfigurerAdapter\b')

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/mcp/server.py RENAMED Viewed

@@ -639,7 +639,7 @@ def get_endpoints(repo_path: str = ".") -> dict:
              "unknown" (no security signals detected).
     Supports Spring MVC (@GetMapping etc.) and JAX-RS (@GET/@POST etc.).
     Security annotations detected: @RolesAllowed, @PermitAll, @DenyAll,
-    @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement, @M3FiltroSeguridad.
+    @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement.
     repo_path: absolute path to the Java repository (default: current working directory).
     """
     _raw = repo_path

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -2003,7 +2003,7 @@ class TaskContextBuilder:
                         for _cf in (_cr.files_changed or []):
                             _cf_norm = _cf.replace("\\", "/")
                             # Git reports paths relative to the git root, which may be
-                            # a parent of the analyzed directory (e.g. MSAS/saint-server/).
+                            # a parent of the analyzed directory (e.g. a monorepo root).
                             # Strip the analyzed-dir prefix so paths match all_paths.
                             if _cf_norm.startswith(_rn_prefix):
                                 _cf_norm = _cf_norm[len(_rn_prefix):]

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/repository_ir.py RENAMED Viewed

@@ -22,6 +22,8 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Optional
+from sourcecode.fqn_utils import normalize_owner_fqn as _normalize_owner_fqn
 # ---------------------------------------------------------------------------
 # Data classes — Phases 1–4
 # ---------------------------------------------------------------------------
@@ -171,8 +173,6 @@ _PATH_ANNOTATIONS: frozenset[str] = frozenset({"@Path"})
 # Security / authorization annotations whose args must be captured.
 # Includes standard Jakarta EE, JAX-RS, Quarkus/MicroProfile, and custom patterns.
 _PERMISSION_ANNOTATIONS: frozenset[str] = frozenset({
-    # Custom (kept for backward compat)
-    "@M3FiltroSeguridad",
     # Jakarta EE / JAX-RS standard
     "@RolesAllowed",
     "@PermitAll",
@@ -361,6 +361,50 @@ def _strip_java_comments(source: str) -> str:
     source = _LINE_COMMENT_RE.sub(' ', source)
     return source
+def _parse_annotation_line(line: str) -> tuple[str, str]:
+    """Parse annotation name and args from a line starting with '@'.
+    Returns (ann_name, ann_args) where ann_args is content inside the outermost ().
+    Uses O(n) character scanning instead of regex to avoid catastrophic backtracking
+    on lines with deeply nested annotation arguments (e.g. @APIResponse with @Content
+    containing @Schema — 3-level nesting that breaks _ANN_WITH_ARGS_RE).
+    """
+    if not line.startswith('@'):
+        return "", ""
+    i = 1
+    while i < len(line) and (line[i].isalnum() or line[i] in ('_', '.')):
+        i += 1
+    ann_name = line[:i]
+    while i < len(line) and line[i] in (' ', '\t'):
+        i += 1
+    if i >= len(line) or line[i] != '(':
+        return ann_name, ""
+    depth = 0
+    in_string = False
+    string_char = ''
+    start = i + 1
+    i += 1
+    while i < len(line):
+        c = line[i]
+        if in_string:
+            if c == '\\':
+                i += 2
+                continue
+            if c == string_char:
+                in_string = False
+        elif c in ('"', "'"):
+            in_string = True
+            string_char = c
+        elif c == '(':
+            depth += 1
+        elif c == ')':
+            if depth == 0:
+                return ann_name, line[start:i]
+            depth -= 1
+        i += 1
+    return ann_name, line[start:]
 # Edge types used for subsystem grouping — semantic hierarchy only, not imports
 _SUBSYSTEM_STRUCTURAL_EDGES: frozenset[str] = frozenset({
     "extends", "implements", "injects", "contained_in",
@@ -410,22 +454,27 @@ _BFS_MAX_DEPTH: int = 3
 # Regex to strip leading annotations from a single parameter (e.g. @NotNull @Valid String name)
 _ANN_PREFIX_RE = re.compile(r'^(?:@\w+\s*(?:\([^)]*\))?\s*)+')
+# Used by _count_net_braces fast path: strip string/char literals before counting braces.
+# Handles escape sequences (\\) so escaped quotes don't close the literal prematurely.
+_STRING_LITERAL_RE = re.compile(r'"(?:[^"\\]|\\.)*"|\'(?:[^\'\\]|\\.)*\'')
+# Module-level cache for class-keyword detection (avoids recompilation per _extract_symbols call)
+_CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
 # ---------------------------------------------------------------------------
 # Stable ID helpers
 # ---------------------------------------------------------------------------
-def _normalize_type_name(raw: str) -> str:
-    """Strip annotations, final modifier, and param name; return only type.
+_FINAL_STRIP_RE = re.compile(r'\bfinal\s+')
+_TYPE_PARAM_RE = re.compile(r'^([\w<>\[\].,? ]+?)\s+\w+$')
-    "(Long id)"    -> strip after parsing → "Long"
-    "@NotNull User user" → "User"
-    "List<String>" → "List<String>"
-    """
+def _normalize_type_name(raw: str) -> str:
+    """Strip annotations, final modifier, and param name; return only type."""
     raw = _ANN_PREFIX_RE.sub("", raw).strip()
-    raw = re.sub(r'\bfinal\s+', "", raw).strip()
-    # "Type name" → extract Type (rightmost word is the param name)
-    m = re.match(r'^([\w<>\[\].,? ]+?)\s+\w+$', raw)
+    raw = _FINAL_STRIP_RE.sub("", raw).strip()
+    m = _TYPE_PARAM_RE.match(raw)
     if m:
         return m.group(1).strip()
     return raw.strip()
@@ -503,26 +552,15 @@ def _compute_stable_id(
 # ---------------------------------------------------------------------------
 def _count_net_braces(line: str) -> int:
-    depth = 0
-    in_str = False
-    in_char = False
-    i = 0
-    while i < len(line):
-        ch = line[i]
-        if ch == '\\' and (in_str or in_char):
-            i += 2
-            continue
-        if ch == '"' and not in_char:
-            in_str = not in_str
-        elif ch == "'" and not in_str:
-            in_char = not in_char
-        elif not in_str and not in_char:
-            if ch == '{':
-                depth += 1
-            elif ch == '}':
-                depth -= 1
-        i += 1
-    return depth
+    # Fast exit: no braces on this line at all
+    if '{' not in line and '}' not in line:
+        return 0
+    # Fast path: no string/char literals — count directly (C-speed)
+    if '"' not in line and "'" not in line:
+        return line.count('{') - line.count('}')
+    # Slow path: strip string/char literals first so quoted braces don't count
+    clean = _STRING_LITERAL_RE.sub('', line)
+    return clean.count('{') - clean.count('}')
 def _extract_modifiers(text: str) -> list[str]:
@@ -591,7 +629,6 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
     _raw_lines = source.splitlines()
     _joined: list[str] = []
     _i = 0
-    _CLASS_KW_RE = re.compile(r'\b(?:class|interface|enum)\s+[A-Z]')
     while _i < len(_raw_lines):
         _line = _raw_lines[_i]
         _stripped = _line.strip()
@@ -633,10 +670,8 @@ def _extract_symbols(source: str, rel_path: str) -> tuple[str, list[SymbolRecord
         net = _count_net_braces(stripped)
         if stripped.startswith("@"):
-            ann_m = _ANN_WITH_ARGS_RE.match(stripped)
-            if ann_m:
-                ann = ann_m.group(1)
-                ann_args = ann_m.group(2) or ""
+            ann, ann_args = _parse_annotation_line(stripped)
+            if ann:
                 if ann not in pending_anns:
                     pending_anns.append(ann)
                 if ann_args and ann in _CAPTURE_ANN_ARGS:
@@ -1141,17 +1176,26 @@ def _build_relations(
                     evidence={"type": "signature", "value": f"implements {iface}"},
                 ))
-    for m_path, class_fqn in _extract_mapped_paths(source, "").items():
-        for sym in symbols:
-            if sym.type in ("class", "interface") and (
-                "@RestController" in sym.annotations or "@Controller" in sym.annotations
-            ):
+    # mapped_to edges: controller class → class-level @RequestMapping path prefix.
+    # O(N) scan of symbols — do NOT call _extract_mapped_paths(source) here because
+    # _REQUEST_MAPPING_RE also matches method-level @GetMapping/@PostMapping, producing
+    # O(N_methods) paths × O(N_syms) inner loop = O(N²) on files with many endpoints.
+    for sym in symbols:
+        if sym.type not in ("class", "interface"):
+            continue
+        if "@RestController" not in sym.annotations and "@Controller" not in sym.annotations:
+            continue
+        if "@RequestMapping" not in sym.annotations:
+            continue
+        _rm_args = sym.annotation_values.get("@RequestMapping", "")
+        for _m_path in _parse_route_paths(_rm_args):
+            if _m_path:
                 edges.append(RelationEdge(
                     from_symbol=sym.symbol,
-                    to_symbol=m_path,
+                    to_symbol=_m_path,
                     type="mapped_to",
                     confidence="high",
-                    evidence={"type": "annotation", "value": f"@RequestMapping(\"{m_path}\")"},
+                    evidence={"type": "annotation", "value": f"@RequestMapping(\"{_m_path}\")"},
                 ))
     # contained_in edges: method/field → enclosing class (structural membership)
@@ -1419,9 +1463,18 @@ def _collect_file_constants(source: str) -> dict[str, str]:
     Returns {simple_name: value} covering all classes in the file.
     Used by _resolve_ann_path_expr to fold constant references in @RequestMapping args.
     """
+    # Fast path: skip entirely when no declarations present (C-speed string scan)
+    if 'static final String' not in source:
+        return {}
+    # Scan only candidate lines (skips full-source regex over 100KB files).
+    # Running _STATIC_FINAL_STR_RE over the whole source is O(source_size) due to
+    # optional modifier group backtracking; per-line match is far cheaper.
     constants: dict[str, str] = {}
-    for m in _STATIC_FINAL_STR_RE.finditer(source):
-        constants[m.group(1)] = m.group(2)
+    for line in source.splitlines():
+        if 'static' in line and 'final' in line and 'String' in line and '=' in line and '"' in line:
+            m = _STATIC_FINAL_STR_RE.search(line)
+            if m:
+                constants[m.group(1)] = m.group(2)
     return constants
@@ -2205,11 +2258,19 @@ def _assemble(
     all_fqns_set = {s.symbol for s in sorted_syms}
-    # Bounded BFS reachability per node (graph-only)
-    bfs_reach: dict[str, int] = {
-        s.symbol: _bfs_reachability(s.symbol, adjacency)
-        for s in sorted_syms
-    }
+    # Bounded BFS reachability per node (graph-only).
+    # Skipped when symbol count exceeds threshold: O(N*(V+E)) BFS for every symbol
+    # hangs on large repos (keycloak: 80K+ symbols → 180s+ with no output).
+    # bfs_reach contributes only 0.1× weight vs in_deg+out_deg; skipping it on large
+    # repos causes no accuracy loss for spring-audit/endpoints/security analysis.
+    _BFS_SYMBOL_THRESHOLD: int = 5000
+    if len(sorted_syms) <= _BFS_SYMBOL_THRESHOLD:
+        bfs_reach: dict[str, int] = {
+            s.symbol: _bfs_reachability(s.symbol, adjacency)
+            for s in sorted_syms
+        }
+    else:
+        bfs_reach = {}
     # Normalize centrality across all nodes
     max_raw = max(
@@ -2495,7 +2556,6 @@ def _route_security_from_sym(
       @RequiresRoles          → {policy: requiresroles, roles: [...]}
       @RequiresPermissions    → {policy: requirespermissions, roles: [...]}
       @SecurityRequirement    → {policy: openapi_security, spec: ...}
-      @M3FiltroSeguridad      → {policy: custom_permission, required_permission: ...}
     Falls back to class-level annotations if no method-level security found.
     Returns None if no security signal detected at either level.
@@ -2534,15 +2594,6 @@ def _route_security_from_sym(
         if "@SecurityRequirement" in anns:
             raw = vals.get("@SecurityRequirement", "")
             return {"policy": "openapi_security", "spec": raw.strip()}
-        # Custom legacy annotation
-        if "@M3FiltroSeguridad" in anns:
-            import re as _re2
-            raw = vals.get("@M3FiltroSeguridad", "")
-            m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
-            if m:
-                return {"policy": "custom_permission", "required_permission": m.group(1)}
-            # Value is a constant reference or empty — still flag the annotation
-            return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
         return None
     # Method-level first, then class-level fallback
@@ -2829,6 +2880,29 @@ def build_repo_ir(
     # type map before building relations.  Java classes in the same package
     # reference each other without import statements, so import_map alone cannot
     # resolve them — _build_same_package_map provides the cross-file fallback.
+    #
+    # Pre-scan filter: skip full symbol extraction for files that have no
+    # Spring/JAX-RS/CDI annotations. These files (utility classes, model beans,
+    # SPI interfaces) contribute no endpoints, transactions, or security findings
+    # to spring-audit. The text scan is C-speed vs O(lines) Python parse loop.
+    # Non-annotated files still register their package+class via a lightweight
+    # regex scan so same-package type resolution remains correct.
+    _ANNOTATION_MARKERS: tuple[str, ...] = (
+        '@Controller', '@RestController', '@Service', '@Repository',
+        '@Component', '@Configuration', '@Bean', '@Transactional',
+        '@Path', '@GET', '@POST', '@PUT', '@DELETE', '@PATCH',
+        '@PreAuthorize', '@RolesAllowed', '@Secured', '@EnableWebSecurity',
+        '@SpringBootApplication', '@EventListener', '@TransactionalEventListener',
+        '@RequiredArgsConstructor', '@AllArgsConstructor',
+        '@Inject', '@ApplicationScoped', '@RequestScoped', '@Singleton',
+        '@EnableMethodSecurity', '@EnableGlobalMethodSecurity',
+        # JPA / persistence (needed for stereotype detection in all commands)
+        '@Entity', '@MappedSuperclass', '@Embeddable',
+        # AOP / messaging / event sourcing
+        '@Aspect', '@Aggregate', '@Document',
+        # Spring Data
+        '@Query', '@NamedQuery',
+    )
     _per_file: list[tuple[str, str, str, list[str], list[SymbolRecord]]] = []
     for rel_path in sorted(file_paths):
         abs_path = root / rel_path
@@ -2839,6 +2913,23 @@ def build_repo_ir(
         _meta_files_read += 1
         _meta_lines_read += source.count("\n") + (1 if source and not source.endswith("\n") else 0)
         _meta_chars_read += len(source)
+        # Fast pre-scan: if file has no relevant annotations skip full extraction.
+        # Still register package/class name for same-package resolution.
+        if not any(marker in source for marker in _ANNOTATION_MARKERS):
+            pkg_m = _PKG_RE.search(source)
+            _pkg = pkg_m.group(1) if pkg_m else ""
+            # Minimal class-name symbols for same-package map (no methods/fields)
+            _min_syms: list[SymbolRecord] = []
+            for _cm in re.finditer(r'(?:class|interface|enum)\s+(\w+)', source):
+                _cls_name = _cm.group(1)
+                _fqn = f"{_pkg}.{_cls_name}" if _pkg else _cls_name
+                _min_syms.append(SymbolRecord(
+                    symbol=_fqn, type="class", confidence="medium",
+                    declaring_file=rel_path,
+                ))
+            all_symbols.extend(_min_syms)
+            # No relations needed for non-annotated files
+            continue
         package, symbols, raw_imports = _extract_symbols(source, rel_path)
         all_symbols.extend(symbols)
         _per_file.append((rel_path, source, package, raw_imports, symbols))
@@ -4147,13 +4238,22 @@ def _all_callers_from_rg(fqn: str, reverse_graph: dict[str, dict[str, list[str]]
     BUG-01 fix: skip 'contained_in' edges — those represent structural membership
     (method→enclosing class), not actual callers.  Without this, an Impl class
     with 91 own methods would show 91 "direct callers" and inflate risk to HIGH.
+    CH-002 fix: for 'injects' edges, normalize field/constructor FQNs to their
+    enclosing class.  e.g. pkg.ConsolidacionService.calcularField → pkg.ConsolidacionService
+    so BFS can continue through DI injection chains and find controllers.
     """
     entry = reverse_graph.get(fqn) or {}
     callers: list[str] = []
+    seen: set[str] = set()
     for edge_type, fqn_list in entry.items():
         if edge_type == "contained_in":
             continue  # structural membership, not a caller
-        callers.extend(fqn_list)
+        for c in fqn_list:
+            normalized = _normalize_owner_fqn(c) if edge_type == "injects" else c
+            if normalized not in seen:
+                seen.add(normalized)
+                callers.append(normalized)
     return callers

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/semantic_analyzer.py RENAMED Viewed

@@ -57,15 +57,7 @@ _EXTENDS_RE = re.compile(
 # Custom AOP annotation registry — extend here for project-specific security/AOP annotations.
 # Each entry: (method_regex, impl_symbol_name).
 # method_regex must capture the annotated method name in group 1.
-_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = [
-    (
-        re.compile(
-            r'@M3FiltroSeguridad(?:\([^)]*\))?\s+(?:@[^\s]+\s+)*'
-            r'(?:public|private|protected)\s+\w[\w<>\[\]]*\s+([a-z][A-Za-z0-9_]*)\s*\('
-        ),
-        "M3FiltroSeguridadImpl",
-    ),
-]
+_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = []
 _LOMBOK_CLASS_RE = re.compile(
     r'(@(?:Data|Slf4j|Builder|AllArgsConstructor|NoArgsConstructor)(?:\([^)]*\))?\s+)*'
     r'(?:public\s+)?(?:class|interface)\s+([A-Z][A-Za-z0-9_]*)',
@@ -925,7 +917,7 @@ class SemanticAnalyzer:
         method="heuristic", confidence="low" for all Java edges.
         Includes: Lombok synthetic symbols, @Autowired field edges,
-        @Mapper interface detection, inheritance chains, @M3FiltroSeguridad AOP edges.
+        @Mapper interface detection, inheritance chains, custom AOP annotation edges.
         """
         _JAVA_KEYWORDS: frozenset[str] = frozenset({
             "if", "for", "while", "switch", "catch", "super", "this", "new",

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/serializer.py RENAMED Viewed

@@ -504,7 +504,9 @@ def _transactional_summary(sm: "SourceMap", *, full: bool = False) -> "Optional[
         classes = getattr(s, "transactional_classes", [])
         if classes:
             total = len(classes)
-            result: dict[str, Any] = {"count": total, "classes": classes}
+            # class_count = unique classes with @Transactional anywhere (file-level scan).
+            # spring-audit metadata.tx_stats has method-level annotation breakdown.
+            result: dict[str, Any] = {"class_count": total, "classes": classes}
             if total > 10 and not full:
                 result["classes"] = classes[:10]
                 result["truncated"] = True
@@ -549,9 +551,13 @@ def _security_surface_from_eps(
     root: "Optional[Path]" = None,
     file_paths: "Optional[list[str]]" = None,
 ) -> "Optional[dict[str, Any]]":
-    """Extract @M3FiltroSeguridad resource names from entry point evidence strings."""
+    """Extract permission resource names from entry point evidence strings.
+    Looks for quoted resource="VALUE" or nombreRecurso='VALUE' pairs in evidence
+    produced by custom security annotations on REST controller methods.
+    """
     import re as _re
-    _NOMBRE_RE = _re.compile(r"nombreRecurso=[\"']([^\"']+)[\"']")
+    _RESOURCE_RE = _re.compile(r"(?:resource|nombreRecurso)=[\"']([^\"']+)[\"']")
     _CONST_SYMBOL_RE = _re.compile(r'^[\w]+\.[\w]+$')
     resource_names: list[str] = []
     unresolved: list[str] = []
@@ -560,7 +566,7 @@ def _security_surface_from_eps(
         evidence = getattr(ep, "evidence", None)
         if not evidence:
             continue
-        for m in _NOMBRE_RE.finditer(evidence):
+        for m in _RESOURCE_RE.finditer(evidence):
             nm = m.group(1)
             if not nm or nm in seen:
                 continue
@@ -578,8 +584,8 @@ def _security_surface_from_eps(
         return None
     result: dict[str, Any] = {
         "schema": (
-            "Values used in @M3FiltroSeguridad(nombreRecurso=VALUE) on REST controller "
-            "methods. Each value names a permission resource checked at runtime."
+            "Permission resource identifiers found on REST controller methods. "
+            "Each value names a resource checked at runtime by a security annotation."
         ),
         "resource_names": resource_names,
     }

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/spring_tx_analyzer.py RENAMED Viewed

@@ -739,7 +739,9 @@ def run_tx_audit(
         limitations=_tx_limitations,
         metadata={
             "symbols_analyzed": len(getattr(cir, "symbols", [])),
-            "tx_boundaries_found": tx_index.stats()["total"],
+            # tx_annotation_count = total @Transactional symbols (class-level + method-level).
+            # tx_stats.class_level matches compact transactional_boundaries.class_count.
+            "tx_annotation_count": tx_index.stats()["total"],
             "tx_stats": tx_index.stats(),
             "analysis_time_ms": elapsed_ms,
         },

{sourcecode-1.35.28 → sourcecode-1.35.30}/src/sourcecode/summarizer.py RENAMED Viewed

@@ -223,6 +223,20 @@ class ProjectSummarizer:
         __import__("re").IGNORECASE,
     )
+    # Patterns that indicate security scanner / tool output, not project description.
+    # Trivy, OWASP, Snyk, etc. produce structured vulnerability reports.
+    _TOOL_OUTPUT_RE = __import__("re").compile(
+        r"CVE-\d{4}-\d{4,}"                       # CVE identifiers
+        r"|UNKNOWN:\s*\d+.*LOW:\s*\d+"            # Trivy severity summary line
+        r"|(CRITICAL|HIGH|MEDIUM|LOW):\s*\d+"     # severity: count pattern
+        r"|\bTotal:\s*\d+\s*\("                   # "Total: 45 (UNKNOWN: 0, ..." Trivy header
+        r"|\bvulnerabilit(?:y|ies)\s+found\b"     # "N vulnerabilities found"
+        r"|\bscan(?:ned|ning)\s+\d+\s+(?:file|package|image)\b"  # scanner progress
+        r"|\bpkg:(?:npm|pypi|maven|cargo|golang)/" # PURL package identifiers
+        r"|\b(?:trivy|snyk|grype|syft|cosign)\b", # well-known scanner names
+        __import__("re").IGNORECASE,
+    )
     def _extract_first_useful_paragraph(self, content: str) -> str | None:
         """Extract the first paragraph that describes the project architecture, not its license or marketing."""
         import re as _re
@@ -268,6 +282,9 @@ class ProjectSummarizer:
             # Reject license notices and user-facing marketing text
             if self._LICENSE_MARKETING_RE.search(paragraph):
                 continue
+            # Reject security scanner / tool output (Trivy, Snyk, OWASP, CVE lists)
+            if self._TOOL_OUTPUT_RE.search(paragraph):
+                continue
             # Reject link-list paragraphs (docs/navigation sections):
             # if more than 2 markdown links dominate the paragraph, it's a nav section
             _link_count = len(_MD_LINK_RE.findall(paragraph))