PyPI - sourcecode - Versions diffs - 1.35.29__tar.gz → 1.35.31__tar.gz - Mend

sourcecode 1.35.29.tar.gz → 1.35.31.tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (109) hide show

{sourcecode-1.35.29 → sourcecode-1.35.31}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.35.29
+Version: 1.35.31
 Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
 License-File: LICENSE
 Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.35.29-blue)
+![Version](https://img.shields.io/badge/version-1.35.31-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -114,7 +114,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.35.29
+# sourcecode 1.35.31
 **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
 ```

{sourcecode-1.35.29 → sourcecode-1.35.31}/README.md RENAMED Viewed

@@ -2,7 +2,7 @@
 **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
-![Version](https://img.shields.io/badge/version-1.35.29-blue)
+![Version](https://img.shields.io/badge/version-1.35.31-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -76,7 +76,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.35.29
+# sourcecode 1.35.31
 **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
 ```

{sourcecode-1.35.29 → sourcecode-1.35.31}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "1.35.29"
+version = "1.35.31"
 description = "Persistent structural context and ultra-fast repeated analysis for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.35.29"
+__version__ = "1.35.31"

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/canonical_ir.py RENAMED Viewed

@@ -58,7 +58,7 @@ class CanonicalSecurity:
     source_scope: str                        # method|class|inherited
     effective_roles: list[str] = field(default_factory=list)
     expression: str = ""                     # SpEL for @PreAuthorize/@PostAuthorize
-    required_permission: str = ""            # for @M3FiltroSeguridad
+    required_permission: str = ""            # for custom permission annotations
     raw: dict = field(default_factory=dict)  # full original policy dict
     def to_dict(self) -> dict:

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/cli.py RENAMED Viewed

@@ -178,10 +178,10 @@ Cold scan: 2–10s depending on repo size. Warm cache: 0.3–0.6s.
   cache clear                  [dim]# clear all cached results for this repo[/dim]
 [bold]Examples:[/bold]
-  sourcecode saint-server --compact
+  sourcecode my-project --compact
   sourcecode . --compact --git-context --copy
   sourcecode . --changed-only --git-context
-  sourcecode prepare-context onboard saint-server
+  sourcecode prepare-context onboard my-project
   sourcecode prepare-context delta . --since main
 [bold]Subcommands:[/bold]
@@ -629,7 +629,7 @@ def main(
         help=(
             "High-signal summary (typically 1000–3000 tokens depending on repo size): "
             "stacks, entry points, dependency summary, confidence, and gaps. "
-            "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
+            "Includes security_surface (when custom security annotations detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
             "Use --agent for maximum signal."
         ),
     ),
@@ -3311,6 +3311,11 @@ def repo_ir_cmd(
         "--force",
         help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
     ),
+    gzip_output: bool = typer.Option(
+        False,
+        "--gzip",
+        help="Compress output with gzip. Requires --output. Reduces large IR files by ~70-80%.",
+    ),
 ) -> None:
     """Deterministic symbol-level IR for Java repositories.
@@ -3323,6 +3328,7 @@ def repo_ir_cmd(
       --summary-only          Omit full graph; keep analysis + impact (smallest output)
       --max-nodes N           Keep top N nodes by score
       --max-edges N           Keep top N edges (priority: both endpoints kept)
+      --gzip                  Compress output file (~70-80% smaller; requires --output)
     \b
     Examples:
@@ -3332,6 +3338,7 @@ def repo_ir_cmd(
       sourcecode repo-ir --since main --output ir.json
       sourcecode repo-ir --since HEAD~3 --summary-only --output ir-small.json
       sourcecode repo-ir --max-nodes 200 --max-edges 500
+      sourcecode repo-ir --output ir.json.gz --gzip
     """
     import json as _json
@@ -3392,22 +3399,52 @@ def repo_ir_cmd(
     output = _serialize_dict(ir, format)
     if output_path:
-        output_path.write_text(output, encoding="utf-8")
-        size_kb = len(output.encode("utf-8")) // 1024
-        if summary_only:
+        if gzip_output and not str(output_path).endswith(".gz"):
+            output_path = output_path.with_suffix(output_path.suffix + ".gz")
+        raw_bytes = output.encode("utf-8")
+        size_bytes = len(raw_bytes)
+        _SIZE_WARN_BYTES = 10 * 1024 * 1024  # 10MB
+        if size_bytes > _SIZE_WARN_BYTES and not gzip_output:
             typer.echo(
-                f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
+                f"[repo-ir] Output is {size_bytes // (1024 * 1024)}MB — "
+                "consider --summary-only, --max-nodes N --max-edges N, or --gzip to compress.",
                 err=True,
             )
-        else:
-            n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
-            n_edges = len((ir.get("graph") or {}).get("edges") or [])
+        if gzip_output:
+            import gzip as _gzip
+            with _gzip.open(output_path, "wb") as _gz:
+                _gz.write(raw_bytes)
+            compressed_kb = output_path.stat().st_size // 1024
+            size_kb = size_bytes // 1024
             typer.echo(
-                f"IR written to {output_path} "
-                f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
+                f"IR written to {output_path} ({compressed_kb}KB gzip, {size_kb}KB uncompressed)",
                 err=True,
             )
+        else:
+            output_path.write_bytes(raw_bytes)
+            size_kb = size_bytes // 1024
+            if summary_only:
+                typer.echo(
+                    f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
+                    err=True,
+                )
+            else:
+                n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
+                n_edges = len((ir.get("graph") or {}).get("edges") or [])
+                typer.echo(
+                    f"IR written to {output_path} "
+                    f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
+                    err=True,
+                )
     else:
+        if gzip_output:
+            _emit_error_json(
+                INVALID_INPUT_CODE,
+                "--gzip requires --output FILE.",
+                hint="Add --output ir.json.gz to write compressed output to a file.",
+                expected="--output path when --gzip is used.",
+            )
+            raise typer.Exit(1)
         _ir_size = len(output.encode("utf-8"))
         _ir_tokens_est = _ir_size // 4
         # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
@@ -4376,10 +4413,13 @@ def pr_impact_cmd(
     if not files.exists():
         _emit_error_json(
             INVALID_INPUT_CODE,
-            f"--files path '{files}' does not exist.",
+            f"--files '{files}' does not exist. Expected a text file listing changed file paths (one per line), not a directory or class name.",
             path=str(files),
-            hint="Pass a file containing one Java file path per line.",
-            expected="An existing file path.",
+            hint=(
+                "Create a file with one changed Java file path per line, then pass it with --files. "
+                "Example: git diff --name-only HEAD~1 > changed.txt && sourcecode pr-impact . --files changed.txt"
+            ),
+            expected="A text file containing one Java file path per line.",
         )
         raise typer.Exit(code=1)
@@ -4749,6 +4789,21 @@ def fix_bug_cmd(
       sourcecode impact <target>   — Propagate impact from a specific class
       sourcecode onboard .         — Full architecture context first
     """
+    # Detect misuse: `fix-bug "symptom text" /path` — path arg looks like a symptom.
+    _path_str = str(path)
+    _path_looks_like_symptom = (
+        not Path(_path_str).exists()
+        and (" " in _path_str or any(c.isupper() for c in _path_str))
+    )
+    if _path_looks_like_symptom and not symptom:
+        _emit_error_json(
+            INVALID_INPUT_CODE,
+            f"'{_path_str}' is not a valid directory. Did you mean to use --symptom?",
+            hint=f"Use: sourcecode fix-bug . --symptom {_path_str!r}",
+            expected="A repository directory path as first argument.",
+        )
+        raise typer.Exit(code=1)
     if not symptom:
         # Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
         # must never receive informational text mixed into JSON stdout.
@@ -5380,6 +5435,12 @@ def cold_start_cmd(
         "--compact",
         help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
     ),
+    output_path: Optional[Path] = typer.Option(
+        None,
+        "--output",
+        "-o",
+        help="Write output to file instead of stdout.",
+    ),
 ) -> None:
     """Output Repository Intelligence Snapshot bootstrap context as JSON.
@@ -5419,7 +5480,12 @@ def cold_start_cmd(
             "Use --compact for a ~10K token subset, or --output FILE to save.\n"
         )
         sys.stderr.flush()
-    typer.echo(_out)
+    if output_path:
+        output_path.write_text(_out, encoding="utf-8")
+        sys.stderr.write(f"Saved {len(_out.encode('utf-8'))} bytes to {output_path}\n")
+        sys.stderr.flush()
+    else:
+        typer.echo(_out)
 # ── MCP server ────────────────────────────────────────────────────────────────
@@ -5872,6 +5938,24 @@ def mcp_list_tools(
 # ── Cache subcommands ─────────────────────────────────────────────────────────
+def _resolve_repo_root(path: Path) -> Path:
+    """Resolve *path* to a repo root by walking up to find a .git directory.
+    If *path* is already a git root (has .git), returns it directly.
+    If *path* is a subdirectory of a git repo, returns the git root.
+    Falls back to *path* itself if no git repo found.
+    """
+    candidate = path.resolve()
+    while True:
+        if (candidate / ".git").exists():
+            return candidate
+        parent = candidate.parent
+        if parent == candidate:
+            break
+        candidate = parent
+    return path.resolve()
 @cache_app.command("status")
 def cache_status_cmd(
     path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
@@ -5879,7 +5963,7 @@ def cache_status_cmd(
 ) -> None:
     """Show cache statistics for a repository."""
     from sourcecode import cache as _cm
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     stats = _cm.status(target)
     if json_output:
         import json as _j
@@ -5913,7 +5997,7 @@ def cache_clear_cmd(
     index used for cold-start bootstrapping.  Use --all to also clear it.
     """
     from sourcecode import cache as _cm
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     _clear_ris = include_ris or all_
     if not yes:
         _ris_note = " (including RIS)" if _clear_ris else " (RIS preserved — use --all to also clear it)"
@@ -5935,7 +6019,7 @@ def cache_warm_cmd(
     """
     import shutil as _shutil
     import subprocess as _sub
-    target = Path(path).resolve()
+    target = _resolve_repo_root(Path(path))
     typer.echo(f"Warming cache for {target} …", err=True)
     _sc_bin = _shutil.which("sourcecode") or sys.argv[0]
     cmd = [_sc_bin, str(target)]

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/detectors/java.py RENAMED Viewed

@@ -38,13 +38,7 @@ _REQUEST_METHOD_VERB_RE = re.compile(
 # Custom security annotation registry — extend here for project-specific annotations.
 # Each entry: annotation_simple_name → compiled params regex.
 # Groups: (1) resource string literal, (2) resource constant ref, (3) level integer.
-_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {
-    "M3FiltroSeguridad": re.compile(
-        r'@M3FiltroSeguridad\s*\(\s*'
-        r'(?:nombreRecurso\s*=\s*(?:"([^"]*)"|([\w.]+)))?'
-        r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
-    ),
-}
+_CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {}
 # Security config detection
 _WEB_SECURITY_CONFIGURER_RE = re.compile(r'WebSecurityConfigurerAdapter\b')

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/mcp/server.py RENAMED Viewed

@@ -639,7 +639,7 @@ def get_endpoints(repo_path: str = ".") -> dict:
              "unknown" (no security signals detected).
     Supports Spring MVC (@GetMapping etc.) and JAX-RS (@GET/@POST etc.).
     Security annotations detected: @RolesAllowed, @PermitAll, @DenyAll,
-    @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement, @M3FiltroSeguridad.
+    @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement.
     repo_path: absolute path to the Java repository (default: current working directory).
     """
     _raw = repo_path

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -2003,7 +2003,7 @@ class TaskContextBuilder:
                         for _cf in (_cr.files_changed or []):
                             _cf_norm = _cf.replace("\\", "/")
                             # Git reports paths relative to the git root, which may be
-                            # a parent of the analyzed directory (e.g. MSAS/saint-server/).
+                            # a parent of the analyzed directory (e.g. a monorepo root).
                             # Strip the analyzed-dir prefix so paths match all_paths.
                             if _cf_norm.startswith(_rn_prefix):
                                 _cf_norm = _cf_norm[len(_rn_prefix):]

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/repository_ir.py RENAMED Viewed

@@ -22,6 +22,8 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Optional
+from sourcecode.fqn_utils import normalize_owner_fqn as _normalize_owner_fqn
 # ---------------------------------------------------------------------------
 # Data classes — Phases 1–4
 # ---------------------------------------------------------------------------
@@ -171,8 +173,6 @@ _PATH_ANNOTATIONS: frozenset[str] = frozenset({"@Path"})
 # Security / authorization annotations whose args must be captured.
 # Includes standard Jakarta EE, JAX-RS, Quarkus/MicroProfile, and custom patterns.
 _PERMISSION_ANNOTATIONS: frozenset[str] = frozenset({
-    # Custom (kept for backward compat)
-    "@M3FiltroSeguridad",
     # Jakarta EE / JAX-RS standard
     "@RolesAllowed",
     "@PermitAll",
@@ -2556,7 +2556,6 @@ def _route_security_from_sym(
       @RequiresRoles          → {policy: requiresroles, roles: [...]}
       @RequiresPermissions    → {policy: requirespermissions, roles: [...]}
       @SecurityRequirement    → {policy: openapi_security, spec: ...}
-      @M3FiltroSeguridad      → {policy: custom_permission, required_permission: ...}
     Falls back to class-level annotations if no method-level security found.
     Returns None if no security signal detected at either level.
@@ -2595,15 +2594,6 @@ def _route_security_from_sym(
         if "@SecurityRequirement" in anns:
             raw = vals.get("@SecurityRequirement", "")
             return {"policy": "openapi_security", "spec": raw.strip()}
-        # Custom legacy annotation
-        if "@M3FiltroSeguridad" in anns:
-            import re as _re2
-            raw = vals.get("@M3FiltroSeguridad", "")
-            m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
-            if m:
-                return {"policy": "custom_permission", "required_permission": m.group(1)}
-            # Value is a constant reference or empty — still flag the annotation
-            return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
         return None
     # Method-level first, then class-level fallback
@@ -4248,13 +4238,22 @@ def _all_callers_from_rg(fqn: str, reverse_graph: dict[str, dict[str, list[str]]
     BUG-01 fix: skip 'contained_in' edges — those represent structural membership
     (method→enclosing class), not actual callers.  Without this, an Impl class
     with 91 own methods would show 91 "direct callers" and inflate risk to HIGH.
+    CH-002 fix: for 'injects' edges, normalize field/constructor FQNs to their
+    enclosing class.  e.g. pkg.ConsolidacionService.calcularField → pkg.ConsolidacionService
+    so BFS can continue through DI injection chains and find controllers.
     """
     entry = reverse_graph.get(fqn) or {}
     callers: list[str] = []
+    seen: set[str] = set()
     for edge_type, fqn_list in entry.items():
         if edge_type == "contained_in":
             continue  # structural membership, not a caller
-        callers.extend(fqn_list)
+        for c in fqn_list:
+            normalized = _normalize_owner_fqn(c) if edge_type == "injects" else c
+            if normalized not in seen:
+                seen.add(normalized)
+                callers.append(normalized)
     return callers

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/semantic_analyzer.py RENAMED Viewed

@@ -57,15 +57,7 @@ _EXTENDS_RE = re.compile(
 # Custom AOP annotation registry — extend here for project-specific security/AOP annotations.
 # Each entry: (method_regex, impl_symbol_name).
 # method_regex must capture the annotated method name in group 1.
-_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = [
-    (
-        re.compile(
-            r'@M3FiltroSeguridad(?:\([^)]*\))?\s+(?:@[^\s]+\s+)*'
-            r'(?:public|private|protected)\s+\w[\w<>\[\]]*\s+([a-z][A-Za-z0-9_]*)\s*\('
-        ),
-        "M3FiltroSeguridadImpl",
-    ),
-]
+_CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = []
 _LOMBOK_CLASS_RE = re.compile(
     r'(@(?:Data|Slf4j|Builder|AllArgsConstructor|NoArgsConstructor)(?:\([^)]*\))?\s+)*'
     r'(?:public\s+)?(?:class|interface)\s+([A-Z][A-Za-z0-9_]*)',
@@ -925,7 +917,7 @@ class SemanticAnalyzer:
         method="heuristic", confidence="low" para todos los edges Java.
         Includes: Lombok synthetic symbols, @Autowired field edges,
-        @Mapper interface detection, inheritance chains, @M3FiltroSeguridad AOP edges.
+        @Mapper interface detection, inheritance chains, custom AOP annotation edges.
         """
         _JAVA_KEYWORDS: frozenset[str] = frozenset({
             "if", "for", "while", "switch", "catch", "super", "this", "new",

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/serializer.py RENAMED Viewed

@@ -504,7 +504,9 @@ def _transactional_summary(sm: "SourceMap", *, full: bool = False) -> "Optional[
         classes = getattr(s, "transactional_classes", [])
         if classes:
             total = len(classes)
-            result: dict[str, Any] = {"count": total, "classes": classes}
+            # class_count = unique classes with @Transactional anywhere (file-level scan).
+            # spring-audit metadata.tx_stats has method-level annotation breakdown.
+            result: dict[str, Any] = {"class_count": total, "classes": classes}
             if total > 10 and not full:
                 result["classes"] = classes[:10]
                 result["truncated"] = True
@@ -549,9 +551,13 @@ def _security_surface_from_eps(
     root: "Optional[Path]" = None,
     file_paths: "Optional[list[str]]" = None,
 ) -> "Optional[dict[str, Any]]":
-    """Extract @M3FiltroSeguridad resource names from entry point evidence strings."""
+    """Extract permission resource names from entry point evidence strings.
+    Looks for resource=VALUE or nombreRecurso=VALUE patterns in evidence
+    produced by custom security annotations on REST controller methods.
+    """
     import re as _re
-    _NOMBRE_RE = _re.compile(r"nombreRecurso=[\"']([^\"']+)[\"']")
+    _RESOURCE_RE = _re.compile(r"(?:resource|nombreRecurso)=[\"']([^\"']+)[\"']")
     _CONST_SYMBOL_RE = _re.compile(r'^[\w]+\.[\w]+$')
     resource_names: list[str] = []
     unresolved: list[str] = []
@@ -560,7 +566,7 @@ def _security_surface_from_eps(
         evidence = getattr(ep, "evidence", None)
         if not evidence:
             continue
-        for m in _NOMBRE_RE.finditer(evidence):
+        for m in _RESOURCE_RE.finditer(evidence):
             nm = m.group(1)
             if not nm or nm in seen:
                 continue
@@ -578,8 +584,8 @@ def _security_surface_from_eps(
         return None
     result: dict[str, Any] = {
         "schema": (
-            "Values used in @M3FiltroSeguridad(nombreRecurso=VALUE) on REST controller "
-            "methods. Each value names a permission resource checked at runtime."
+            "Permission resource identifiers found on REST controller methods. "
+            "Each value names a resource checked at runtime by a security annotation."
         ),
         "resource_names": resource_names,
     }

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/spring_tx_analyzer.py RENAMED Viewed

@@ -739,7 +739,9 @@ def run_tx_audit(
         limitations=_tx_limitations,
         metadata={
             "symbols_analyzed": len(getattr(cir, "symbols", [])),
-            "tx_boundaries_found": tx_index.stats()["total"],
+            # tx_annotation_count = total @Transactional symbols (class-level + method-level).
+            # tx_stats.class_level matches compact transactional_boundaries.class_count.
+            "tx_annotation_count": tx_index.stats()["total"],
             "tx_stats": tx_index.stats(),
             "analysis_time_ms": elapsed_ms,
         },

{sourcecode-1.35.29 → sourcecode-1.35.31}/src/sourcecode/summarizer.py RENAMED Viewed

@@ -223,6 +223,20 @@ class ProjectSummarizer:
         __import__("re").IGNORECASE,
     )
+    # Patterns that indicate security scanner / tool output, not project description.
+    # Trivy, OWASP, Snyk, etc. produce structured vulnerability reports.
+    _TOOL_OUTPUT_RE = __import__("re").compile(
+        r"CVE-\d{4}-\d{4,}"                       # CVE identifiers
+        r"|UNKNOWN:\s*\d+.*LOW:\s*\d+"            # Trivy severity summary line
+        r"|(CRITICAL|HIGH|MEDIUM|LOW):\s*\d+"     # severity: count pattern
+        r"|\bTotal:\s*\d+\s*\("                   # "Total: 45 (UNKNOWN: 0, ..." Trivy header
+        r"|\bvulnerabilit(?:y|ies)\s+found\b"     # "N vulnerabilities found"
+        r"|\bscan(?:ned|ning)\s+\d+\s+(?:file|package|image)\b"  # scanner progress
+        r"|\bpkg:(?:npm|pypi|maven|cargo|golang)/" # PURL package identifiers
+        r"|\b(?:trivy|snyk|grype|syft|cosign)\b", # well-known scanner names
+        __import__("re").IGNORECASE,
+    )
     def _extract_first_useful_paragraph(self, content: str) -> str | None:
         """Extract the first paragraph that describes the project architecture, not its license or marketing."""
         import re as _re
@@ -268,6 +282,9 @@ class ProjectSummarizer:
             # Reject license notices and user-facing marketing text
             if self._LICENSE_MARKETING_RE.search(paragraph):
                 continue
+            # Reject security scanner / tool output (Trivy, Snyk, OWASP, CVE lists)
+            if self._TOOL_OUTPUT_RE.search(paragraph):
+                continue
             # Reject link-list paragraphs (docs/navigation sections):
             # if more than 2 markdown links dominate the paragraph, it's a nav section
             _link_count = len(_MD_LINK_RE.findall(paragraph))