sourcecode 1.35.29__py3-none-any.whl → 1.35.30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sourcecode/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "1.35.29"
3
+ __version__ = "1.35.30"
@@ -58,7 +58,7 @@ class CanonicalSecurity:
58
58
  source_scope: str # method|class|inherited
59
59
  effective_roles: list[str] = field(default_factory=list)
60
60
  expression: str = "" # SpEL for @PreAuthorize/@PostAuthorize
61
- required_permission: str = "" # for @M3FiltroSeguridad
61
+ required_permission: str = "" # for custom permission annotations
62
62
  raw: dict = field(default_factory=dict) # full original policy dict
63
63
 
64
64
  def to_dict(self) -> dict:
sourcecode/cli.py CHANGED
@@ -178,10 +178,10 @@ Cold scan: 2–10s depending on repo size. Warm cache: 0.3–0.6s.
178
178
  cache clear [dim]# clear all cached results for this repo[/dim]
179
179
 
180
180
  [bold]Examples:[/bold]
181
- sourcecode saint-server --compact
181
+ sourcecode my-project --compact
182
182
  sourcecode . --compact --git-context --copy
183
183
  sourcecode . --changed-only --git-context
184
- sourcecode prepare-context onboard saint-server
184
+ sourcecode prepare-context onboard my-project
185
185
  sourcecode prepare-context delta . --since main
186
186
 
187
187
  [bold]Subcommands:[/bold]
@@ -629,7 +629,7 @@ def main(
629
629
  help=(
630
630
  "High-signal summary (typically 1000–3000 tokens depending on repo size): "
631
631
  "stacks, entry points, dependency summary, confidence, and gaps. "
632
- "Includes security_surface (when @M3FiltroSeguridad detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
632
+ "Includes security_surface (when custom security annotations detected), mybatis (when MyBatis framework detected), and transactional_boundaries for Java projects. "
633
633
  "Use --agent for maximum signal."
634
634
  ),
635
635
  ),
@@ -3311,6 +3311,11 @@ def repo_ir_cmd(
3311
3311
  "--force",
3312
3312
  help="Bypass the token-size guard and emit output even when estimated tokens exceed 50K.",
3313
3313
  ),
3314
+ gzip_output: bool = typer.Option(
3315
+ False,
3316
+ "--gzip",
3317
+ help="Compress output with gzip. Requires --output. Reduces large IR files by ~70-80%.",
3318
+ ),
3314
3319
  ) -> None:
3315
3320
  """Deterministic symbol-level IR for Java repositories.
3316
3321
 
@@ -3323,6 +3328,7 @@ def repo_ir_cmd(
3323
3328
  --summary-only Omit full graph; keep analysis + impact (smallest output)
3324
3329
  --max-nodes N Keep top N nodes by score
3325
3330
  --max-edges N Keep top N edges (priority: both endpoints kept)
3331
+ --gzip Compress output file (~70-80% smaller; requires --output)
3326
3332
 
3327
3333
  \b
3328
3334
  Examples:
@@ -3332,6 +3338,7 @@ def repo_ir_cmd(
3332
3338
  sourcecode repo-ir --since main --output ir.json
3333
3339
  sourcecode repo-ir --since HEAD~3 --summary-only --output ir-small.json
3334
3340
  sourcecode repo-ir --max-nodes 200 --max-edges 500
3341
+ sourcecode repo-ir --output ir.json.gz --gzip
3335
3342
  """
3336
3343
  import json as _json
3337
3344
 
@@ -3392,22 +3399,52 @@ def repo_ir_cmd(
3392
3399
  output = _serialize_dict(ir, format)
3393
3400
 
3394
3401
  if output_path:
3395
- output_path.write_text(output, encoding="utf-8")
3396
- size_kb = len(output.encode("utf-8")) // 1024
3397
- if summary_only:
3402
+ if gzip_output and not str(output_path).endswith(".gz"):
3403
+ output_path = output_path.with_suffix(output_path.suffix + ".gz")
3404
+ raw_bytes = output.encode("utf-8")
3405
+ size_bytes = len(raw_bytes)
3406
+ _SIZE_WARN_BYTES = 10 * 1024 * 1024 # 10MB
3407
+ if size_bytes > _SIZE_WARN_BYTES and not gzip_output:
3398
3408
  typer.echo(
3399
- f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
3409
+ f"[repo-ir] Output is {size_bytes // (1024 * 1024)}MB "
3410
+ "consider --summary-only, --max-nodes N --max-edges N, or --gzip to compress.",
3400
3411
  err=True,
3401
3412
  )
3402
- else:
3403
- n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
3404
- n_edges = len((ir.get("graph") or {}).get("edges") or [])
3413
+ if gzip_output:
3414
+ import gzip as _gzip
3415
+ with _gzip.open(output_path, "wb") as _gz:
3416
+ _gz.write(raw_bytes)
3417
+ compressed_kb = output_path.stat().st_size // 1024
3418
+ size_kb = size_bytes // 1024
3405
3419
  typer.echo(
3406
- f"IR written to {output_path} "
3407
- f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
3420
+ f"IR written to {output_path} ({compressed_kb}KB gzip, {size_kb}KB uncompressed)",
3408
3421
  err=True,
3409
3422
  )
3423
+ else:
3424
+ output_path.write_bytes(raw_bytes)
3425
+ size_kb = size_bytes // 1024
3426
+ if summary_only:
3427
+ typer.echo(
3428
+ f"IR written to {output_path} ({size_kb}KB, graph omitted by --summary-only)",
3429
+ err=True,
3430
+ )
3431
+ else:
3432
+ n_nodes = len((ir.get("graph") or {}).get("nodes") or [])
3433
+ n_edges = len((ir.get("graph") or {}).get("edges") or [])
3434
+ typer.echo(
3435
+ f"IR written to {output_path} "
3436
+ f"({size_kb}KB, {n_nodes} nodes, {n_edges} edges)",
3437
+ err=True,
3438
+ )
3410
3439
  else:
3440
+ if gzip_output:
3441
+ _emit_error_json(
3442
+ INVALID_INPUT_CODE,
3443
+ "--gzip requires --output FILE.",
3444
+ hint="Add --output ir.json.gz to write compressed output to a file.",
3445
+ expected="--output path when --gzip is used.",
3446
+ )
3447
+ raise typer.Exit(1)
3411
3448
  _ir_size = len(output.encode("utf-8"))
3412
3449
  _ir_tokens_est = _ir_size // 4
3413
3450
  # P1-C: abort when estimated tokens > 50K unless --force or --output is given.
@@ -4376,10 +4413,13 @@ def pr_impact_cmd(
4376
4413
  if not files.exists():
4377
4414
  _emit_error_json(
4378
4415
  INVALID_INPUT_CODE,
4379
- f"--files path '{files}' does not exist.",
4416
+ f"--files '{files}' does not exist. Expected a text file listing changed file paths (one per line), not a directory or class name.",
4380
4417
  path=str(files),
4381
- hint="Pass a file containing one Java file path per line.",
4382
- expected="An existing file path.",
4418
+ hint=(
4419
+ "Create a file with one changed Java file path per line, then pass it with --files. "
4420
+ "Example: git diff --name-only HEAD~1 > changed.txt && sourcecode pr-impact . --files changed.txt"
4421
+ ),
4422
+ expected="A text file containing one Java file path per line.",
4383
4423
  )
4384
4424
  raise typer.Exit(code=1)
4385
4425
 
@@ -4749,6 +4789,21 @@ def fix_bug_cmd(
4749
4789
  sourcecode impact <target> — Propagate impact from a specific class
4750
4790
  sourcecode onboard . — Full architecture context first
4751
4791
  """
4792
+ # Detect misuse: `fix-bug "symptom text" /path` — path arg looks like a symptom.
4793
+ _path_str = str(path)
4794
+ _path_looks_like_symptom = (
4795
+ not Path(_path_str).exists()
4796
+ and (" " in _path_str or any(c.isupper() for c in _path_str))
4797
+ )
4798
+ if _path_looks_like_symptom and not symptom:
4799
+ _emit_error_json(
4800
+ INVALID_INPUT_CODE,
4801
+ f"'{_path_str}' is not a valid directory. Did you mean to use --symptom?",
4802
+ hint=f"Use: sourcecode fix-bug . --symptom {_path_str!r}",
4803
+ expected="A repository directory path as first argument.",
4804
+ )
4805
+ raise typer.Exit(code=1)
4806
+
4752
4807
  if not symptom:
4753
4808
  # Only emit advisory to interactive terminals — non-TTY (MCP, pipes, scripts)
4754
4809
  # must never receive informational text mixed into JSON stdout.
@@ -5380,6 +5435,12 @@ def cold_start_cmd(
5380
5435
  "--compact",
5381
5436
  help="Emit a compact subset (~10K tokens): status, git_head, stacks, entry_points, and key_dependencies only.",
5382
5437
  ),
5438
+ output_path: Optional[Path] = typer.Option(
5439
+ None,
5440
+ "--output",
5441
+ "-o",
5442
+ help="Write output to file instead of stdout.",
5443
+ ),
5383
5444
  ) -> None:
5384
5445
  """Output Repository Intelligence Snapshot bootstrap context as JSON.
5385
5446
 
@@ -5419,7 +5480,12 @@ def cold_start_cmd(
5419
5480
  "Use --compact for a ~10K token subset, or --output FILE to save.\n"
5420
5481
  )
5421
5482
  sys.stderr.flush()
5422
- typer.echo(_out)
5483
+ if output_path:
5484
+ output_path.write_text(_out, encoding="utf-8")
5485
+ sys.stderr.write(f"Saved {len(_out.encode('utf-8'))} bytes to {output_path}\n")
5486
+ sys.stderr.flush()
5487
+ else:
5488
+ typer.echo(_out)
5423
5489
 
5424
5490
 
5425
5491
  # ── MCP server ────────────────────────────────────────────────────────────────
@@ -5872,6 +5938,24 @@ def mcp_list_tools(
5872
5938
  # ── Cache subcommands ─────────────────────────────────────────────────────────
5873
5939
 
5874
5940
 
5941
+ def _resolve_repo_root(path: Path) -> Path:
5942
+ """Resolve *path* to a repo root by walking up to find a .git directory.
5943
+
5944
+ If *path* is already a git root (has .git), returns it directly.
5945
+ If *path* is a subdirectory of a git repo, returns the git root.
5946
+ Falls back to *path* itself if no git repo found.
5947
+ """
5948
+ candidate = path.resolve()
5949
+ while True:
5950
+ if (candidate / ".git").exists():
5951
+ return candidate
5952
+ parent = candidate.parent
5953
+ if parent == candidate:
5954
+ break
5955
+ candidate = parent
5956
+ return path.resolve()
5957
+
5958
+
5875
5959
  @cache_app.command("status")
5876
5960
  def cache_status_cmd(
5877
5961
  path: Path = typer.Argument(Path("."), help="Repository path (default: current directory)"),
@@ -5879,7 +5963,7 @@ def cache_status_cmd(
5879
5963
  ) -> None:
5880
5964
  """Show cache statistics for a repository."""
5881
5965
  from sourcecode import cache as _cm
5882
- target = Path(path).resolve()
5966
+ target = _resolve_repo_root(Path(path))
5883
5967
  stats = _cm.status(target)
5884
5968
  if json_output:
5885
5969
  import json as _j
@@ -5913,7 +5997,7 @@ def cache_clear_cmd(
5913
5997
  index used for cold-start bootstrapping. Use --all to also clear it.
5914
5998
  """
5915
5999
  from sourcecode import cache as _cm
5916
- target = Path(path).resolve()
6000
+ target = _resolve_repo_root(Path(path))
5917
6001
  _clear_ris = include_ris or all_
5918
6002
  if not yes:
5919
6003
  _ris_note = " (including RIS)" if _clear_ris else " (RIS preserved — use --all to also clear it)"
@@ -5935,7 +6019,7 @@ def cache_warm_cmd(
5935
6019
  """
5936
6020
  import shutil as _shutil
5937
6021
  import subprocess as _sub
5938
- target = Path(path).resolve()
6022
+ target = _resolve_repo_root(Path(path))
5939
6023
  typer.echo(f"Warming cache for {target} …", err=True)
5940
6024
  _sc_bin = _shutil.which("sourcecode") or sys.argv[0]
5941
6025
  cmd = [_sc_bin, str(target)]
@@ -38,13 +38,7 @@ _REQUEST_METHOD_VERB_RE = re.compile(
38
38
  # Custom security annotation registry — extend here for project-specific annotations.
39
39
  # Each entry: annotation_simple_name → compiled params regex.
40
40
  # Groups: (1) resource string literal, (2) resource constant ref, (3) level integer.
41
- _CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {
42
- "M3FiltroSeguridad": re.compile(
43
- r'@M3FiltroSeguridad\s*\(\s*'
44
- r'(?:nombreRecurso\s*=\s*(?:"([^"]*)"|([\w.]+)))?'
45
- r'(?:[^)]*nivelRequerido\s*=\s*(\d+))?'
46
- ),
47
- }
41
+ _CUSTOM_SECURITY_ANNOTATIONS: dict[str, re.Pattern] = {}
48
42
 
49
43
  # Security config detection
50
44
  _WEB_SECURITY_CONFIGURER_RE = re.compile(r'WebSecurityConfigurerAdapter\b')
sourcecode/mcp/server.py CHANGED
@@ -639,7 +639,7 @@ def get_endpoints(repo_path: str = ".") -> dict:
639
639
  "unknown" (no security signals detected).
640
640
  Supports Spring MVC (@GetMapping etc.) and JAX-RS (@GET/@POST etc.).
641
641
  Security annotations detected: @RolesAllowed, @PermitAll, @DenyAll,
642
- @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement, @M3FiltroSeguridad.
642
+ @Authenticated, @PreAuthorize, @Secured, @SecurityRequirement.
643
643
  repo_path: absolute path to the Java repository (default: current working directory).
644
644
  """
645
645
  _raw = repo_path
@@ -2003,7 +2003,7 @@ class TaskContextBuilder:
2003
2003
  for _cf in (_cr.files_changed or []):
2004
2004
  _cf_norm = _cf.replace("\\", "/")
2005
2005
  # Git reports paths relative to the git root, which may be
2006
- # a parent of the analyzed directory (e.g. MSAS/saint-server/).
2006
+ # a parent of the analyzed directory (e.g. a monorepo root).
2007
2007
  # Strip the analyzed-dir prefix so paths match all_paths.
2008
2008
  if _cf_norm.startswith(_rn_prefix):
2009
2009
  _cf_norm = _cf_norm[len(_rn_prefix):]
@@ -22,6 +22,8 @@ from dataclasses import dataclass, field
22
22
  from pathlib import Path
23
23
  from typing import Any, Optional
24
24
 
25
+ from sourcecode.fqn_utils import normalize_owner_fqn as _normalize_owner_fqn
26
+
25
27
  # ---------------------------------------------------------------------------
26
28
  # Data classes — Phases 1–4
27
29
  # ---------------------------------------------------------------------------
@@ -171,8 +173,6 @@ _PATH_ANNOTATIONS: frozenset[str] = frozenset({"@Path"})
171
173
  # Security / authorization annotations whose args must be captured.
172
174
  # Includes standard Jakarta EE, JAX-RS, Quarkus/MicroProfile, and custom patterns.
173
175
  _PERMISSION_ANNOTATIONS: frozenset[str] = frozenset({
174
- # Custom (kept for backward compat)
175
- "@M3FiltroSeguridad",
176
176
  # Jakarta EE / JAX-RS standard
177
177
  "@RolesAllowed",
178
178
  "@PermitAll",
@@ -2556,7 +2556,6 @@ def _route_security_from_sym(
2556
2556
  @RequiresRoles → {policy: requiresroles, roles: [...]}
2557
2557
  @RequiresPermissions → {policy: requirespermissions, roles: [...]}
2558
2558
  @SecurityRequirement → {policy: openapi_security, spec: ...}
2559
- @M3FiltroSeguridad → {policy: custom_permission, required_permission: ...}
2560
2559
 
2561
2560
  Falls back to class-level annotations if no method-level security found.
2562
2561
  Returns None if no security signal detected at either level.
@@ -2595,15 +2594,6 @@ def _route_security_from_sym(
2595
2594
  if "@SecurityRequirement" in anns:
2596
2595
  raw = vals.get("@SecurityRequirement", "")
2597
2596
  return {"policy": "openapi_security", "spec": raw.strip()}
2598
- # Custom legacy annotation
2599
- if "@M3FiltroSeguridad" in anns:
2600
- import re as _re2
2601
- raw = vals.get("@M3FiltroSeguridad", "")
2602
- m = _re2.search(r'(?:nombreRecurso\s*=\s*)?["\']([^"\']+)["\']', raw)
2603
- if m:
2604
- return {"policy": "custom_permission", "required_permission": m.group(1)}
2605
- # Value is a constant reference or empty — still flag the annotation
2606
- return {"policy": "custom_annotation", "annotation": "@M3FiltroSeguridad", "resource": raw.strip() or None}
2607
2597
  return None
2608
2598
 
2609
2599
  # Method-level first, then class-level fallback
@@ -4248,13 +4238,22 @@ def _all_callers_from_rg(fqn: str, reverse_graph: dict[str, dict[str, list[str]]
4248
4238
  BUG-01 fix: skip 'contained_in' edges — those represent structural membership
4249
4239
  (method→enclosing class), not actual callers. Without this, an Impl class
4250
4240
  with 91 own methods would show 91 "direct callers" and inflate risk to HIGH.
4241
+
4242
+ CH-002 fix: for 'injects' edges, normalize field/constructor FQNs to their
4243
+ enclosing class. e.g. pkg.ConsolidacionService.calcularField → pkg.ConsolidacionService
4244
+ so BFS can continue through DI injection chains and find controllers.
4251
4245
  """
4252
4246
  entry = reverse_graph.get(fqn) or {}
4253
4247
  callers: list[str] = []
4248
+ seen: set[str] = set()
4254
4249
  for edge_type, fqn_list in entry.items():
4255
4250
  if edge_type == "contained_in":
4256
4251
  continue # structural membership, not a caller
4257
- callers.extend(fqn_list)
4252
+ for c in fqn_list:
4253
+ normalized = _normalize_owner_fqn(c) if edge_type == "injects" else c
4254
+ if normalized not in seen:
4255
+ seen.add(normalized)
4256
+ callers.append(normalized)
4258
4257
  return callers
4259
4258
 
4260
4259
 
@@ -57,15 +57,7 @@ _EXTENDS_RE = re.compile(
57
57
  # Custom AOP annotation registry — extend here for project-specific security/AOP annotations.
58
58
  # Each entry: (method_regex, impl_symbol_name).
59
59
  # method_regex must capture the annotated method name in group 1.
60
- _CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = [
61
- (
62
- re.compile(
63
- r'@M3FiltroSeguridad(?:\([^)]*\))?\s+(?:@[^\s]+\s+)*'
64
- r'(?:public|private|protected)\s+\w[\w<>\[\]]*\s+([a-z][A-Za-z0-9_]*)\s*\('
65
- ),
66
- "M3FiltroSeguridadImpl",
67
- ),
68
- ]
60
+ _CUSTOM_AOP_ANNOTATIONS: list[tuple[re.Pattern, str]] = []
69
61
  _LOMBOK_CLASS_RE = re.compile(
70
62
  r'(@(?:Data|Slf4j|Builder|AllArgsConstructor|NoArgsConstructor)(?:\([^)]*\))?\s+)*'
71
63
  r'(?:public\s+)?(?:class|interface)\s+([A-Z][A-Za-z0-9_]*)',
@@ -925,7 +917,7 @@ class SemanticAnalyzer:
925
917
 
926
918
  method="heuristic", confidence="low" para todos los edges Java.
927
919
  Includes: Lombok synthetic symbols, @Autowired field edges,
928
- @Mapper interface detection, inheritance chains, @M3FiltroSeguridad AOP edges.
920
+ @Mapper interface detection, inheritance chains, custom AOP annotation edges.
929
921
  """
930
922
  _JAVA_KEYWORDS: frozenset[str] = frozenset({
931
923
  "if", "for", "while", "switch", "catch", "super", "this", "new",
sourcecode/serializer.py CHANGED
@@ -504,7 +504,9 @@ def _transactional_summary(sm: "SourceMap", *, full: bool = False) -> "Optional[
504
504
  classes = getattr(s, "transactional_classes", [])
505
505
  if classes:
506
506
  total = len(classes)
507
- result: dict[str, Any] = {"count": total, "classes": classes}
507
+ # class_count = unique classes with @Transactional anywhere (file-level scan).
508
+ # spring-audit metadata.tx_stats has method-level annotation breakdown.
509
+ result: dict[str, Any] = {"class_count": total, "classes": classes}
508
510
  if total > 10 and not full:
509
511
  result["classes"] = classes[:10]
510
512
  result["truncated"] = True
@@ -549,9 +551,13 @@ def _security_surface_from_eps(
549
551
  root: "Optional[Path]" = None,
550
552
  file_paths: "Optional[list[str]]" = None,
551
553
  ) -> "Optional[dict[str, Any]]":
552
- """Extract @M3FiltroSeguridad resource names from entry point evidence strings."""
554
+ """Extract permission resource names from entry point evidence strings.
555
+
556
+ Looks for resource=VALUE or nombreRecurso=VALUE patterns in evidence
557
+ produced by custom security annotations on REST controller methods.
558
+ """
553
559
  import re as _re
554
- _NOMBRE_RE = _re.compile(r"nombreRecurso=[\"']([^\"']+)[\"']")
560
+ _RESOURCE_RE = _re.compile(r"(?:resource|nombreRecurso)=[\"']([^\"']+)[\"']")
555
561
  _CONST_SYMBOL_RE = _re.compile(r'^[\w]+\.[\w]+$')
556
562
  resource_names: list[str] = []
557
563
  unresolved: list[str] = []
@@ -560,7 +566,7 @@ def _security_surface_from_eps(
560
566
  evidence = getattr(ep, "evidence", None)
561
567
  if not evidence:
562
568
  continue
563
- for m in _NOMBRE_RE.finditer(evidence):
569
+ for m in _RESOURCE_RE.finditer(evidence):
564
570
  nm = m.group(1)
565
571
  if not nm or nm in seen:
566
572
  continue
@@ -578,8 +584,8 @@ def _security_surface_from_eps(
578
584
  return None
579
585
  result: dict[str, Any] = {
580
586
  "schema": (
581
- "Values used in @M3FiltroSeguridad(nombreRecurso=VALUE) on REST controller "
582
- "methods. Each value names a permission resource checked at runtime."
587
+ "Permission resource identifiers found on REST controller methods. "
588
+ "Each value names a resource checked at runtime by a security annotation."
583
589
  ),
584
590
  "resource_names": resource_names,
585
591
  }
@@ -739,7 +739,9 @@ def run_tx_audit(
739
739
  limitations=_tx_limitations,
740
740
  metadata={
741
741
  "symbols_analyzed": len(getattr(cir, "symbols", [])),
742
- "tx_boundaries_found": tx_index.stats()["total"],
742
+ # tx_annotation_count = total @Transactional symbols (class-level + method-level).
743
+ # tx_stats.class_level matches compact transactional_boundaries.class_count.
744
+ "tx_annotation_count": tx_index.stats()["total"],
743
745
  "tx_stats": tx_index.stats(),
744
746
  "analysis_time_ms": elapsed_ms,
745
747
  },
sourcecode/summarizer.py CHANGED
@@ -223,6 +223,20 @@ class ProjectSummarizer:
223
223
  __import__("re").IGNORECASE,
224
224
  )
225
225
 
226
+ # Patterns that indicate security scanner / tool output, not project description.
227
+ # Trivy, OWASP, Snyk, etc. produce structured vulnerability reports.
228
+ _TOOL_OUTPUT_RE = __import__("re").compile(
229
+ r"CVE-\d{4}-\d{4,}" # CVE identifiers
230
+ r"|UNKNOWN:\s*\d+.*LOW:\s*\d+" # Trivy severity summary line
231
+ r"|(CRITICAL|HIGH|MEDIUM|LOW):\s*\d+" # severity: count pattern
232
+ r"|\bTotal:\s*\d+\s*\(" # "Total: 45 (UNKNOWN: 0, ..." Trivy header
233
+ r"|\bvulnerabilit(?:y|ies)\s+found\b" # "N vulnerabilities found"
234
+ r"|\bscan(?:ned|ning)\s+\d+\s+(?:file|package|image)\b" # scanner progress
235
+ r"|\bpkg:(?:npm|pypi|maven|cargo|golang)/" # PURL package identifiers
236
+ r"|\b(?:trivy|snyk|grype|syft|cosign)\b", # well-known scanner names
237
+ __import__("re").IGNORECASE,
238
+ )
239
+
226
240
  def _extract_first_useful_paragraph(self, content: str) -> str | None:
227
241
  """Extract the first paragraph that describes the project architecture, not its license or marketing."""
228
242
  import re as _re
@@ -268,6 +282,9 @@ class ProjectSummarizer:
268
282
  # Reject license notices and user-facing marketing text
269
283
  if self._LICENSE_MARKETING_RE.search(paragraph):
270
284
  continue
285
+ # Reject security scanner / tool output (Trivy, Snyk, OWASP, CVE lists)
286
+ if self._TOOL_OUTPUT_RE.search(paragraph):
287
+ continue
271
288
  # Reject link-list paragraphs (docs/navigation sections):
272
289
  # if more than 2 markdown links dominate the paragraph, it's a nav section
273
290
  _link_count = len(_MD_LINK_RE.findall(paragraph))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 1.35.29
3
+ Version: 1.35.30
4
4
  Summary: Persistent structural context and ultra-fast repeated analysis for AI coding agents
5
5
  License-File: LICENSE
6
6
  Keywords: agents,ai,codebase,context,developer-tools,llm
@@ -40,7 +40,7 @@ Description-Content-Type: text/markdown
40
40
 
41
41
  **Persistent structural context and ultra-fast repeated analysis for AI coding agents.**
42
42
 
43
- ![Version](https://img.shields.io/badge/version-1.35.29-blue)
43
+ ![Version](https://img.shields.io/badge/version-1.35.30-blue)
44
44
  ![Python](https://img.shields.io/badge/python-3.10%2B-green)
45
45
 
46
46
  ---
@@ -114,7 +114,7 @@ pipx install sourcecode
114
114
 
115
115
  ```bash
116
116
  sourcecode version
117
- # sourcecode 1.35.29
117
+ # sourcecode 1.35.30
118
118
 
119
119
  **v1.35.28** — 7 bug fixes: `rename-class` cross-package disambiguation (BUG-4), `rename-class` collision detection (BUG-2), `find_java_files` false positive on `com/test/` package paths (BUG-1), `cold-start --compact` correct key names (BUG-6), `@EnableMethodSecurity` no longer suppresses SEC-001 (BUG-3), `explain` @Entity stereotype detection (BUG-5), XML+annotation mixed security retagging (BUG-7).
120
120
  ```
@@ -1,13 +1,13 @@
1
- sourcecode/__init__.py,sha256=7uoEdJsiULS_BcH-jg6p93kBPCNguzP9l_u66BOtiOk,104
1
+ sourcecode/__init__.py,sha256=oWXmTtM2iHgXkdQx6ZPJP3HD1muMtWETKpKtejZqsPY,104
2
2
  sourcecode/adaptive_scanner.py,sha256=XffluXKzJUXrMtjEiAOnSNPZnztdIcts17T9ouHeID0,10521
3
3
  sourcecode/architecture_analyzer.py,sha256=qh749a7ykPtGmQI1MR9y6j8TtL_jBdVYFx9YRsLqOMw,44121
4
4
  sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
5
5
  sourcecode/ast_extractor.py,sha256=sa6CmLpn-k5G3_Hzxn8hAlZ5-TS-EVzXDD0Gvxd2jzs,50613
6
6
  sourcecode/cache.py,sha256=wAyPrXN5DqiGivnMpeEuun2xHDKfBer2_oBsh6kj_vc,30447
7
- sourcecode/canonical_ir.py,sha256=2vTLc6wL1cH3NNbEcdZpfX5okh8h5dKq7xd0m0rv_Ro,24167
7
+ sourcecode/canonical_ir.py,sha256=c_lYTVoegg-1W2dZ34_2s3tN8L0GVx7eiDRh9ghdSD8,24178
8
8
  sourcecode/cir_graphs.py,sha256=rZi8JV4ZrAa2WSCeyNa4JIEKQ_yZzDZTsrvVz2KfuKA,8919
9
9
  sourcecode/classifier.py,sha256=2lYoSH3vOTkXZYPU7Go2WIet1-IuNzTWVhc-ULnXtgw,8024
10
- sourcecode/cli.py,sha256=SZDc7biuDWEXYGn1kvvN4RqmWOA-GHnmJbGdHnYBdh0,246491
10
+ sourcecode/cli.py,sha256=pcblBewwYo8t8VIwc7naWeHT33khsLARQxc8O9ZYX_U,250086
11
11
  sourcecode/code_notes_analyzer.py,sha256=EJemNCNc9Dn-1RZYu-aNbK0ELzmsyC4s6FdHi3XyNEI,9392
12
12
  sourcecode/confidence_analyzer.py,sha256=_jckZSxksV-OU38vbkxfVNBnWCtlCq8Vwfg23x1uspA,19054
13
13
  sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -35,28 +35,28 @@ sourcecode/output_budget.py,sha256=Js9yUlfQtPhqBl9R6wn_9UHVjjJc3GtLcqyfjf5t50Q,9
35
35
  sourcecode/path_filters.py,sha256=ROFRQ8eSLBEMiixK9f45-RO7um4VEEcjoD5AA4I427I,3739
36
36
  sourcecode/pr_comment_renderer.py,sha256=smHslxiG14lrytCkq5nFrFu-qTHgA-t-LFYfdrfjz2o,14423
37
37
  sourcecode/pr_impact.py,sha256=dCDVw83EDbyVf6F9ZmEQmsFz8ruVH7d4mpeKQCIZHM0,16805
38
- sourcecode/prepare_context.py,sha256=_indldfE-5OmMdusS_5rYB7MAJb8ub3UCCMHOXe7A_Y,222292
38
+ sourcecode/prepare_context.py,sha256=nvBs5dCzTQMsm5jNEuXp7JHE5l9YIgUmv__pDWbLYgQ,222289
39
39
  sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
40
40
  sourcecode/ranking_engine.py,sha256=ZAucq_YX2KkWUuAZf4P0lhtQ_38vEFnUhuGtSZd1S0E,12970
41
41
  sourcecode/redactor.py,sha256=SB4hwIvg8h-hvcqKcDWaZvA-aSyn-at-BIRwa0tUv5E,3227
42
42
  sourcecode/relevance_scorer.py,sha256=0AgEt4KrV73nioMqBgjhGjtY7L2C7L7cSyKtj3IKcrw,9408
43
43
  sourcecode/rename_refactor.py,sha256=rWCsXoDxJNdsmkUXjPtHphT5CjYOgEPmcc817_8Gu-Y,12538
44
44
  sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
45
- sourcecode/repository_ir.py,sha256=vlReshZputMZSmPLUkM6zbnAvygi3aSk0lKHUbW9ijc,180308
45
+ sourcecode/repository_ir.py,sha256=XbdoDXWRiDepcX14SVSPVcCUIeBYrGBaNAI1XL6GULw,180151
46
46
  sourcecode/ris.py,sha256=RcqLVwC-doFcKKViYDkCjZLBqf_wzLES7-F6vHEeWzE,20419
47
47
  sourcecode/runtime_classifier.py,sha256=uTAD6BDCiBLUZEDRfqk718kM4RTT_vAbfkcOI2_Xx58,18432
48
48
  sourcecode/scanner.py,sha256=WdOQ78mMzjR1NjmKTlbxdgwinnCTfAhxCVLBEFQiFHU,8899
49
49
  sourcecode/schema.py,sha256=aHNXDf8LGyUC8ZDE_VS9kiskC2-Oswhi_WnpdGy6HDw,24897
50
- sourcecode/semantic_analyzer.py,sha256=TDuC3wzZR2DPm1mgrAg1YSLk2QzJoueS3TZAmyGGpCU,89417
51
- sourcecode/serializer.py,sha256=7SBJIbpC_Lg0RGWq8jjNbF5TiuZwoP_fi0qhHnzQM8M,124386
50
+ sourcecode/semantic_analyzer.py,sha256=4OdG6tTSnTvq3_dSWMbQu8Ad1ndSCKeG-b9qM4hIxkw,89176
51
+ sourcecode/serializer.py,sha256=GR1RcgY2abmanTIHiYzRXuOnhB2MidBVAiETat1w5cU,124724
52
52
  sourcecode/spring_event_topology.py,sha256=5_ON_21Le5zbG-1GRc5GLIi5HJfy_QjcXLVPC5WeUGQ,18055
53
53
  sourcecode/spring_findings.py,sha256=8V91iHOg9hFgg6tLLl4FSsgrF-dBqOcO2s-K5sD_goA,5417
54
54
  sourcecode/spring_impact.py,sha256=Ohm2k3W4Wts8Kx8Z7DIM-J-cwGtTJBWKFBsX-WkupBQ,32943
55
55
  sourcecode/spring_model.py,sha256=6Lk3rGGFy2suq867S8Da_aCNAXtSGJ36XBaQd9VNTFc,14888
56
56
  sourcecode/spring_security_audit.py,sha256=AmUkqoExkNZ3YxxZf9TwkwX-f7P_SETm0QC7VqEAqh4,20618
57
57
  sourcecode/spring_semantic.py,sha256=CiAf77p48-RFrUF0zbgww4w2Xigrbo1t5M3ZCDIfV_g,12032
58
- sourcecode/spring_tx_analyzer.py,sha256=u4_ckdEFZUiIsHdUX4OaIhnvoTdAwrxNTFweG6vc7wE,30526
59
- sourcecode/summarizer.py,sha256=YspHEVeYJVmltq0FMtGZF8kIP3qiR2KLcanGL6Y7uTI,20747
58
+ sourcecode/spring_tx_analyzer.py,sha256=DgauE1gUIuLorbNxMdSwIlXJxK_wTHgc0pDV0HyxWPY,30710
59
+ sourcecode/summarizer.py,sha256=zgdps7yS2IktAbWe7IWz0oUcr3QIuNPRGrsScbZ4R1g,21797
60
60
  sourcecode/tree_utils.py,sha256=8GAkIfQAsvtEudIeW1l4ooH_oRtrWR8cpJQJsEa_Pfw,2093
61
61
  sourcecode/workspace.py,sha256=X_6NmNnitvT3_38V-JDChydo_sR68s249hLFlrQskU0,8271
62
62
  sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -68,7 +68,7 @@ sourcecode/detectors/elixir.py,sha256=jCpvt5Yi6jvplc80ovRtWh17q-11ZGo9qX7o8b57TJ
68
68
  sourcecode/detectors/go.py,sha256=2r66uRQfeTWsqxr4HDhT6vExZErby0t46QXLHVBRv9w,2782
69
69
  sourcecode/detectors/heuristic.py,sha256=7cRxrip4yIaggYzZJB6ef8yHKh-gHgiH_pXMFcjlyFU,3723
70
70
  sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
71
- sourcecode/detectors/java.py,sha256=BMdDLBe6vzMa8dqNc8cZ6An0OpQmW5oWcJ02ULzHC_Q,29288
71
+ sourcecode/detectors/java.py,sha256=YFX7RELCST3hMF9VvA3ech0rsvY5G58-wcPA6qYEp04,29098
72
72
  sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
73
73
  sourcecode/detectors/nodejs.py,sha256=Hg3Gmr7yIMJFiLoDwOTk2wtu00wxIs6kZf-oQujTFUA,13187
74
74
  sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
@@ -84,7 +84,7 @@ sourcecode/mcp/__init__.py,sha256=XU4HfRGbdid8wdUA0x_4f7uKZD1z3mv_XUY_WU_T9Mw,17
84
84
  sourcecode/mcp/orchestrator.py,sha256=BMi1D6liJHI3DXiaC8yeBLLP0wXajpCP3-vnRGqrvnw,26850
85
85
  sourcecode/mcp/registry.py,sha256=XeshSuT6NMmeUZ2GCzNVcKcr-2Ljoj4qO-lvSrg17EM,63135
86
86
  sourcecode/mcp/runner.py,sha256=-Dp2qPGRkfNTVen6bKh7WtzQqpcEtsrXoiuajvshlKk,2866
87
- sourcecode/mcp/server.py,sha256=Zapr4lY0i4tqSXY2BfA283VzStHTohNr9N0uMnRSIIA,59911
87
+ sourcecode/mcp/server.py,sha256=f4-k-nx2amhSghlM7EBeZWyqCMEAGodOgrYPdbIUK08,59891
88
88
  sourcecode/mcp/onboarding/__init__.py,sha256=sj2PWqEBmMc4zBNkomg89WtL0M6S7A9yb7_wAuSWNP4,66
89
89
  sourcecode/mcp/onboarding/applier.py,sha256=B9CneieWTpaDSDIyW3S5nrlRlBpvfqUcgi93-mm_ApQ,2135
90
90
  sourcecode/mcp/onboarding/backup.py,sha256=ihqGOR8QTX8HASRSEDyfFyXr5bkXrygPHamv4p9KTmk,1452
@@ -96,8 +96,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
96
96
  sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
97
97
  sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
98
98
  sourcecode/telemetry/transport.py,sha256=QSslxIwij8YkRWcVvxykODDrkiN_GAAEu3dUP7KIWeE,1651
99
- sourcecode-1.35.29.dist-info/METADATA,sha256=jYYziktPfdsRvuLAdGB_lj_3o27zWYeKypP_pBlZ9gE,21705
100
- sourcecode-1.35.29.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
101
- sourcecode-1.35.29.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
102
- sourcecode-1.35.29.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
103
- sourcecode-1.35.29.dist-info/RECORD,,
99
+ sourcecode-1.35.30.dist-info/METADATA,sha256=hVyLBw8IhmtdMLHg6IxGvWNWz9GIEQy5MLSFfid_sxQ,21705
100
+ sourcecode-1.35.30.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
101
+ sourcecode-1.35.30.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
102
+ sourcecode-1.35.30.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
103
+ sourcecode-1.35.30.dist-info/RECORD,,