PyPI - sourcecode - Versions diffs - 0.41.0__tar.gz → 0.43.0__tar.gz - Mend

sourcecode 0.41.0__tar.gz → 0.43.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148) hide show

{sourcecode-0.41.0 → sourcecode-0.43.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 0.41.0
+Version: 0.43.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004

{sourcecode-0.41.0 → sourcecode-0.43.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "0.41.0"
+version = "0.43.0"
 description = "Deterministic codebase context for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "0.41.0"
+__version__ = "0.43.0"

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/architecture_analyzer.py RENAMED Viewed

@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
         graph: Optional[ModuleGraph] = None,
     ) -> ArchitectureAnalysis:
         limitations: list[str] = []
+        evidence: list[dict] = []
         # Step 1: filter paths
         filtered = self._filter_paths(sm.file_paths)
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
                 requested=True,
                 pattern="unknown",
                 limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
+                evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
+                tentative=False,
             )
         # Step 2: domain clustering
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
             elif pattern == "unknown":
                 limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
-        # Step 3b: monorepo override — workspace config is hard evidence
-        if self._has_workspace_config(sm.file_paths) and pattern not in (
+        # Step 3b: monorepo override — workspace config is hard evidence.
+        # Overrides all weak inferred patterns; only truly specialised patterns
+        # (cqrs, clean, onion, hexagonal) take precedence over workspace config.
+        has_workspace = self._has_workspace_config(sm.file_paths)
+        if has_workspace and pattern not in (
             "monorepo", "cqrs", "clean", "onion", "hexagonal"
         ):
             mono_layers = self._detect_monorepo_packages(filtered)
-            if mono_layers or pattern in (None, "unknown", "flat", "modular", "layered"):
+            # Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
+            # "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
+            # all yield to workspace config evidence.
+            _WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
+                              "fullstack", "mvc", "microservices"}
+            if mono_layers or pattern in _WEAK_PATTERNS:
                 pattern = "monorepo"
                 layers = mono_layers
                 limitations.append(
                     "Workspace config detectado — arquitectura refleja topologia de paquetes"
                 )
+                ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
+                evidence.append({
+                    "type": "workspace_config",
+                    "paths": ws_files[:4],
+                    "reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
+                    "confidence": "high",
+                })
         # Step 4: bounded context inference
         bounded_contexts = self._infer_bounded_contexts(domains, graph)
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
         confidence: Literal["high", "medium", "low"]
         strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
         all_layers_weak = layers and all(l.confidence == "low" for l in layers)
+        method = "graph+structure" if graph is not None else "filesystem_inference"
+        # High-confidence evidence (workspace config) makes pattern non-tentative.
+        tentative = not any(e.get("confidence") == "high" for e in evidence)
+        # _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
+        # When True, tentative must stay False and confidence must stay at least "medium".
+        _hard_evidence = not tentative  # tentative=False iff high-conf evidence present
         if pattern not in (None, "unknown", "flat"):
-            if all_layers_weak:
+            if graph is not None:
+                # Import graph provided — structural validation available
+                confidence = "medium" if len(strong_domains) >= 3 else "low"
+                evidence.append({
+                    "type": "import_graph",
+                    "paths": [n.id for n in graph.nodes[:6]],
+                    "reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
+                    "confidence": "medium",
+                })
+            elif all_layers_weak:
                 # Layers came from file-naming heuristic only, not directory structure
                 confidence = "low"
+                if not _hard_evidence:
+                    tentative = True
                 limitations.append(
                     "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
                 )
+                evidence.append({
+                    "type": "filesystem_naming",
+                    "paths": [l.files[0] for l in layers if l.files][:6],
+                    "reason": (
+                        f"Pattern '{pattern}' inferred from file stem naming conventions only "
+                        "(e.g. *_controller.py, *_service.py). "
+                        "No directory structure or import graph confirmation."
+                    ),
+                    "confidence": "low",
+                })
             else:
-                confidence = "medium" if len(strong_domains) >= 3 else "low"
-                if graph is None:
+                # Directory structure match (or monorepo/workspace override with no layers)
+                confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
+                if confidence == "low" and not _hard_evidence:
+                    tentative = True
+                if not _hard_evidence:
                     limitations.append(
                         "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
                     )
+                if not _hard_evidence:
+                    matched_dirs = sorted({
+                        p.replace("\\", "/").split("/")[0]
+                        for layer in layers for p in layer.files
+                    })
+                    evidence.append({
+                        "type": "filesystem_naming",
+                        "paths": matched_dirs[:8],
+                        "reason": (
+                            f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
+                            "Import graph not available — structural direction of dependencies unverified."
+                        ),
+                        "confidence": "low" if confidence == "low" else "medium",
+                    })
         elif len(strong_domains) >= 1:
             confidence = "medium"
+            if not _hard_evidence:
+                tentative = True
+            evidence.append({
+                "type": "filesystem_naming",
+                "paths": [d.name for d in strong_domains[:6]],
+                "reason": "Domain clustering from directory names; no layer pattern confirmed",
+                "confidence": "low",
+            })
         else:
             confidence = "low"
-        method = "graph+structure" if graph is not None else "filesystem_inference"
+            if not _hard_evidence:
+                tentative = True
+            if not evidence:
+                limitations.append(
+                    "insufficient_evidence: no recognizable architectural signals found; "
+                    "filesystem structure does not match known patterns"
+                )
+                evidence.append({
+                    "type": "filesystem_naming",
+                    "paths": filtered[:6],
+                    "reason": "Only filesystem paths available; no pattern matched",
+                    "confidence": "low",
+                })
         return ArchitectureAnalysis(
             requested=True,
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
             confidence=confidence,
             method=method,
             limitations=limitations,
+            evidence=evidence,
+            tentative=tentative,
         )
     # ------------------------------------------------------------------

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/cli.py RENAMED Viewed

@@ -181,6 +181,7 @@ _OPTIONS_WITH_VALUE: frozenset[str] = frozenset({
     "--dependency-depth",
     "--rank-by",
     "--symbol",
+    "--max-importers",
 })
@@ -594,6 +595,17 @@ def main(
         "--symbol",
         help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
     ),
+    max_importers: int = typer.Option(
+        50,
+        "--max-importers",
+        help=(
+            "Maximum importer files returned by --symbol (default: 50). "
+            "Popular symbols can have hundreds of importers — this prevents output explosion. "
+            "Defining files are never truncated. Override: --symbol Foo --max-importers 200."
+        ),
+        min=1,
+        max=10000,
+    ),
     copy: bool = typer.Option(
         False,
         "--copy",
@@ -770,6 +782,21 @@ def main(
         code_notes = True
         no_tree = True  # agents never need the raw file tree
         typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
+        # Warn about flags that are computed but excluded from agent_view output
+        _agent_suppressed: list[str] = []
+        if full_metrics:
+            _agent_suppressed.append("--full-metrics")
+        if graph_modules:
+            _agent_suppressed.append("--graph-modules")
+        if docs:
+            _agent_suppressed.append("--docs")
+        if _agent_suppressed:
+            typer.echo(
+                f"[agent] warning: {', '.join(_agent_suppressed)} computed but excluded "
+                "from --agent output — agent_view does not include these sections. "
+                "Remove these flags to skip unnecessary computation.",
+                err=True,
+            )
     scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
     raw_tree = scanner.scan_tree()
@@ -1343,6 +1370,7 @@ def main(
             changed_only=changed_only,
             symbol=symbol,
             compress_types=compress_types,
+            max_importers=max_importers,
         )
         sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
         if symbol is not None and len(_contracts) == 0:

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/contract_model.py RENAMED Viewed

@@ -109,3 +109,4 @@ class ContractSummary:
     method_breakdown: dict[str, int] = field(default_factory=dict)
     ranked_by: str = "relevance"
     limitations: list[str] = field(default_factory=list)
+    symbol_truncation: Optional[dict] = None  # set when --symbol truncates importers

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/contract_pipeline.py RENAMED Viewed

@@ -45,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
     ]:
         try:
             result = subprocess.run(
-                cmd, cwd=root, capture_output=True, text=True, timeout=10
+                cmd, cwd=root, capture_output=True, text=True,
+                encoding="utf-8", errors="replace", timeout=10,
             )
-            for line in result.stdout.splitlines():
+            for line in (result.stdout or "").splitlines():
                 line = line.strip()
                 if line:
                     changed.add(line.replace("\\", "/"))
@@ -56,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
     try:
         result = subprocess.run(
             ["git", "status", "--porcelain"],
-            cwd=root, capture_output=True, text=True, timeout=10
+            cwd=root, capture_output=True, text=True,
+            encoding="utf-8", errors="replace", timeout=10,
         )
-        for line in result.stdout.splitlines():
+        for line in (result.stdout or "").splitlines():
             if len(line) > 3:
                 changed.add(line[3:].strip().replace("\\", "/"))
     except Exception:
@@ -129,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
     try:
         result = subprocess.run(
             ["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
-            cwd=root, capture_output=True, text=True, timeout=15,
+            cwd=root, capture_output=True, text=True,
+            encoding="utf-8", errors="replace", timeout=15,
         )
         path_set = set(file_paths)
         counter: Counter[str] = Counter()
-        for line in result.stdout.splitlines():
+        for line in (result.stdout or "").splitlines():
             line = line.strip().replace("\\", "/")
             if line in path_set:
                 counter[line] += 1
@@ -172,6 +175,7 @@ class ContractPipeline:
         changed_only: bool = False,
         symbol: Optional[str] = None,
         compress_types: bool = False,
+        max_importers: int = 50,
     ) -> tuple[list[FileContract], ContractSummary]:
         """Run the full extraction pipeline.
@@ -276,17 +280,19 @@ class ContractPipeline:
         contracts = self._rank(contracts, rank_by)
         # 8. Symbol filter — keep files that define or import the symbol
+        _symbol_truncation: Optional[dict] = None
         if symbol:
-            contracts = _filter_by_symbol(contracts, symbol)
+            contracts, _symbol_truncation = _filter_by_symbol(contracts, symbol, max_importers=max_importers)
             # When shallow scan missed the defining file (deep monorepo), fall back
             # to a grep-based filesystem search over the full directory tree.
             if not contracts:
-                contracts = self._symbol_deep_scan(
+                contracts, _symbol_truncation = self._symbol_deep_scan(
                     root, symbol,
                     known_paths=set(src_paths),
                     entry_paths=entry_paths,
                     changed_files=changed_files,
                     engine=engine,
+                    max_importers=max_importers,
                 )
         # 9. Entrypoints-only filter
@@ -310,6 +316,7 @@ class ContractPipeline:
             method_breakdown=dict(method_counts),
             ranked_by=rank_by,
             limitations=limitations,
+            symbol_truncation=_symbol_truncation,
         )
         return contracts, summary
@@ -329,7 +336,8 @@ class ContractPipeline:
         entry_paths: set[str],
         changed_files: set[str],
         engine: RankingEngine,
-    ) -> list[FileContract]:
+        max_importers: int = 50,
+    ) -> tuple[list[FileContract], dict]:
         """Grep-based fallback when the shallow scan missed the defining files.
         Searches the full directory tree for source files containing *symbol*,
@@ -353,7 +361,7 @@ class ContractPipeline:
             contract.ranking_reasons = fs.reasons
             extra.append(contract)
-        return _filter_by_symbol(extra, symbol)
+        return _filter_by_symbol(extra, symbol, max_importers=max_importers)
 # ---------------------------------------------------------------------------
@@ -409,7 +417,11 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
 # Symbol-aware filter
 # ---------------------------------------------------------------------------
-def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
+def _filter_by_symbol(
+    contracts: list[FileContract],
+    symbol: str,
+    max_importers: int = 50,
+) -> tuple[list[FileContract], dict]:
     """Return contracts that define, import, or structurally reference *symbol*.
     Four tiers applied in order:
@@ -420,6 +432,8 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
        function signatures (word-boundary). Only used when tiers 1-3 fail.
     Defining contracts are ranked first; importers and references follow.
+    max_importers caps tier 3 results to prevent output explosion on popular symbols.
+    Returns (contracts, truncation_metadata).
     """
     sym_l = symbol.lower()
     word_re = re.compile(
@@ -463,8 +477,14 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
     # Tier 3: import matching (case-insensitive when no definers found)
     ci_imports = len(defining) == 0
-    importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
-    importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
+    all_importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
+    all_importers = [c for c in contracts if c.path in all_importer_paths and c.path not in defining_paths]
+    # Apply importer cap — definers are never truncated
+    total_importers = len(all_importers)
+    truncated = total_importers > max_importers
+    importers = all_importers[:max_importers] if truncated else all_importers
+    importer_paths = {c.path for c in importers}
     # Tier 4: type-reference matching (only when tiers 1-3 yield nothing)
     references: list[FileContract] = []
@@ -480,12 +500,27 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
             seen.add(c.path)
             merged.append(c)
-    return sorted(merged, key=lambda c: (
+    result = sorted(merged, key=lambda c: (
         c.path not in defining_paths,
         c.path not in importer_paths,
         -c.relevance_score,
     ))
+    truncation: dict = {
+        "symbol": symbol,
+        "definers_found": len(defining),
+        "importers_found": total_importers,
+        "importers_returned": len(importers),
+        "references_found": len(references),
+        "total_returned": len(result),
+        "truncated": truncated,
+    }
+    if truncated:
+        truncation["truncation_reason"] = "max_importers_limit"
+        truncation["override_hint"] = f"--symbol {symbol} --max-importers {total_importers}"
+    return result, truncation
 # ---------------------------------------------------------------------------
 # Deep symbol scan — grep-based fallback for shallow-scanned repos

{sourcecode-0.41.0 → sourcecode-0.43.0}/src/sourcecode/doc_analyzer.py RENAMED Viewed

@@ -132,6 +132,8 @@ class DocAnalyzer:
         records: list[DocRecord] = []
         limitations: list[str] = list(limitations_pre)
         languages: set[str] = set()
+        # Track per-language support status for honest reporting
+        unsupported_langs: set[str] = set()
         for relative_path in file_paths:
             abs_path = root / relative_path
@@ -176,8 +178,18 @@ class DocAnalyzer:
                 # Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
                 limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
                 languages.add(lang)
+                unsupported_langs.add(lang)
                 # NO records.append() here
+        # Build language_coverage: explicit per-language support status
+        _SUPPORTED_LANGS = {"python", "javascript", "typescript"}
+        lang_coverage: dict[str, str] = {}
+        for lang in languages:
+            if lang in _SUPPORTED_LANGS:
+                lang_coverage[lang] = "supported"
+            else:
+                lang_coverage[lang] = "unsupported"
         # Build summary
         symbol_count = sum(1 for r in records if r.kind != "module")
         total_count = len(records)
@@ -192,6 +204,15 @@ class DocAnalyzer:
                 "no docstrings or JSDoc comments found"
             )
+        # Warn explicitly when unsupported languages are present — agents must not
+        # assume full coverage when Java/Go/Rust files are in scope but not analyzed.
+        if unsupported_langs:
+            sorted_unsupported = sorted(unsupported_langs)
+            limitations.append(
+                f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
+                "only Python and JS/TS docstrings are extracted"
+            )
         summary = DocSummary(
             requested=True,
             total_count=total_count,
@@ -200,6 +221,7 @@ class DocAnalyzer:
             depth=depth,
             truncated=truncated,
             limitations=limitations,
+            language_coverage=lang_coverage,
         )
         return records, summary

sourcecode 0.41.0__tar.gz → 0.43.0__tar.gz

sourcecode 0.41.0__tar.gz → 0.43.0__tar.gz