PyPI - sourcecode - Versions diffs - 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl - Mend

sourcecode-0.30.0-py3-none-any.whl → sourcecode-0.31.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

sourcecode/__init__.py +1 -1
sourcecode/architecture_analyzer.py +9 -5
sourcecode/cli.py +7 -5
sourcecode/confidence_analyzer.py +5 -5
sourcecode/detectors/nodejs.py +11 -3
sourcecode/file_classifier.py +215 -0
sourcecode/prepare_context.py +12 -7
sourcecode/serializer.py +187 -23
sourcecode/summarizer.py +10 -7
{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/METADATA +1 -1
{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/RECORD +14 -13
{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/WHEEL +0 -0
{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/entry_points.txt +0 -0
{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/licenses/LICENSE +0 -0

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Genera mapas de contexto estructurado para agentes IA."""
-__version__ = "0.30.0"
+__version__ = "0.31.0"

sourcecode/architecture_analyzer.py CHANGED Viewed

@@ -215,18 +215,22 @@ class ArchitectureAnalyzer:
         if pattern not in (None, "unknown", "flat"):
             if all_layers_weak:
                 # Layers came from file-naming heuristic only, not directory structure
-                confidence = "medium"
+                confidence = "low"
                 limitations.append(
-                    "Patron inferido de nombres de archivo — sin estructura de directorios confirmatoria"
+                    "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
                 )
             else:
-                confidence = "high" if len(strong_domains) >= 3 else "medium"
+                confidence = "medium" if len(strong_domains) >= 3 else "low"
+                if graph is None:
+                    limitations.append(
+                        "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
+                    )
         elif len(strong_domains) >= 1:
             confidence = "medium"
         else:
             confidence = "low"
-        method = "graph+heuristic" if graph is not None else "heuristic"
+        method = "graph+structure" if graph is not None else "filesystem_inference"
         return ArchitectureAnalysis(
             requested=True,
@@ -339,7 +343,7 @@ class ArchitectureAnalyzer:
                 best_matched = matched
         if best_score >= 2:
-            layer_confidence: Literal["high", "medium", "low"] = "high" if best_score >= 3 else "medium"
+            layer_confidence: Literal["high", "medium", "low"] = "medium" if best_score >= 3 else "low"
             layers: list[ArchitectureLayer] = []
             for layer_key, matched_dirs in best_matched.items():
                 matched_files = [

sourcecode/cli.py CHANGED Viewed

@@ -896,11 +896,13 @@ def main(
     if dependency_analyzer is not None:
         from sourcecode.dependency_analyzer import _ROLE_PRIORITY
-        primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
-        direct_deps = [
-            d for d in sm.dependencies
-            if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
-        ]
+        primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
+        direct_deps = [
+            d for d in sm.dependencies
+            if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
+            and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
+            and d.scope not in {"dev"}
+        ]
         def _dep_sort_key(d: Any) -> tuple[int, int, str]:
             role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)

sourcecode/confidence_analyzer.py CHANGED Viewed

@@ -135,7 +135,7 @@ class ConfidenceAnalyzer:
         if not normalized_entry_points:
             gaps.append(AnalysisGap(
                 area="entry_points",
-                reason="No entry point detected — project may use non-standard structure or be a library",
+                reason="Critical: no runtime entrypoint detected; system cannot be executed without manual inference",
                 impact="high",
             ))
         elif all(
@@ -145,16 +145,16 @@ class ConfidenceAnalyzer:
             gaps.append(AnalysisGap(
                 area="entry_points",
                 reason=(
-                    "All detected entry points are development or auxiliary — "
-                    "no production entry point found. Verify project has a 'start'/'serve' "
-                    "script or production binary."
+                    "Critical: no production runtime entrypoint detected; detected entries are "
+                    "development or auxiliary only. Add/verify a start/serve script, CLI bin, "
+                    "or server bootstrap before using this context for automation."
                 ),
                 impact="high",
             ))
         elif all(ep.confidence == "low" for ep in normalized_entry_points):
             gaps.append(AnalysisGap(
                 area="entry_points",
-                reason="Entry points inferred from code patterns only, no manifest declaration found",
+                reason="Entry points inferred from code patterns only; no manifest script, CLI bin, or server bootstrap declaration found",
                 impact="medium",
             ))

sourcecode/detectors/nodejs.py CHANGED Viewed

@@ -58,7 +58,7 @@ class NodejsDetector(AbstractDetector):
         from sourcecode.detectors.hybrid import merge_framework_detections, scan_for_frameworks
-        dependency_names = self._collect_dependency_names(package_json)
+        dependency_names = self._collect_dependency_names(package_json, runtime_only=True)
         seen_fw: set[str] = set()
         manifest_frameworks = []
         for pkg_name, label in _FRAMEWORK_MAP.items():
@@ -98,9 +98,17 @@ class NodejsDetector(AbstractDetector):
             signals.append("monorepo:npm-workspaces")
         return signals
-    def _collect_dependency_names(self, package_json: dict[str, Any]) -> set[str]:
+    def _collect_dependency_names(
+        self,
+        package_json: dict[str, Any],
+        *,
+        runtime_only: bool = False,
+    ) -> set[str]:
         names: set[str] = set()
-        for field in ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies"):
+        fields = ("dependencies", "peerDependencies", "optionalDependencies")
+        if not runtime_only:
+            fields = fields + ("devDependencies",)
+        for field in fields:
             raw = package_json.get(field, {})
             if isinstance(raw, dict):
                 names.update(str(name) for name in raw)

sourcecode/file_classifier.py ADDED Viewed

@@ -0,0 +1,215 @@
+from __future__ import annotations
+"""Evidence-based file classification for agent context.
+This module intentionally avoids assigning runtime/application roles from a
+directory name alone. Runtime roles require execution evidence, imports,
+definitions, or manifest/config evidence. Tests/tooling/build classifications
+can be structural because their purpose is explicitly encoded by conventional
+locations and config filenames.
+"""
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
+from sourcecode.schema import EntryPoint, MonorepoPackageInfo
+FileCategory = Literal[
+    "runtime_core",
+    "application_logic",
+    "domain_model",
+    "infrastructure",
+    "database_layer",
+    "api_layer",
+    "cli_entrypoint",
+    "tests",
+    "tooling",
+    "build_system",
+]
+@dataclass
+class FileClassification:
+    path: str
+    category: FileCategory
+    confidence: Literal["high", "medium", "low"]
+    relevance: float
+    reason: str
+    evidence: list[str] = field(default_factory=list)
+_CODE_EXTENSIONS = {
+    ".py", ".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs",
+    ".go", ".rs", ".java", ".kt", ".scala", ".rb", ".php", ".cs",
+}
+_TEST_DIRS = {"test", "tests", "__tests__", "spec", "specs", "e2e"}
+_TOOLING_DIRS = {"scripts", "script", "tools", "tool", "tooling", "ci", ".github", ".vscode"}
+_BUILD_FILES = {
+    "package.json", "pyproject.toml", "go.mod", "Cargo.toml", "pom.xml",
+    "build.gradle", "settings.gradle", "Makefile", "Dockerfile",
+    "tsconfig.json", "vite.config.ts", "vite.config.js", "webpack.config.js",
+    "rollup.config.js", "turbo.json", "nx.json", "pnpm-workspace.yaml",
+}
+_TOOLING_FILES = {
+    ".eslintrc", ".prettierrc", "eslint.config.js", "eslint.config.ts",
+    "prettier.config.js", "jest.config.js", "jest.config.ts",
+    "vitest.config.ts", "vitest.config.js", ".editorconfig",
+}
+_API_IMPORTS = {
+    "fastapi", "flask", "django", "express", "koa", "fastify", "hono",
+    "@nestjs/core", "@apollo/server", "graphql", "springframework",
+}
+_DB_IMPORTS = {
+    "sqlalchemy", "psycopg2", "asyncpg", "pymongo", "mongoose", "prisma",
+    "@prisma/client", "typeorm", "sequelize", "pg", "mysql2", "redis",
+}
+_INFRA_IMPORTS = {
+    "boto3", "botocore", "kubernetes", "celery", "dramatiq", "bullmq",
+    "kafkajs", "amqplib", "firebase-admin", "@aws-sdk/",
+}
+_IMPORT_RE = re.compile(
+    r"(?:from\s+([A-Za-z0-9_@./-]+)\s+import|import\s+([A-Za-z0-9_@./-]+)|"
+    r"require\(['\"]([^'\"]+)['\"]\)|from\s+['\"]([^'\"]+)['\"])",
+    re.MULTILINE,
+)
+_DEF_RE = re.compile(r"\b(class|def|function|const|export\s+class|interface|type)\s+[A-Za-z_]", re.MULTILINE)
+class FileClassifier:
+    def __init__(
+        self,
+        root: Path,
+        entry_points: list[EntryPoint],
+        monorepo_packages: list[MonorepoPackageInfo] | None = None,
+    ) -> None:
+        self.root = root
+        self.entry_points = [normalize_entry_point(ep) for ep in entry_points]
+        self.production_entry_paths = {
+            ep.path for ep in self.entry_points if is_production_entry_point(ep)
+        }
+        self.cli_entry_paths = {
+            ep.path for ep in self.entry_points
+            if is_production_entry_point(ep) and ep.kind == "cli"
+        }
+        self._pkg_roles = {
+            pkg.path.rstrip("/") + "/": pkg.architectural_role
+            for pkg in (monorepo_packages or [])
+        }
+    def classify_paths(self, paths: list[str], *, limit: int = 20) -> list[FileClassification]:
+        classified: list[FileClassification] = []
+        for path in paths:
+            item = self.classify(path)
+            if item is not None:
+                classified.append(item)
+        classified.sort(key=lambda item: (-item.relevance, item.path))
+        return classified[:limit]
+    def classify(self, path: str) -> FileClassification | None:
+        norm = path.replace("\\", "/").lstrip("/")
+        parts = norm.split("/")
+        filename = Path(norm).name
+        suffix = Path(norm).suffix.lower()
+        if any(part.lower() in _TEST_DIRS for part in parts[:-1]) or self._is_test_file(norm):
+            return FileClassification(norm, "tests", "high", 0.35, "test file by path/suffix convention", [norm])
+        if filename in _BUILD_FILES:
+            return FileClassification(norm, "build_system", "high", 0.45, "build or package manifest", [filename])
+        if filename in _TOOLING_FILES or any(part.lower() in _TOOLING_DIRS for part in parts[:-1]):
+            return FileClassification(norm, "tooling", "high", 0.25, "tooling/config path", [norm])
+        if suffix not in _CODE_EXTENSIONS:
+            return None
+        content = self._read(norm)
+        imports = self._imports(content)
+        has_defs = bool(_DEF_RE.search(content))
+        evidence: list[str] = []
+        if norm in self.cli_entry_paths:
+            return FileClassification(norm, "cli_entrypoint", "high", 1.0, "declared production CLI entrypoint", ["entry_points"])
+        if norm in self.production_entry_paths:
+            return FileClassification(norm, "runtime_core", "high", 0.95, "declared production runtime entrypoint", ["entry_points"])
+        if self._has_any_import(imports, _API_IMPORTS):
+            evidence = self._matched_imports(imports, _API_IMPORTS)
+            return FileClassification(norm, "api_layer", "high", 0.82, "imports API/server framework", evidence)
+        if self._has_any_import(imports, _DB_IMPORTS):
+            evidence = self._matched_imports(imports, _DB_IMPORTS)
+            return FileClassification(norm, "database_layer", "high", 0.78, "imports database/persistence dependency", evidence)
+        if self._has_any_import(imports, _INFRA_IMPORTS):
+            evidence = self._matched_imports(imports, _INFRA_IMPORTS)
+            return FileClassification(norm, "infrastructure", "high", 0.72, "imports infrastructure dependency", evidence)
+        role = self._package_role(norm)
+        if role in {"runtime_core", "backend_runtime", "frontend_runtime", "plugin_host"} and has_defs:
+            return FileClassification(norm, "application_logic", "medium", 0.65, "code definitions inside runtime package", [f"workspace_role:{role}"])
+        if self._looks_like_domain_model(norm, content, has_defs):
+            return FileClassification(norm, "domain_model", "medium", 0.58, "model/entity definitions detected", ["class/type definition"])
+        if has_defs and imports:
+            return FileClassification(norm, "application_logic", "medium", 0.52, "code definitions with imports", self._sample(imports))
+        return None
+    def _read(self, path: str) -> str:
+        try:
+            return (self.root / path).read_text(encoding="utf-8", errors="replace")[:12000]
+        except OSError:
+            return ""
+    def _imports(self, content: str) -> list[str]:
+        imports: list[str] = []
+        for match in _IMPORT_RE.findall(content):
+            value = next((part for part in match if part), "")
+            if value:
+                imports.append(value)
+        return imports
+    def _has_any_import(self, imports: list[str], needles: set[str]) -> bool:
+        return bool(self._matched_imports(imports, needles))
+    def _matched_imports(self, imports: list[str], needles: set[str]) -> list[str]:
+        matched: list[str] = []
+        for imp in imports:
+            low = imp.lower()
+            if any(low == n or low.startswith(n + "/") or low.startswith(n + ".") for n in needles):
+                matched.append(f"import:{imp}")
+        return matched[:4]
+    def _package_role(self, path: str) -> str:
+        for prefix, role in self._pkg_roles.items():
+            if path.startswith(prefix):
+                return role
+        return ""
+    def _is_test_file(self, path: str) -> bool:
+        name = Path(path).name.lower()
+        return (
+            name.startswith("test_")
+            or ".test." in name
+            or ".spec." in name
+            or name.endswith("_test.py")
+        )
+    def _looks_like_domain_model(self, path: str, content: str, has_defs: bool) -> bool:
+        if not has_defs:
+            return False
+        parts = {part.lower() for part in path.split("/")[:-1]}
+        if parts & {"domain", "models", "model", "entities", "entity"}:
+            return True
+        return "@dataclass" in content or "pydantic" in content.lower()
+    def _sample(self, imports: list[str]) -> list[str]:
+        return [f"import:{imp}" for imp in imports[:4]]

sourcecode/prepare_context.py CHANGED Viewed

@@ -410,6 +410,8 @@ class TaskContextBuilder:
             direct = [
                 d for d in dep_records
                 if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
+                and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
+                and d.scope not in {"dev"}
             ]
             direct.sort(key=lambda d: (0 if d.ecosystem == primary_eco else 1, d.name.lower()))
             key_dependencies = [asdict(d) for d in direct[:15]]
@@ -626,7 +628,12 @@ class TaskContextBuilder:
         uncommitted_files: Optional[set[str]] = None,
     ) -> list[RelevantFile]:
         from sourcecode.relevance_scorer import RelevanceScorer
+        from sourcecode.file_classifier import FileClassifier
         scorer = RelevanceScorer(monorepo_packages or [])
+        file_classifier = FileClassifier(self.root, [
+            # _rank_files only needs production path evidence; EntryPoint objects
+            # are not available here, so category evidence is best-effort below.
+        ], monorepo_packages or [])
         # Auxiliary entry points (benchmark, docs, examples) must not get
         # the production entry boost — they are not runtime signals.
@@ -660,12 +667,10 @@ class TaskContextBuilder:
                 score += 3.0
                 reasons.append("entry point")
-            path_lower = path.lower()
-            for keyword in spec.ranking_boosts:
-                if keyword in path_lower:
-                    score += 1.5
-                    reasons.append(f"matches '{keyword}'")
-                    break
+            file_class = file_classifier.classify(path)
+            if file_class is not None:
+                score += file_class.relevance * 2.0
+                reasons.append(f"{file_class.category}: {file_class.reason}")
             if is_test:
                 score += 2.0
@@ -673,7 +678,7 @@ class TaskContextBuilder:
             elif self._is_source(path):
                 score += 0.5
                 if not reasons:
-                    reasons.append("source file")
+                    reasons.append("source file with supported extension")
             # Operational relevance boost/penalty from package role
             rel = scorer.score(path)

sourcecode/serializer.py CHANGED Viewed

@@ -16,6 +16,7 @@ from pathlib import Path
 from typing import Any, Optional
 from sourcecode.entrypoint_classifier import normalize_entry_point, is_production_entry_point
+from sourcecode.file_classifier import FileClassifier
 from sourcecode.schema import (
     ArchitectureAnalysis,
     ModuleGraph,
@@ -86,6 +87,146 @@ def _entry_point_groups(entry_points: list[Any]) -> dict[str, list[dict[str, Any
     return groups
+_PRODUCTION_DEP_ROLES = {"runtime", "parsing", "serialization", "observability", "infra"}
+_DEV_DEP_ROLES = {"devtool"}
+_TEST_DEP_ROLES = {"testtool"}
+_BUILD_DEP_ROLES = {"buildtool"}
+def _dependency_groups(sm: SourceMap) -> dict[str, list[dict[str, Any]]]:
+    groups: dict[str, list[dict[str, Any]]] = {
+        "production_dependencies": [],
+        "dev_tools": [],
+        "test_utilities": [],
+        "build_tooling": [],
+        "noise_dependencies": [],
+        "suspicious_dependencies": [],
+    }
+    if sm.dependency_summary is None or not sm.dependency_summary.requested:
+        return groups
+    root = Path(sm.metadata.analyzed_path) if sm.metadata.analyzed_path else Path(".")
+    import_index = _dependency_import_index(root, sm.file_paths)
+    for dep in sm.dependency_summary.dependencies:
+        if dep.scope == "transitive":
+            continue
+        item = {
+            k: v for k, v in asdict(dep).items()
+            if v is not None and k not in {"parent"}
+        }
+        role = dep.role or "unknown"
+        scope = dep.scope
+        name_key = _dep_import_key(dep.name)
+        if role in _PRODUCTION_DEP_ROLES and scope not in {"dev"}:
+            groups["production_dependencies"].append(item)
+            if dep.source == "manifest" and name_key not in import_index:
+                suspect = dict(item)
+                suspect["reason"] = "declared as production dependency but no static import observed"
+                groups["suspicious_dependencies"].append(suspect)
+        elif role in _TEST_DEP_ROLES:
+            groups["test_utilities"].append(item)
+        elif role in _BUILD_DEP_ROLES:
+            groups["build_tooling"].append(item)
+        elif role in _DEV_DEP_ROLES or scope in {"dev", "optional"}:
+            groups["dev_tools"].append(item)
+        else:
+            groups["noise_dependencies"].append(item)
+    for values in groups.values():
+        values.sort(key=lambda d: (d.get("ecosystem", ""), d.get("name", "")))
+    return groups
+def _dependency_import_index(root: Path, file_paths: list[str]) -> set[str]:
+    import re
+    index: set[str] = set()
+    import_re = re.compile(
+        r"(?:from\s+([A-Za-z0-9_@./-]+)\s+import|import\s+([A-Za-z0-9_@./-]+)|"
+        r"require\(['\"]([^'\"]+)['\"]\)|from\s+['\"]([^'\"]+)['\"])",
+        re.MULTILINE,
+    )
+    for path in file_paths[:2000]:
+        if Path(path).suffix.lower() not in {".py", ".js", ".ts", ".tsx", ".jsx", ".mjs", ".cjs"}:
+            continue
+        try:
+            content = (root / path).read_text(encoding="utf-8", errors="replace")[:20000]
+        except OSError:
+            continue
+        for match in import_re.findall(content):
+            raw = next((part for part in match if part), "")
+            if raw and not raw.startswith("."):
+                index.add(_dep_import_key(raw))
+    return index
+def _dep_import_key(name: str) -> str:
+    lowered = name.lower()
+    if lowered.startswith("@"):
+        parts = lowered.split("/")
+        return "/".join(parts[:2])
+    return lowered.split("/")[0].replace("_", "-")
+def _file_relevance(sm: SourceMap, *, limit: int = 15) -> list[dict[str, Any]]:
+    root = Path(sm.metadata.analyzed_path) if sm.metadata.analyzed_path else Path(".")
+    classifier = FileClassifier(root, sm.entry_points, sm.monorepo_packages)
+    items = classifier.classify_paths(sm.file_paths, limit=limit)
+    return [asdict(item) for item in items]
+def _architecture_context(sm: SourceMap) -> dict[str, Any]:
+    arch = sm.architecture
+    if arch is not None and arch.requested:
+        pattern = arch.pattern if arch.pattern not in (None, "unknown", "flat") else "no confirmed architecture pattern; inferred partial layering"
+        return {
+            "summary": sm.architecture_summary,
+            "pattern": pattern,
+            "confidence": arch.confidence,
+            "method": arch.method,
+            "layers": [
+                {
+                    "name": layer.name,
+                    "confidence": layer.confidence,
+                    "file_count": len(layer.files),
+                }
+                for layer in arch.layers
+            ],
+            "limitations": arch.limitations,
+        }
+    return {
+        "summary": sm.architecture_summary,
+        "pattern": "no confirmed architecture pattern; inferred partial layering",
+        "confidence": "low",
+        "method": "not_requested",
+        "limitations": [
+            "architecture analyzer not requested; summary limited to stack, filesystem and entrypoint evidence"
+        ],
+    }
+def _section_confidence(sm: SourceMap) -> dict[str, str]:
+    cs = sm.confidence_summary
+    dep_conf = "low"
+    if sm.dependency_summary is not None and sm.dependency_summary.requested:
+        dep_conf = "medium"
+        if sm.dependency_summary.sources and sm.dependency_summary.total_count > 0:
+            dep_conf = "high"
+    arch_conf = "low"
+    if sm.architecture is not None and sm.architecture.requested:
+        arch_conf = sm.architecture.confidence
+    file_conf = "medium" if sm.file_paths else "low"
+    return {
+        "stack": cs.stack_confidence if cs else "low",
+        "entrypoints": cs.entry_point_confidence if cs else "low",
+        "dependencies": dep_conf,
+        "architecture": arch_conf,
+        "file_relevance": file_conf,
+    }
 def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
     """Context package ready for prompt or handoff (~600-800 tokens).
@@ -102,10 +243,13 @@ def compact_view(sm: SourceMap, *, no_tree: bool = False) -> dict[str, Any]:
     """
     dep_summary_dict: Any = None
     key_deps: Any = None
-    if sm.dependency_summary is not None and sm.dependency_summary.requested:
-        dep_summary_dict = asdict(sm.dependency_summary)
-        dep_summary_dict.pop("dependencies", None)
-        key_deps = [asdict(d) for d in sm.key_dependencies]
+    if sm.dependency_summary is not None and sm.dependency_summary.requested:
+        dep_summary_dict = asdict(sm.dependency_summary)
+        dep_summary_dict.pop("dependencies", None)
+        key_deps = [
+            asdict(d) for d in sm.key_dependencies
+            if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
+        ]
     elif sm.dependency_summary is None or not sm.dependency_summary.requested:
         dep_summary_dict = None  # "not analyzed" — agent should add --dependencies
@@ -455,9 +599,13 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
     result["development_entry_points"] = ep_groups["development"]
     result["auxiliary_entry_points"] = ep_groups["auxiliary"]
-    # ── 3. Architecture ───────────────────────────────────────────────────────
-    if sm.architecture_summary:
-        result["architecture"] = sm.architecture_summary
+    # ── 3. Architecture ───────────────────────────────────────────────────────
+    result["architecture"] = _architecture_context(sm)
+    # ── 3a. File relevance: evidence-backed categories, not keyword matches ──
+    relevant_files = _file_relevance(sm)
+    if relevant_files:
+        result["file_relevance"] = relevant_files
     # ── 3b. Monorepo package roles (when available) ───────────────────────────
     if sm.monorepo_packages:
@@ -470,13 +618,25 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
         if operational_pkgs:
             result["runtime_packages"] = operational_pkgs
-    # ── 4. Key dependencies (role-sorted, already computed) ───────────────────
-    if sm.dependency_summary and sm.dependency_summary.requested and sm.key_dependencies:
-        _dep_skip = {"parent", "manifest_path", "workspace", "source", "ecosystem"}
-        result["key_dependencies"] = [
-            {k: v for k, v in asdict(d).items() if v is not None and k not in _dep_skip}
-            for d in sm.key_dependencies
-        ]
+    # ── 4. Dependencies: separated by operational role ───────────────────────
+    dep_groups = _dependency_groups(sm)
+    if dep_groups["production_dependencies"]:
+        result["production_dependencies"] = dep_groups["production_dependencies"][:15]
+    for dep_key in ("dev_tools", "test_utilities", "build_tooling", "noise_dependencies", "suspicious_dependencies"):
+        if dep_groups[dep_key]:
+            result[dep_key] = dep_groups[dep_key][:15]
+    # Backward-compatible compact list, now production-only.
+    production_key_deps = [
+        d for d in sm.key_dependencies
+        if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
+    ]
+    if sm.dependency_summary and sm.dependency_summary.requested and production_key_deps:
+        _dep_skip = {"parent", "manifest_path", "workspace", "source", "ecosystem"}
+        result["key_dependencies"] = [
+            {k: v for k, v in asdict(d).items() if v is not None and k not in _dep_skip}
+            for d in production_key_deps[:15]
+        ]
     # ── 5. Signals — compact operational context ─────────────────────────────
     signals: dict[str, Any] = {}
@@ -509,11 +669,12 @@ def agent_view(sm: SourceMap) -> dict[str, Any]:
     # ── 6. Confidence summary ─────────────────────────────────────────────────
     if sm.confidence_summary is not None:
         cs = sm.confidence_summary
-        conf: dict[str, Any] = {
-            "overall": cs.overall,
-            "stack": cs.stack_confidence,
-            "entry_points": cs.entry_point_confidence,
-        }
+        conf: dict[str, Any] = {
+            "overall": cs.overall,
+            "stack": cs.stack_confidence,
+            "entry_points": cs.entry_point_confidence,
+            "sections": _section_confidence(sm),
+        }
         if cs.hard_signals:
             conf["hard_signals"] = cs.hard_signals
         if cs.soft_signals:
@@ -596,10 +757,13 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
     # Layer B — signals (only when the corresponding analyzer ran)
     if sm.dependency_summary is not None and sm.dependency_summary.requested:
-        dep_dict = asdict(sm.dependency_summary)
-        dep_dict.pop("dependencies", None)  # avoid duplication with key_dependencies
-        result["dependency_summary"] = dep_dict
-        result["key_dependencies"] = [asdict(d) for d in sm.key_dependencies]
+        dep_dict = asdict(sm.dependency_summary)
+        dep_dict.pop("dependencies", None)  # avoid duplication with key_dependencies
+        result["dependency_summary"] = dep_dict
+        result["key_dependencies"] = [
+            asdict(d) for d in sm.key_dependencies
+            if (d.role or "unknown") in _PRODUCTION_DEP_ROLES and d.scope not in {"dev"}
+        ]
     if sm.env_summary is not None and sm.env_summary.requested:
         result["env_summary"] = asdict(sm.env_summary)

sourcecode/summarizer.py CHANGED Viewed

@@ -9,6 +9,7 @@ from pathlib import Path
 from typing import Any
 from sourcecode.detectors.parsers import load_json_file, load_toml_file
+from sourcecode.entrypoint_classifier import is_production_entry_point
 from sourcecode.schema import MonorepoPackageInfo, SourceMap
 _TOOLING_PREFIXES = (".claude/", ".vscode/", "bin/")
@@ -31,6 +32,8 @@ _ARCH_LAYER_NAMES = {
     "schemas", "types",
     "migrations", "seeds",
     "scripts", "tools",
+    "docs", "doc", "documentation", "examples", "example", "benchmarks",
+    "benchmark", "playground", "playgrounds", "fixtures", "fixture",
 }
 _CODE_EXTENSIONS = {
@@ -108,7 +111,6 @@ class ProjectSummarizer:
         frameworks = [f.name for f in primary.frameworks]
         fw_part = f" ({', '.join(frameworks[:3])})" if frameworks else ""
-        arch_pattern = self._detect_architecture_pattern(sm.file_paths)
         domains = self._extract_business_domains(sm.file_paths)
         dep_part = self._build_dep_part(sm)
@@ -122,13 +124,16 @@ class ProjectSummarizer:
             domains_part = f" Dominios: {', '.join(domains)}." if domains else ""
             return f"Monorepo{ws_part} en {stacks_desc}.{domains_part}{dep_part}"
-        arch_suffix = f" con arquitectura {arch_pattern}" if arch_pattern else ""
+        arch_suffix = ""
         base = f"{type_label} en {stack_name}{fw_part}{arch_suffix}."
         if domains:
             extra = f" Dominios: {', '.join(domains)}."
         else:
-            ep_paths = [ep.path for ep in sm.entry_points if not self._is_tooling_path(ep.path)][:3]
+            ep_paths = [
+                ep.path for ep in sm.entry_points
+                if not self._is_tooling_path(ep.path) and is_production_entry_point(ep)
+            ][:3]
             extra = f" Entry points: {', '.join(ep_paths)}." if ep_paths else ""
         return f"{base}{extra}{dep_part}"
@@ -210,12 +215,10 @@ class ProjectSummarizer:
         if non_tooling_stacks:
             primary = self._select_summary_primary_stack(non_tooling_stacks)
             frameworks = [fw.name for fw in primary.frameworks[:2]]
-            arch_pattern = self._detect_architecture_pattern(sm.file_paths)
-            arch_str = f" con arquitectura {arch_pattern}" if arch_pattern else ""
             if frameworks:
-                parts.append(f"Stack: {primary.stack.capitalize()} ({', '.join(frameworks)}){arch_str}")
+                parts.append(f"Stack: {primary.stack.capitalize()} ({', '.join(frameworks)})")
             else:
-                parts.append(f"Stack: {primary.stack.capitalize()}{arch_str}")
+                parts.append(f"Stack: {primary.stack.capitalize()}")
         # Business domains only — skip entry_points (too technical for product summary)
         domains = self._extract_business_domains(sm.file_paths)

{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 0.30.0
+Version: 0.31.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004

{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/RECORD RENAMED Viewed

@@ -1,28 +1,29 @@
-sourcecode/__init__.py,sha256=MU2HxHzhdlDeES-MGTUNA1df0X4nB3GWAvjTRWUEoys,100
-sourcecode/architecture_analyzer.py,sha256=SBRMWJN70M2qeNLkm9oCG_1rw2UOVuNgikyeAHJsXKw,22859
+sourcecode/__init__.py,sha256=lB4qjieACxD90qahkCtPTDiGAgKIQbJhcHbLxOgO4lc,100
+sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
 sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
 sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
-sourcecode/cli.py,sha256=LKtus6aETNZv70fkp5LrjTfvu5w9jsB4go-7MCoDnzg,50611
+sourcecode/cli.py,sha256=weX1vbYuzcSJ8Ny-6HWXevB9ZvNbu-8qrdh6Sxgl9JQ,50752
 sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvUQKw4,9226
-sourcecode/confidence_analyzer.py,sha256=B48lCuz_t_qsyjPQdLbKUj2kJ0Wu4Sq5ZnO18F_v3eU,12069
+sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
 sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
 sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
 sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
 sourcecode/doc_analyzer.py,sha256=Ec3orx6vBKsh5cNM3-F4y2Got2KuKx8w3dErwtdtM-A,19891
 sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
 sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
+sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
 sourcecode/git_analyzer.py,sha256=saI5wtHBEOXBhdk7SrVR7ArSM6MFkyGgukvGRuD9WRc,9638
 sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
 sourcecode/metrics_analyzer.py,sha256=4uh11v-Q0gdrN87BOxuFWUym3N3AOkOuy21K5N8peB8,20126
-sourcecode/prepare_context.py,sha256=--lD2dhNkBYI8kwb14d1DlFmEN8XF1Ygtf0Qk7-Y1Bs,30911
+sourcecode/prepare_context.py,sha256=vxEzr8czS3MFbdTx4hBJQlJLrl9cuvbHdL3ZokxFkvo,31384
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
 sourcecode/relevance_scorer.py,sha256=2yvxDFnz9YGrHEJubgx9soiVIDZHKv_pntOtTARtKow,5928
 sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
 sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
 sourcecode/schema.py,sha256=wylO5aKFBHBUAvMh4AH6hKKcN8p5yt6XRkyRvZRjV-4,20378
 sourcecode/semantic_analyzer.py,sha256=asQfJf-EhzYaOTA-iMuZsrVXtbW7SV2WEKCxgsxa88Y,79413
-sourcecode/serializer.py,sha256=VksZokFUG3GLWz_eUtVqNdkddkeV-tBY2lzfa8ociAc,27898
-sourcecode/summarizer.py,sha256=YfBixsN1zWHHXdOEqaf793BylbJrsj75ST7FN6jcqRU,15424
+sourcecode/serializer.py,sha256=c6q0rdrxeVpVfMF_yYK_1xRp6jtfR2UWePBKG9dx6-o,34315
+sourcecode/summarizer.py,sha256=NJiq8zzL9qsvMkIQxqvv0oGBSuFTc5OwplrK_blJV4o,15409
 sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
 sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
 sourcecode/detectors/__init__.py,sha256=A0AACJFF6HWf_RgatNtWu3PUzstcKtIGM9f1PoFcJug,1987
@@ -36,7 +37,7 @@ sourcecode/detectors/heuristic.py,sha256=Hab_Uiuxtq-WBs_wCnzETBS5hhaxeEtf-GOGMH6
 sourcecode/detectors/hybrid.py,sha256=IGFRUVsAZ1ooRlFdznCeJAV6vy1yVDx-VyghvLtddXc,9101
 sourcecode/detectors/java.py,sha256=cZvB13cqJ76zHDncEG-TOCuK8gJjJN2mZGS2DGEcZy8,7715
 sourcecode/detectors/jvm_ext.py,sha256=EgHJ5W8EE-ZTN9V607mVzohyKgZE8Mc2jCi-DF8RAZU,2616
-sourcecode/detectors/nodejs.py,sha256=LN-m3bERpijlBMl1TNVOH_cJDhfDYRhn8K8lsNzztVc,12923
+sourcecode/detectors/nodejs.py,sha256=7fsyAmrGkkguX6U80HUQpIe9MRaYyi_A7zbaRtmFmGc,13097
 sourcecode/detectors/parsers.py,sha256=ugPg8yNUf0Ai1gA7Fnn6wAkYGFjTxRodSP3IeViYJJ4,2290
 sourcecode/detectors/php.py,sha256=W_AQD0WMVDdWHa9h_ilX6W8XSpz0X4ctpMK2WXfXf1I,1887
 sourcecode/detectors/project.py,sha256=egFUnHC93xFfb-ikGCIOSkRdyP52qytDx9W7pGkX0MY,6525
@@ -52,8 +53,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-0.30.0.dist-info/METADATA,sha256=wjMQ_CyxnBDjQ6G_7PLE5crhTdh2sl6wd6Bkdy3t48o,25020
-sourcecode-0.30.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-0.30.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-0.30.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-0.30.0.dist-info/RECORD,,
+sourcecode-0.31.0.dist-info/METADATA,sha256=hWhgC_eeLe8eKsxxIIp9iAtJNmXT3yKIeVn2il4MBB8,25020
+sourcecode-0.31.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-0.31.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-0.31.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-0.31.0.dist-info/RECORD,,

{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-0.30.0.dist-info → sourcecode-0.31.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 0.30.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

sourcecode 0.30.0py3-none-any.whl → 0.31.0py3-none-any.whl