PyPI - sourcecode - Versions diffs - 0.29.0__py3-none-any.whl → 0.31.0__py3-none-any.whl - Mend

sourcecode 0.29.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

sourcecode/__init__.py +1 -1
sourcecode/architecture_analyzer.py +9 -5
sourcecode/architecture_summary.py +4 -8
sourcecode/classifier.py +5 -1
sourcecode/cli.py +24 -34
sourcecode/confidence_analyzer.py +33 -20
sourcecode/detectors/nodejs.py +60 -18
sourcecode/entrypoint_classifier.py +106 -0
sourcecode/file_classifier.py +215 -0
sourcecode/prepare_context.py +12 -7
sourcecode/schema.py +6 -4
sourcecode/serializer.py +268 -87
sourcecode/summarizer.py +10 -7
{sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/METADATA +1 -1
{sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/RECORD +18 -16
{sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/WHEEL +0 -0
{sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/entry_points.txt +0 -0
{sourcecode-0.29.0.dist-info → sourcecode-0.31.0.dist-info}/licenses/LICENSE +0 -0

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Genera mapas de contexto estructurado para agentes IA."""
-__version__ = "0.29.0"
+__version__ = "0.31.0"

sourcecode/architecture_analyzer.py CHANGED Viewed

@@ -215,18 +215,22 @@ class ArchitectureAnalyzer:
         if pattern not in (None, "unknown", "flat"):
             if all_layers_weak:
                 # Layers came from file-naming heuristic only, not directory structure
-                confidence = "medium"
+                confidence = "low"
                 limitations.append(
-                    "Patron inferido de nombres de archivo — sin estructura de directorios confirmatoria"
+                    "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
                 )
             else:
-                confidence = "high" if len(strong_domains) >= 3 else "medium"
+                confidence = "medium" if len(strong_domains) >= 3 else "low"
+                if graph is None:
+                    limitations.append(
+                        "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
+                    )
         elif len(strong_domains) >= 1:
             confidence = "medium"
         else:
             confidence = "low"
-        method = "graph+heuristic" if graph is not None else "heuristic"
+        method = "graph+structure" if graph is not None else "filesystem_inference"
         return ArchitectureAnalysis(
             requested=True,
@@ -339,7 +343,7 @@ class ArchitectureAnalyzer:
                 best_matched = matched
         if best_score >= 2:
-            layer_confidence: Literal["high", "medium", "low"] = "high" if best_score >= 3 else "medium"
+            layer_confidence: Literal["high", "medium", "low"] = "medium" if best_score >= 3 else "low"
             layers: list[ArchitectureLayer] = []
             for layer_key, matched_dirs in best_matched.items():
                 matched_files = [

sourcecode/architecture_summary.py CHANGED Viewed

@@ -5,6 +5,7 @@ import re
 from pathlib import Path
 from typing import Any
+from sourcecode.entrypoint_classifier import is_production_entry_point
 from sourcecode.schema import EntryPoint, SourceMap, StackDetection
 from sourcecode.tree_utils import flatten_file_tree
@@ -63,11 +64,8 @@ class ArchitectureSummarizer:
             entry for entry in sm.entry_points
             if not self._is_tooling_path(entry.path)
             and not self._is_auxiliary_path(entry.path)
-            and entry.entrypoint_type not in ("benchmark", "example")
+            and is_production_entry_point(entry)
         ]
-        if not entry_points:
-            fallback = self._infer_fallback_entry_points(file_paths, sm.stacks)
-            entry_points = fallback[:1]
         lang_lines: list[str] = []
         if entry_points:
@@ -280,8 +278,7 @@ class ArchitectureSummarizer:
         if modules:
             formatted = self._format_module_list([self._module_label(module) for module in modules])
             if formatted:
-                lines.append(f"Orquesta modulos internos: {formatted}.")
-        lines.append("Produce la salida principal del entry point JavaScript/TypeScript detectado.")
+                lines.append(f"Imports internos del entry point: {formatted}.")
         return lines
     def _summarize_java_entry(self, path: str, content: str, stacks: list[StackDetection]) -> list[str]:
@@ -344,8 +341,7 @@ class ArchitectureSummarizer:
         if internal:
             formatted = self._format_module_list([self._module_label(module) for module in internal])
             if formatted:
-                lines.append(f"Orquesta paquetes internos: {formatted}.")
-        lines.append("Produce la salida principal del binario Go detectado.")
+                lines.append(f"Imports internos del binario Go: {formatted}.")
         return lines
     def _describe_entry_point(self, entry_point: EntryPoint, project_type: str | None) -> str:

sourcecode/classifier.py CHANGED Viewed

@@ -45,8 +45,12 @@ class TypeClassifier:
         primary_stack = self._select_primary_stack(enriched, project_type)
         final_stacks: list[StackDetection] = []
+        primary_assigned = False
         for stack in enriched:
-            final_stacks.append(replace(stack, primary=(stack.stack == primary_stack)))
+            is_primary = stack.stack == primary_stack and not primary_assigned
+            if is_primary:
+                primary_assigned = True
+            final_stacks.append(replace(stack, primary=is_primary))
         return final_stacks, project_type
     def _enrich_stack(

sourcecode/cli.py CHANGED Viewed

@@ -6,9 +6,10 @@ import time
 from pathlib import Path
 from typing import Any, Optional, cast
-import typer
-from sourcecode import __version__
+import typer
+from sourcecode import __version__
+from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
 # ---------------------------------------------------------------------------
@@ -117,11 +118,11 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]:  # type: ignore[nam
                 )
         # overall:high requires at least one production entry point
-        if cs.overall == "high":
-            prod_eps = [
-                ep for ep in sm.entry_points
-                if ep.entrypoint_type in ("production", None)
-            ]
+        if cs.overall == "high":
+            prod_eps = [
+                ep for ep in sm.entry_points
+                if is_production_entry_point(ep)
+            ]
             if not prod_eps and sm.entry_points:
                 issues.append(
                     "[coherence] overall=high but no production entry points exist — "
@@ -134,21 +135,7 @@ def _check_pipeline_coherence(sm: "SourceMap") -> list[str]:  # type: ignore[nam
                 "[coherence] entry_point_confidence=high but entry_points is empty"
             )
-    # Contradictory EP classification: EPs with entrypoint_type=benchmark must not
-    # appear in agent_view output (checked post-facto via produced_by + type)
-    benchmark_eps = [
-        ep for ep in sm.entry_points
-        if ep.entrypoint_type in ("benchmark", "example")
-    ]
-    if benchmark_eps and sm.entry_points and all(
-        ep.entrypoint_type in ("benchmark", "example") for ep in sm.entry_points
-    ):
-        issues.append(
-            f"[coherence] all {len(sm.entry_points)} entry point(s) are benchmark/example — "
-            "no production entry detected; analysis_gaps should reflect impact=high"
-        )
-    return issues
+    return issues
 _HELP = """\
 Deterministic codebase context for AI coding agents.
@@ -909,11 +896,13 @@ def main(
     if dependency_analyzer is not None:
         from sourcecode.dependency_analyzer import _ROLE_PRIORITY
-        primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
-        direct_deps = [
-            d for d in sm.dependencies
-            if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
-        ]
+        primary_ecosystem = sm.stacks[0].stack if sm.stacks else ""
+        direct_deps = [
+            d for d in sm.dependencies
+            if d.scope != "transitive" and d.source in {"manifest", "lockfile"}
+            and (d.role or "unknown") in {"runtime", "parsing", "serialization", "observability", "infra"}
+            and d.scope not in {"dev"}
+        ]
         def _dep_sort_key(d: Any) -> tuple[int, int, str]:
             role_order = _ROLE_PRIORITY.get(d.role or "runtime", 5)
@@ -993,12 +982,13 @@ def main(
             "example", "examples", "docs", "doc", "fixtures", "fixture",
         })
         for _ep in sm.entry_points:
-            _ep_type = _ep.entrypoint_type
-            _path_parts = _ep.path.replace("\\", "/").lower().split("/")
-            _filtered = (
-                _ep_type in ("benchmark", "example")
-                or any(p in _aux_parts for p in _path_parts)
-            )
+            _normalized_ep = normalize_entry_point(_ep)
+            _ep_type = _normalized_ep.entrypoint_type
+            _path_parts = _ep.path.replace("\\", "/").lower().split("/")
+            _filtered = (
+                _normalized_ep.classification != "production"
+                or any(p in _aux_parts for p in _path_parts)
+            )
             if _filtered:
                 _trace.emit("output", "agent_view", "filter_ep",
                             target=_ep.path,

sourcecode/confidence_analyzer.py CHANGED Viewed

@@ -12,6 +12,7 @@ from __future__ import annotations
 from pathlib import Path
 from typing import TYPE_CHECKING
+from sourcecode.entrypoint_classifier import is_production_entry_point, normalize_entry_point
 from sourcecode.schema import AnalysisGap, ConfidenceSummary, SourceMap
 if TYPE_CHECKING:
@@ -59,8 +60,15 @@ class ConfidenceAnalyzer:
                     hard_signals.append(sig)
         # ── Entry point signals ───────────────────────────────────────────────
-        for ep in sm.entry_points:
-            if ep.source in _HARD_SOURCES or ep.reason == "console_script":
+        normalized_entry_points = [normalize_entry_point(ep) for ep in sm.entry_points]
+        for ep in normalized_entry_points:
+            if ep.classification != "production":
+                sig = f"entry:{ep.path} ({ep.classification}, {ep.reason or ep.source})"
+                if sig not in ignored_signals:
+                    ignored_signals.append(sig)
+                continue
+            if ep.source in _HARD_SOURCES or ep.reason == "console_script" or ep.runtime_relevance == "high":
                 sig = f"entry:{ep.path} ({ep.reason or ep.source})"
                 if sig not in hard_signals:
                     hard_signals.append(sig)
@@ -95,13 +103,13 @@ class ConfidenceAnalyzer:
             anomalies.append("All stacks detected via heuristic only — no manifest found")
         # ── Anomaly: entry points all low-confidence ──────────────────────────
-        if sm.entry_points and all(ep.confidence == "low" for ep in sm.entry_points):
+        if normalized_entry_points and all(ep.confidence == "low" for ep in normalized_entry_points):
             anomalies.append("All entry points are low-confidence (heuristic/code_signal only)")
         # ── Anomaly: all production EPs are convention-only (no manifest evidence) ──
         production_eps_check = [
-            ep for ep in sm.entry_points
-            if ep.entrypoint_type in ("production", None)
+            ep for ep in normalized_entry_points
+            if is_production_entry_point(ep)
         ]
         if production_eps_check and all(
             ep.source in ("convention", "heuristic") or ep.reason in ("convention", "entry_file_pattern")
@@ -113,40 +121,40 @@ class ConfidenceAnalyzer:
             )
         # ── Anomaly: no production entry points ───────────────────────────────
-        if sm.entry_points:
+        if normalized_entry_points:
             production_eps = [
-                ep for ep in sm.entry_points
-                if ep.entrypoint_type in ("production", None)
+                ep for ep in normalized_entry_points
+                if is_production_entry_point(ep)
             ]
             if not production_eps:
                 anomalies.append(
-                    "No production entry points — all detected entries are dev/benchmark/example"
+                    "No production entry points — all detected entries are development/auxiliary"
                 )
         # ── Gaps ──────────────────────────────────────────────────────────────
-        if not sm.entry_points:
+        if not normalized_entry_points:
             gaps.append(AnalysisGap(
                 area="entry_points",
-                reason="No entry point detected — project may use non-standard structure or be a library",
+                reason="Critical: no runtime entrypoint detected; system cannot be executed without manual inference",
                 impact="high",
             ))
         elif all(
-            ep.entrypoint_type in ("benchmark", "example", "development")
-            for ep in sm.entry_points
+            ep.classification in ("development", "auxiliary")
+            for ep in normalized_entry_points
         ):
             gaps.append(AnalysisGap(
                 area="entry_points",
                 reason=(
-                    "All detected entry points are auxiliary (benchmark/example/dev) — "
-                    "no production entry point found. Verify project has a 'start'/'serve' "
-                    "script or production binary."
+                    "Critical: no production runtime entrypoint detected; detected entries are "
+                    "development or auxiliary only. Add/verify a start/serve script, CLI bin, "
+                    "or server bootstrap before using this context for automation."
                 ),
                 impact="high",
             ))
-        elif all(ep.confidence == "low" for ep in sm.entry_points):
+        elif all(ep.confidence == "low" for ep in normalized_entry_points):
             gaps.append(AnalysisGap(
                 area="entry_points",
-                reason="Entry points inferred from code patterns only, no manifest declaration found",
+                reason="Entry points inferred from code patterns only; no manifest script, CLI bin, or server bootstrap declaration found",
                 impact="medium",
             ))
@@ -196,12 +204,17 @@ class ConfidenceAnalyzer:
         # Entry points: only consider production EPs for confidence scoring.
         # Benchmark/example/dev-only entries are not evidence of production readiness.
         production_eps = [
-            ep for ep in sm.entry_points
-            if ep.entrypoint_type in ("production", None)
+            ep for ep in normalized_entry_points
+            if is_production_entry_point(ep)
         ]
         ep_conf = _max_confidence([ep.confidence for ep in production_eps] or ["low"])
         overall = _min_confidence([stack_conf, ep_conf])
+        if normalized_entry_points and not production_eps:
+            overall = "low"
+        elif production_eps and all(ep.runtime_relevance == "low" for ep in production_eps):
+            overall = _min_confidence([overall, "low"])
         # Factor in architecture confidence when available
         arch = sm.architecture
         if arch is not None and arch.requested:

sourcecode/detectors/nodejs.py CHANGED Viewed

@@ -58,7 +58,7 @@ class NodejsDetector(AbstractDetector):
         from sourcecode.detectors.hybrid import merge_framework_detections, scan_for_frameworks
-        dependency_names = self._collect_dependency_names(package_json)
+        dependency_names = self._collect_dependency_names(package_json, runtime_only=True)
         seen_fw: set[str] = set()
         manifest_frameworks = []
         for pkg_name, label in _FRAMEWORK_MAP.items():
@@ -98,9 +98,17 @@ class NodejsDetector(AbstractDetector):
             signals.append("monorepo:npm-workspaces")
         return signals
-    def _collect_dependency_names(self, package_json: dict[str, Any]) -> set[str]:
+    def _collect_dependency_names(
+        self,
+        package_json: dict[str, Any],
+        *,
+        runtime_only: bool = False,
+    ) -> set[str]:
         names: set[str] = set()
-        for field in ("dependencies", "devDependencies", "peerDependencies", "optionalDependencies"):
+        fields = ("dependencies", "peerDependencies", "optionalDependencies")
+        if not runtime_only:
+            fields = fields + ("devDependencies",)
+        for field in fields:
             raw = package_json.get(field, {})
             if isinstance(raw, dict):
                 names.update(str(name) for name in raw)
@@ -125,6 +133,9 @@ class NodejsDetector(AbstractDetector):
         "playground", "playgrounds",
         "fixture", "fixtures",
         "sandbox", "e2e", "docs",
+        "test", "tests", "__tests__", "spec", "specs",
+        "scripts", "script", "tools", "tooling", "ci",
+        ".storybook", "storybook",
     })
     def _collect_entry_points(
@@ -144,19 +155,20 @@ class NodejsDetector(AbstractDetector):
                     continue
                 # Extract file path from script command
                 path = self._extract_script_path(script_cmd, context)
+                if path is None:
+                    path = self._infer_tool_script_path(script_name, script_cmd, context)
                 if path and path not in seen and path_exists_in_tree(context.file_tree, path):
                     seen.add(path)
-                    if not self._is_auxiliary_path(path):
-                        entry_points.append(EntryPoint(
-                            path=path,
-                            stack="nodejs",
-                            kind=kind,
-                            source="package.json#scripts",
-                            confidence="high",
-                            reason=f"script:{script_name}",
-                            evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
-                            entrypoint_type=ep_type,
-                        ))
+                    entry_points.append(EntryPoint(
+                        path=path,
+                        stack="nodejs",
+                        kind=kind,
+                        source="package.json#scripts",
+                        confidence="high",
+                        reason=f"script:{script_name}",
+                        evidence=f"scripts.{script_name} = {script_cmd!r:.80}",
+                        entrypoint_type=self._path_entrypoint_type(path, fallback=ep_type),
+                    ))
         # Priority 2: package.json bin — CLI production entry points
         bin_field = package_json.get("bin")
@@ -233,7 +245,7 @@ class NodejsDetector(AbstractDetector):
     def _classify_script(self, script_name: str) -> tuple[str | None, str]:
         """Map script name → (entrypoint_type, kind). Returns (None, '') to skip."""
         lower = script_name.lower()
-        if lower in ("start", "serve"):
+        if lower in ("start", "serve", "server"):
             return "production", "server"
         if lower in ("dev", "develop", "watch"):
             return "development", "server"
@@ -243,6 +255,12 @@ class NodejsDetector(AbstractDetector):
             return "benchmark", "script"
         if lower.startswith("example") or lower.startswith("demo"):
             return "example", "script"
+        if lower in {"docs", "doc", "storybook", "playground"} or any(
+            marker in lower for marker in ("rspress", "vite", "storybook", "playground")
+        ):
+            return "development", "server"
+        if lower in {"test", "e2e", "spec", "lint", "format", "typecheck", "build"}:
+            return "development", "script"
         return None, ""
     def _extract_script_path(self, cmd: str, context: DetectionContext) -> str | None:
@@ -264,12 +282,36 @@ class NodejsDetector(AbstractDetector):
                 return p
         return None
+    def _infer_tool_script_path(
+        self,
+        script_name: str,
+        script_cmd: str,
+        context: DetectionContext,
+    ) -> str | None:
+        text = f"{script_name} {script_cmd}".lower()
+        candidates: list[str] = []
+        if "rspress" in text or "docs" in text or "doc" in text:
+            candidates.extend(["docs/rspress.mjs", "docs/rspress.config.mjs"])
+        if "storybook" in text:
+            candidates.extend([".storybook/main.js", ".storybook/main.ts"])
+        if "vite" in text or "playground" in text:
+            candidates.extend(["playground/vite.config.ts", "vite.config.ts"])
+        for candidate in candidates:
+            if path_exists_in_tree(context.file_tree, candidate):
+                return candidate
+        return None
     def _is_auxiliary_path(self, path: str) -> bool:
         norm = path.replace("\\", "/")
         parts = norm.split("/")
         return any(p.lower() in self._AUXILIARY_DIRS for p in parts)
-    def _path_entrypoint_type(self, path: str) -> str:
-        if self._is_auxiliary_path(path):
+    def _path_entrypoint_type(self, path: str, *, fallback: str = "production") -> str:
+        parts = {p.lower() for p in path.replace("\\", "/").split("/")}
+        if parts & {"benchmark", "benchmarks", "bench", "benches"}:
+            return "benchmark"
+        if parts & {"example", "examples", "demo", "demos", "fixture", "fixtures"}:
             return "example"
-        return "production"
+        if self._is_auxiliary_path(path):
+            return "development"
+        return fallback

sourcecode/entrypoint_classifier.py ADDED Viewed

@@ -0,0 +1,106 @@
+from __future__ import annotations
+from dataclasses import replace
+from typing import Literal
+from sourcecode.schema import EntryPoint
+Classification = Literal["production", "development", "auxiliary"]
+RuntimeRelevance = Literal["high", "medium", "low"]
+_AUXILIARY_DIRS = frozenset({
+    "benchmark", "benchmarks", "bench", "benches",
+    "example", "examples", "demo", "demos",
+    "fixture", "fixtures", "__fixtures__", "testdata", "test_data",
+    "test", "tests", "__tests__", "spec", "specs", "e2e",
+    "script", "scripts", "tool", "tools", "tooling", "ci",
+    "mock", "mocks", "sandbox",
+})
+_DEVELOPMENT_DIRS = frozenset({
+    "docs", "doc", "documentation", "wiki",
+    "playground", "playgrounds", ".storybook", "storybook",
+})
+_DEV_MARKERS = ("rspress", "vite", "storybook", "playground", "dev-server")
+_PRODUCTION_SCRIPT_REASONS = {"script:start", "script:serve", "script:server"}
+def classify_entry_point(ep: EntryPoint) -> Classification:
+    """Return the operational class for an entry point.
+    The rules intentionally prefer exclusion over weak inclusion. Development
+    and auxiliary path evidence wins over detector-provided production labels.
+    """
+    path = ep.path.replace("\\", "/").lower()
+    parts = set(path.split("/"))
+    reason = (ep.reason or "").lower()
+    evidence = (ep.evidence or "").lower()
+    marker_text = f"{path} {reason} {evidence}"
+    if parts & _DEVELOPMENT_DIRS or any(marker in marker_text for marker in _DEV_MARKERS):
+        return "development"
+    if parts & _AUXILIARY_DIRS:
+        return "auxiliary"
+    if ep.entrypoint_type in {"benchmark", "example"}:
+        return "auxiliary"
+    if ep.entrypoint_type == "development":
+        return "development"
+    if (
+        ep.source == "convention"
+        and ep.kind in {"binary", "application"}
+        and ep.stack in {"go", "rust", "java", "dotnet", "kotlin", "scala"}
+    ):
+        return "production"
+    if ep.source in {"heuristic", "convention"}:
+        return "auxiliary"
+    if ep.entrypoint_type == "production":
+        return "production"
+    if ep.source == "package.json#bin" or reason == "bin":
+        return "production"
+    if reason in _PRODUCTION_SCRIPT_REASONS:
+        return "production"
+    return "production"
+def runtime_relevance(ep: EntryPoint, classification: Classification | None = None) -> RuntimeRelevance:
+    classification = classification or classify_entry_point(ep)
+    if classification != "production":
+        return "low"
+    reason = (ep.reason or "").lower()
+    if ep.source == "package.json#bin" or reason == "bin" or reason in _PRODUCTION_SCRIPT_REASONS:
+        return "high"
+    if ep.source == "package.json" and reason in {"main", "module"}:
+        return "medium"
+    if ep.source == "convention" and ep.kind in {"binary", "application"}:
+        return "medium"
+    if ep.source in {"heuristic", "convention"} or ep.confidence == "low":
+        return "low"
+    return "medium"
+def normalize_entry_point(ep: EntryPoint) -> EntryPoint:
+    classification = classify_entry_point(ep)
+    relevance = runtime_relevance(ep, classification)
+    legacy_type = ep.entrypoint_type
+    if classification == "auxiliary" and legacy_type == "production" and ep.source in {"heuristic", "convention"}:
+        legacy_type = None
+    if legacy_type is None:
+        if classification == "production":
+            legacy_type = "production"
+        elif classification == "development":
+            legacy_type = "development"
+    return replace(
+        ep,
+        classification=classification,
+        runtime_relevance=relevance,
+        entrypoint_type=legacy_type,
+    )
+def is_production_entry_point(ep: EntryPoint) -> bool:
+    normalized = normalize_entry_point(ep)
+    return (
+        normalized.classification == "production"
+        and normalized.runtime_relevance in {"high", "medium"}
+    )

sourcecode 0.29.0__py3-none-any.whl → 0.31.0__py3-none-any.whl

sourcecode 0.29.0__py3-none-any.whl → 0.31.0__py3-none-any.whl