PyPI - sourcecode - Versions diffs - 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl - Mend

sourcecode 0.41.0 (py3-none-any.whl) → 0.42.0 (py3-none-any.whl)

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15)

sourcecode/__init__.py +1 -1
sourcecode/architecture_analyzer.py +94 -8
sourcecode/contract_pipeline.py +9 -6
sourcecode/doc_analyzer.py +22 -0
sourcecode/env_analyzer.py +110 -22
sourcecode/git_analyzer.py +13 -2
sourcecode/prepare_context.py +6 -2
sourcecode/schema.py +29 -0
sourcecode/semantic_analyzer.py +64 -0
sourcecode/serializer.py +15 -3
{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/METADATA +1 -1
{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/RECORD +15 -15
{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/WHEEL +0 -0
{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/entry_points.txt +0 -0
{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/licenses/LICENSE +0 -0

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "0.41.0"
+__version__ = "0.42.0"

sourcecode/architecture_analyzer.py CHANGED Viewed

@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
         graph: Optional[ModuleGraph] = None,
     ) -> ArchitectureAnalysis:
         limitations: list[str] = []
+        evidence: list[dict] = []
         # Step 1: filter paths
         filtered = self._filter_paths(sm.file_paths)
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
                 requested=True,
                 pattern="unknown",
                 limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
+                evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
+                tentative=False,
             )
         # Step 2: domain clustering
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
             elif pattern == "unknown":
                 limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
-        # Step 3b: monorepo override — workspace config is hard evidence
-        if self._has_workspace_config(sm.file_paths) and pattern not in (
+        # Step 3b: monorepo override — workspace config is hard evidence.
+        # Overrides all weak inferred patterns; only truly specialised patterns
+        # (cqrs, clean, onion, hexagonal) take precedence over workspace config.
+        has_workspace = self._has_workspace_config(sm.file_paths)
+        if has_workspace and pattern not in (
             "monorepo", "cqrs", "clean", "onion", "hexagonal"
         ):
             mono_layers = self._detect_monorepo_packages(filtered)
-            if mono_layers or pattern in (None, "unknown", "flat", "modular", "layered"):
+            # Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
+            # "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
+            # all yield to workspace config evidence.
+            _WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
+                              "fullstack", "mvc", "microservices"}
+            if mono_layers or pattern in _WEAK_PATTERNS:
                 pattern = "monorepo"
                 layers = mono_layers
                 limitations.append(
                     "Workspace config detectado — arquitectura refleja topologia de paquetes"
                 )
+                ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
+                evidence.append({
+                    "type": "workspace_config",
+                    "paths": ws_files[:4],
+                    "reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
+                    "confidence": "high",
+                })
         # Step 4: bounded context inference
         bounded_contexts = self._infer_bounded_contexts(domains, graph)
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
         confidence: Literal["high", "medium", "low"]
         strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
         all_layers_weak = layers and all(l.confidence == "low" for l in layers)
+        method = "graph+structure" if graph is not None else "filesystem_inference"
+        # High-confidence evidence (workspace config) makes pattern non-tentative.
+        tentative = not any(e.get("confidence") == "high" for e in evidence)
+        # _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
+        # When True, tentative must stay False and confidence must stay at least "medium".
+        _hard_evidence = not tentative  # tentative=False iff high-conf evidence present
         if pattern not in (None, "unknown", "flat"):
-            if all_layers_weak:
+            if graph is not None:
+                # Import graph provided — structural validation available
+                confidence = "medium" if len(strong_domains) >= 3 else "low"
+                evidence.append({
+                    "type": "import_graph",
+                    "paths": [n.id for n in graph.nodes[:6]],
+                    "reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
+                    "confidence": "medium",
+                })
+            elif all_layers_weak:
                 # Layers came from file-naming heuristic only, not directory structure
                 confidence = "low"
+                if not _hard_evidence:
+                    tentative = True
                 limitations.append(
                     "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
                 )
+                evidence.append({
+                    "type": "filesystem_naming",
+                    "paths": [l.files[0] for l in layers if l.files][:6],
+                    "reason": (
+                        f"Pattern '{pattern}' inferred from file stem naming conventions only "
+                        "(e.g. *_controller.py, *_service.py). "
+                        "No directory structure or import graph confirmation."
+                    ),
+                    "confidence": "low",
+                })
             else:
-                confidence = "medium" if len(strong_domains) >= 3 else "low"
-                if graph is None:
+                # Directory structure match (or monorepo/workspace override with no layers)
+                confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
+                if confidence == "low" and not _hard_evidence:
+                    tentative = True
+                if not _hard_evidence:
                     limitations.append(
                         "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
                     )
+                if not _hard_evidence:
+                    matched_dirs = sorted({
+                        p.replace("\\", "/").split("/")[0]
+                        for layer in layers for p in layer.files
+                    })
+                    evidence.append({
+                        "type": "filesystem_naming",
+                        "paths": matched_dirs[:8],
+                        "reason": (
+                            f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
+                            "Import graph not available — structural direction of dependencies unverified."
+                        ),
+                        "confidence": "low" if confidence == "low" else "medium",
+                    })
         elif len(strong_domains) >= 1:
             confidence = "medium"
+            if not _hard_evidence:
+                tentative = True
+            evidence.append({
+                "type": "filesystem_naming",
+                "paths": [d.name for d in strong_domains[:6]],
+                "reason": "Domain clustering from directory names; no layer pattern confirmed",
+                "confidence": "low",
+            })
         else:
             confidence = "low"
-        method = "graph+structure" if graph is not None else "filesystem_inference"
+            if not _hard_evidence:
+                tentative = True
+            if not evidence:
+                limitations.append(
+                    "insufficient_evidence: no recognizable architectural signals found; "
+                    "filesystem structure does not match known patterns"
+                )
+                evidence.append({
+                    "type": "filesystem_naming",
+                    "paths": filtered[:6],
+                    "reason": "Only filesystem paths available; no pattern matched",
+                    "confidence": "low",
+                })
         return ArchitectureAnalysis(
             requested=True,
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
             confidence=confidence,
             method=method,
             limitations=limitations,
+            evidence=evidence,
+            tentative=tentative,
         )
     # ------------------------------------------------------------------

sourcecode/contract_pipeline.py CHANGED Viewed

@@ -45,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
     ]:
         try:
             result = subprocess.run(
-                cmd, cwd=root, capture_output=True, text=True, timeout=10
+                cmd, cwd=root, capture_output=True, text=True,
+                encoding="utf-8", errors="replace", timeout=10,
             )
-            for line in result.stdout.splitlines():
+            for line in (result.stdout or "").splitlines():
                 line = line.strip()
                 if line:
                     changed.add(line.replace("\\", "/"))
@@ -56,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
     try:
         result = subprocess.run(
             ["git", "status", "--porcelain"],
-            cwd=root, capture_output=True, text=True, timeout=10
+            cwd=root, capture_output=True, text=True,
+            encoding="utf-8", errors="replace", timeout=10,
         )
-        for line in result.stdout.splitlines():
+        for line in (result.stdout or "").splitlines():
             if len(line) > 3:
                 changed.add(line[3:].strip().replace("\\", "/"))
     except Exception:
@@ -129,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
     try:
         result = subprocess.run(
             ["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
-            cwd=root, capture_output=True, text=True, timeout=15,
+            cwd=root, capture_output=True, text=True,
+            encoding="utf-8", errors="replace", timeout=15,
         )
         path_set = set(file_paths)
         counter: Counter[str] = Counter()
-        for line in result.stdout.splitlines():
+        for line in (result.stdout or "").splitlines():
             line = line.strip().replace("\\", "/")
             if line in path_set:
                 counter[line] += 1

sourcecode/doc_analyzer.py CHANGED Viewed

@@ -132,6 +132,8 @@ class DocAnalyzer:
         records: list[DocRecord] = []
         limitations: list[str] = list(limitations_pre)
         languages: set[str] = set()
+        # Track per-language support status for honest reporting
+        unsupported_langs: set[str] = set()
         for relative_path in file_paths:
             abs_path = root / relative_path
@@ -176,8 +178,18 @@ class DocAnalyzer:
                 # Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
                 limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
                 languages.add(lang)
+                unsupported_langs.add(lang)
                 # NO records.append() here
+        # Build language_coverage: explicit per-language support status
+        _SUPPORTED_LANGS = {"python", "javascript", "typescript"}
+        lang_coverage: dict[str, str] = {}
+        for lang in languages:
+            if lang in _SUPPORTED_LANGS:
+                lang_coverage[lang] = "supported"
+            else:
+                lang_coverage[lang] = "unsupported"
         # Build summary
         symbol_count = sum(1 for r in records if r.kind != "module")
         total_count = len(records)
@@ -192,6 +204,15 @@ class DocAnalyzer:
                 "no docstrings or JSDoc comments found"
             )
+        # Warn explicitly when unsupported languages are present — agents must not
+        # assume full coverage when Java/Go/Rust files are in scope but not analyzed.
+        if unsupported_langs:
+            sorted_unsupported = sorted(unsupported_langs)
+            limitations.append(
+                f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
+                "only Python and JS/TS docstrings are extracted"
+            )
         summary = DocSummary(
             requested=True,
             total_count=total_count,
@@ -200,6 +221,7 @@ class DocAnalyzer:
             depth=depth,
             truncated=truncated,
             limitations=limitations,
+            language_coverage=lang_coverage,
         )
         return records, summary

sourcecode/env_analyzer.py CHANGED Viewed

@@ -27,9 +27,13 @@ _ENV_EXAMPLE_NAMES = {
 # Spring Boot application.properties / application.yml and their profile variants
 _SPRING_CONF_BASE = {"application.properties", "application.yml", "application.yaml"}
-_SPRING_CONF_PROFILE_RE = re.compile(r'^application-[a-z0-9_-]+\.(properties|ya?ml)$', re.IGNORECASE)
-# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE
-_SPRING_ENV_REF_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::[^}]*)?\}')
+_SPRING_CONF_PROFILE_RE = re.compile(r'^application-([a-z0-9_-]+)\.(properties|ya?ml)$', re.IGNORECASE)
+# Matches ${ENV_VAR} or ${ENV_VAR:default} where ENV_VAR is UPPER_SNAKE_CASE.
+# Group 1 = key, Group 2 = default (may be empty string, absent = no default).
+_SPRING_ENV_VAR_RE = re.compile(r'\$\{([A-Z][A-Z0-9_]*)(?::([^}]*))?\}')
+# Matches ${spring.dotted.key} or ${spring.dotted.key:default} — Spring property references.
+# These are internal property cross-references, not OS env vars, but still config signals.
+_SPRING_PROP_REF_RE = re.compile(r'\$\{([a-z][a-z0-9]*(?:\.[a-z][a-z0-9_-]*)*)(?::([^}]*))?\}')
 # Patterns where absence of the variable causes a hard runtime error (not just None/null).
 # py_environ_bracket → os.environ["KEY"] raises KeyError
@@ -140,9 +144,9 @@ def _infer_type_hint(key: str) -> str:
 def _scan_file(
     path: Path,
     rel_path: str,
-    findings: dict[str, list[tuple[str, Optional[str], bool]]],
+    findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]],
 ) -> None:
-    """Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard)]."""
+    """Escanea un fichero y acumula hallazgos en findings[key] = [(file_ref, default, is_hard, profile)]."""
     try:
         size = path.stat().st_size
         if size > _MAX_FILE_SIZE:
@@ -168,7 +172,7 @@ def _scan_file(
             line_num = content.count("\n", 0, m.start()) + 1
             file_ref = f"{rel_path}:{line_num}"
-            findings[key].append((file_ref, default, is_hard))
+            findings[key].append((file_ref, default, is_hard, None))
 def _parse_env_example(
@@ -204,22 +208,66 @@ def _parse_env_example(
     return results
+def _extract_spring_profile(filename: str) -> Optional[str]:
+    """Extract Spring profile from filename.
+    application.yml / application.properties → 'default'
+    application-m3dev.yml → 'm3dev'
+    """
+    name_lower = filename.lower()
+    if name_lower in _SPRING_CONF_BASE:
+        return "default"
+    m = _SPRING_CONF_PROFILE_RE.match(name_lower)
+    if m:
+        return m.group(1)
+    return None
 def _parse_spring_config(
     path: Path,
     rel_path: str,
     findings: dict,
-) -> None:
-    """Parse application.properties / application.yml looking for ${ENV_VAR} refs."""
+    profile: Optional[str] = None,
+) -> int:
+    """Parse application.properties / application.yml for ${ENV_VAR} refs.
+    Returns the total number of ${...} placeholders found (candidates).
+    Captures default values from ${VAR:default} syntax.
+    Marks vars without defaults as hard-required (Spring fails to start if missing).
+    """
     try:
         content = path.read_text(encoding="utf-8", errors="replace")
     except OSError:
-        return
+        return 0
-    for m in _SPRING_ENV_REF_RE.finditer(content):
+    candidates = 0
+    # 1. UPPER_SNAKE_CASE env var references: ${DB_HOST} or ${DB_HOST:localhost}
+    for m in _SPRING_ENV_VAR_RE.finditer(content):
+        key = m.group(1)
+        raw_default = m.group(2)  # None if no colon, "" if colon with empty default
+        # A colon means a default was specified (even if empty string)
+        has_default = raw_default is not None
+        default: Optional[str] = raw_default if (raw_default and raw_default.strip()) else None
+        line_num = content.count("\n", 0, m.start()) + 1
+        # Hard required only when no default is provided
+        is_hard = not has_default
+        findings[key].append((f"{rel_path}:{line_num}", default, is_hard, profile))
+        candidates += 1
+    # 2. lowercase.dotted Spring property refs: ${spring.datasource.url:default}
+    # These are internal property cross-references; store with a special prefix so
+    # callers can distinguish them from OS env vars. We do NOT mark them hard-required
+    # because they reference Spring's own property resolution chain.
+    for m in _SPRING_PROP_REF_RE.finditer(content):
         key = m.group(1)
+        raw_default = m.group(2)
+        default = raw_default if (raw_default and raw_default.strip()) else None
         line_num = content.count("\n", 0, m.start()) + 1
-        # Spring fails to start if a referenced env var has no default → hard required
-        findings[key].append((f"{rel_path}:{line_num}", None, True))
+        findings[key].append((f"{rel_path}:{line_num}", default, False, profile))
+        candidates += 1
+    return candidates
 class EnvAnalyzer:
@@ -232,13 +280,18 @@ class EnvAnalyzer:
     ) -> tuple[list, object]:
         from sourcecode.schema import EnvSummary, EnvVarRecord
-        # findings[key] = list of (file_ref, default_or_None, is_hard_required)
-        findings: dict[str, list[tuple[str, Optional[str], bool]]] = defaultdict(list)
+        # findings[key] = list of (file_ref, default_or_None, is_hard_required, profile_or_None)
+        findings: dict[str, list[tuple[str, Optional[str], bool, Optional[str]]]] = defaultdict(list)
         example_entries: list[tuple[str, Optional[str], Optional[str]]] = []
         example_files_found: list[str] = []
         limitations: list[str] = []
+        profiles_scanned: list[str] = []
+        spring_candidates: int = 0
-        self._walk(root, root, findings, example_entries, example_files_found, limitations)
+        spring_candidates = self._walk(
+            root, root, findings, example_entries, example_files_found,
+            limitations, profiles_scanned,
+        )
         # Merge findings into EnvVarRecord per key
         records: dict[str, EnvVarRecord] = {}
@@ -248,19 +301,23 @@ class EnvAnalyzer:
             if len(records) >= _MAX_KEYS:
                 limitations.append(f"key_limit_reached:{_MAX_KEYS}")
                 break
-            defaults = [d for _, d, _ in refs if d is not None]
+            defaults = [d for _, d, _, _ in refs if d is not None]
             # required only when access pattern causes a hard runtime error if missing:
             # os.environ["KEY"] (KeyError) or Spring @Value/${KEY} without default.
             # os.getenv("KEY") / os.environ.get("KEY") return None — not hard required.
-            has_hard_access = any(is_hard for _, _, is_hard in refs)
+            has_hard_access = any(is_hard for _, _, is_hard, _ in refs)
             required = has_hard_access and not defaults
             default_val = defaults[0] if defaults else None
             unique_files: list[str] = []
             seen: set[str] = set()
-            for file_ref, _, _ in refs:
+            # Collect first profile seen for this key (from Spring config files)
+            first_profile: Optional[str] = None
+            for file_ref, _, _, prof in refs:
                 if file_ref not in seen:
                     seen.add(file_ref)
                     unique_files.append(file_ref)
+                if first_profile is None and prof is not None:
+                    first_profile = prof
                 if len(unique_files) >= _MAX_FILES_PER_KEY:
                     break
             records[key] = EnvVarRecord(
@@ -270,6 +327,7 @@ class EnvAnalyzer:
                 type_hint=_infer_type_hint(key),
                 category=_infer_category(key),
                 files=unique_files,
+                profile=first_profile,
             )
         # 2. Supplement with .env.example entries (fill description + add missing keys)
@@ -300,6 +358,20 @@ class EnvAnalyzer:
         # Build summary
         categories = sorted({r.category for r in sorted_records if r.category})
         required_count = sum(1 for r in sorted_records if r.required)
+        # Coverage note: warn if Spring config was scanned but coverage seems partial
+        coverage_note: Optional[str] = None
+        if profiles_scanned and spring_candidates > 0:
+            spring_key_count = sum(
+                1 for r in sorted_records if r.profile is not None
+            )
+            if spring_key_count < spring_candidates:
+                coverage_note = (
+                    f"{spring_candidates} Spring ${{VAR}} placeholder(s) found across "
+                    f"{len(profiles_scanned)} profile(s); {spring_key_count} unique key(s) "
+                    "extracted. Duplicates across profiles collapsed."
+                )
         summary = EnvSummary(
             requested=True,
             total=len(sorted_records),
@@ -308,6 +380,9 @@ class EnvAnalyzer:
             categories=categories,
             example_files_found=example_files_found,
             limitations=limitations,
+            profiles_scanned=sorted(set(profiles_scanned)),
+            spring_candidates=spring_candidates,
+            coverage_note=coverage_note,
         )
         return sorted_records, summary
@@ -320,11 +395,15 @@ class EnvAnalyzer:
         example_entries: list,
         example_files_found: list,
         limitations: list,
-    ) -> None:
+        profiles_scanned: list,
+    ) -> int:
+        """Walk the directory tree accumulating env var findings. Returns spring_candidates count."""
         try:
             entries = sorted(current.iterdir())
         except PermissionError:
-            return
+            return 0
+        total_spring_candidates = 0
         for entry in entries:
             name = entry.name
@@ -333,7 +412,10 @@ class EnvAnalyzer:
             if entry.is_dir():
                 if name in _SKIP_DIRS:
                     continue
-                self._walk(root, entry, findings, example_entries, example_files_found, limitations)
+                total_spring_candidates += self._walk(
+                    root, entry, findings, example_entries, example_files_found,
+                    limitations, profiles_scanned,
+                )
             elif entry.is_file():
                 rel = entry.relative_to(root).as_posix()
                 name_lower = name.lower()
@@ -344,13 +426,19 @@ class EnvAnalyzer:
                     continue
                 # Spring Boot application.properties / application.yml (incl. profiles)
                 if name_lower in _SPRING_CONF_BASE or _SPRING_CONF_PROFILE_RE.match(name_lower):
-                    _parse_spring_config(entry, rel, findings)
+                    profile = _extract_spring_profile(name)
+                    if profile and profile not in profiles_scanned:
+                        profiles_scanned.append(profile)
+                    count = _parse_spring_config(entry, rel, findings, profile)
+                    total_spring_candidates += count
                     continue
                 # Source code files
                 suffix = entry.suffix.lower()
                 if suffix in _CODE_EXTENSIONS:
                     _scan_file(entry, rel, findings)
+        return total_spring_candidates
 def _replace_description(record, description: str):
     from dataclasses import replace

sourcecode/git_analyzer.py CHANGED Viewed

@@ -60,9 +60,13 @@ def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
         ["git", "-C", str(cwd)] + args,
         capture_output=True,
         text=True,
+        encoding="utf-8",
+        errors="replace",
         timeout=timeout,
     )
-    return result.stdout, result.returncode
+    # `result.stdout` is typed Optional[str]; guard against None on edge-case
+    # platforms (Windows subprocess encoding failures, detached processes, etc.)
+    return result.stdout or "", result.returncode
 class GitAnalyzer:
@@ -80,6 +84,7 @@ class GitAnalyzer:
         branch: Optional[str] = None
         recent_commits: list[CommitRecord] = []
         change_hotspots: list[ChangeHotspot] = []
+        hotspots_status: str = "ok"
         uncommitted: Optional[UncommittedChanges] = None
         contributors: list[str] = []
@@ -137,8 +142,10 @@ class GitAnalyzer:
             change_hotspots = _parse_hotspots(stdout)
         except subprocess.TimeoutExpired:
             limitations.append("hotspots_timeout")
+            hotspots_status = "failed"
         except Exception as exc:
             limitations.append(f"hotspots_error:{exc}")
+            hotspots_status = "failed"
         try:
             stdout, _ = _run_git(["status", "--porcelain"], path, timeout=10)
@@ -166,6 +173,7 @@ class GitAnalyzer:
             branch=branch,
             recent_commits=recent_commits,
             change_hotspots=change_hotspots,
+            hotspots_status=hotspots_status,
             uncommitted_changes=uncommitted,
             contributors=contributors,
             git_summary=git_summary,
@@ -228,9 +236,12 @@ def _is_hotspot_admin(path: str) -> bool:
     return False
-def _parse_hotspots(output: str) -> list:
+def _parse_hotspots(output: str | None) -> list:
     from sourcecode.schema import ChangeHotspot
+    if not output:
+        return []
     file_counts: Counter = Counter()
     file_last_date: dict[str, str] = {}
     current_date = ""

sourcecode/prepare_context.py CHANGED Viewed

@@ -728,11 +728,13 @@ class TaskContextBuilder:
                 cwd=str(self.root),
                 capture_output=True,
                 text=True,
+                encoding="utf-8",
+                errors="replace",
                 timeout=10,
             )
             if result.returncode == 0:
                 return [
-                    line.strip() for line in result.stdout.splitlines()
+                    line.strip() for line in (result.stdout or "").splitlines()
                     if line.strip()
                 ]
         except (subprocess.TimeoutExpired, FileNotFoundError):
@@ -744,10 +746,12 @@ class TaskContextBuilder:
                 cwd=str(self.root),
                 capture_output=True,
                 text=True,
+                encoding="utf-8",
+                errors="replace",
                 timeout=10,
             )
             if result.returncode == 0:
-                return [line.strip() for line in result.stdout.splitlines() if line.strip()]
+                return [line.strip() for line in (result.stdout or "").splitlines() if line.strip()]
         except (subprocess.TimeoutExpired, FileNotFoundError):
             pass
         return []

sourcecode/schema.py CHANGED Viewed

@@ -252,6 +252,9 @@ class DocSummary:
     depth: Optional[DocsDepth] = None
     truncated: bool = False
     limitations: list[str] = field(default_factory=list)
+    # Per-language support status: "supported" | "unsupported" | "partial"
+    # Absent key = language not present in scanned files.
+    language_coverage: dict[str, str] = field(default_factory=dict)
 @dataclass
@@ -303,11 +306,21 @@ class SemanticSummary:
     """Summary of the --semantics analysis."""
     requested: bool = False
+    # Explicit analysis outcome — never omit, never silent.
+    # "ok": analysis ran and produced results
+    # "partial": analysis ran but with significant coverage gaps
+    # "failed": analysis could not produce useful results
+    status: str = "ok"
+    reason: Optional[str] = None        # human-readable failure/partial reason
     call_count: int = 0
     symbol_count: int = 0
     link_count: int = 0
     languages: list[str] = field(default_factory=list)
     language_coverage: dict[str, str] = field(default_factory=dict)
+    # Structured per-language support details. Each value:
+    # {"supported": bool, "status": str, "reason": str}
+    # status: "full" | "heuristic" | "unsupported"
+    language_coverage_details: dict[str, Any] = field(default_factory=dict)
     files_analyzed: int = 0
     files_skipped: int = 0
     truncated: bool = False
@@ -393,6 +406,13 @@ class ArchitectureAnalysis:
     confidence: Literal["high", "medium", "low"] = "low"
     method: str = "heuristic"
     limitations: list[str] = field(default_factory=list)
+    # Structured evidence for each architectural inference.
+    # Each entry: {"type": str, "paths": list[str], "reason": str, "confidence": str}
+    # type: "workspace_config" | "filesystem_naming" | "import_graph" | "entry_files"
+    evidence: list[dict] = field(default_factory=list)
+    # True when pattern is inferred from weak signals (e.g. directory names only).
+    # Agents must not treat tentative patterns as confirmed facts.
+    tentative: bool = False
 # --- Env Map ---
@@ -408,6 +428,7 @@ class EnvVarRecord:
     category: Optional[str] = None    # database | cache | storage | auth | service | observability | feature_flag | server | general
     description: Optional[str] = None
     files: list[str] = field(default_factory=list)  # "path:line"
+    profile: Optional[str] = None     # Spring profile if first occurrence is in application-{profile}.yml
 @dataclass
@@ -421,6 +442,10 @@ class EnvSummary:
     categories: list[str] = field(default_factory=list)
     example_files_found: list[str] = field(default_factory=list)
     limitations: list[str] = field(default_factory=list)
+    # Spring Boot coverage metadata
+    profiles_scanned: list[str] = field(default_factory=list)
+    spring_candidates: int = 0   # total ${VAR} refs found across Spring config files
+    coverage_note: Optional[str] = None  # explicit note about partial coverage
 # --- Code Notes ---
@@ -557,6 +582,10 @@ class GitContext:
     branch: Optional[str] = None
     recent_commits: list[CommitRecord] = field(default_factory=list)
     change_hotspots: list[ChangeHotspot] = field(default_factory=list)
+    # Explicit hotspot analysis outcome — distinguishes "no hotspots found" from "analysis failed".
+    # "ok": hotspot analysis ran (change_hotspots may still be empty if no changes in window)
+    # "failed": hotspot analysis threw an exception (see limitations for hotspots_error:...)
+    hotspots_status: str = "ok"
     uncommitted_changes: Optional[UncommittedChanges] = None
     contributors: list[str] = field(default_factory=list)
     git_summary: Optional[str] = None

sourcecode/semantic_analyzer.py CHANGED Viewed

@@ -343,8 +343,14 @@ class SemanticAnalyzer:
         # Plan 12-02: language_coverage["python"] = "full" when Python files are analyzed
         lang_coverage: dict[str, str] = {}
+        lang_coverage_details: dict[str, Any] = {}
         if source_files:
             lang_coverage["python"] = "full"
+            lang_coverage_details["python"] = {
+                "supported": True,
+                "status": "full",
+                "reason": "AST-based: symbols, cross-file calls, and imports fully resolved",
+            }
         # -----------------------------------------------------------------------
         # Plan 12-03: JS/TS analysis block
@@ -489,6 +495,12 @@ class SemanticAnalyzer:
                         js_languages.add("javascript")
             languages.extend(sorted(js_languages))
             lang_coverage["nodejs"] = "heuristic"
+            for js_lang in js_languages:
+                lang_coverage_details[js_lang] = {
+                    "supported": True,
+                    "status": "heuristic",
+                    "reason": "Regex-based: exports/imports extracted; cross-file call resolution is heuristic, not AST",
+                }
         # -----------------------------------------------------------------------
         # Plan 12-04: Go analysis block
@@ -530,6 +542,11 @@ class SemanticAnalyzer:
                 files_analyzed += 1
             languages.append("go")
             lang_coverage["go"] = "heuristic"
+            lang_coverage_details["go"] = {
+                "supported": True,
+                "status": "heuristic",
+                "reason": "Regex-based: func/struct names and same-file calls extracted; no cross-file resolution",
+            }
         # -----------------------------------------------------------------------
         # Plan 12-04: Rust analysis block
@@ -571,6 +588,11 @@ class SemanticAnalyzer:
                 files_analyzed += 1
             languages.append("rust")
             lang_coverage["rust"] = "heuristic"
+            lang_coverage_details["rust"] = {
+                "supported": True,
+                "status": "heuristic",
+                "reason": "Regex-based: fn/struct names and module-qualified calls extracted; no cross-file resolution",
+            }
         # -----------------------------------------------------------------------
         # Plan 12-04: JVM analysis block (Java, Kotlin, Scala)
@@ -612,14 +634,56 @@ class SemanticAnalyzer:
                 files_analyzed += 1
             languages.append("java")
             lang_coverage["java"] = "heuristic"
+            lang_coverage_details["java"] = {
+                "supported": True,
+                "status": "heuristic",
+                "reason": (
+                    "Regex-based only: class/interface/method names extracted, "
+                    "same-file call sites detected. "
+                    "No cross-file resolution, no type inference, no import graph. "
+                    "Spring annotations (@Service, @Component, etc.) not semantically interpreted."
+                ),
+            }
+        # Determine explicit analysis status — never emit silent empty results.
+        # An agent must be able to tell "analysis ran and found nothing" from
+        # "analysis failed to run" or "significant coverage gap".
+        _total_candidates = (
+            len(source_files)
+            + len(js_source_files)
+            + len(go_source_files)
+            + len(rust_source_files)
+            + len(jvm_source_files)
+        )
+        if _total_candidates == 0:
+            _sem_status = "failed"
+            _sem_reason = "no analyzable source files found in project"
+        elif files_analyzed == 0:
+            _sem_status = "failed"
+            _sem_reason = (
+                f"all {_total_candidates} candidate file(s) failed to analyze; "
+                "check limitations for parse/read errors"
+            )
+        elif files_analyzed < _total_candidates // 2 and _total_candidates > 4:
+            _sem_status = "partial"
+            _sem_reason = (
+                f"{files_analyzed} of {_total_candidates} file(s) analyzed; "
+                f"{files_skipped} skipped; see limitations"
+            )
+        else:
+            _sem_status = "ok"
+            _sem_reason = None
         summary = SemanticSummary(
             requested=True,
+            status=_sem_status,
+            reason=_sem_reason,
             call_count=len(calls),
             symbol_count=len(all_symbols),
             link_count=len(links),
             languages=languages,
             language_coverage=lang_coverage,
+            language_coverage_details=lang_coverage_details,
             files_analyzed=files_analyzed,
             files_skipped=files_skipped,
             truncated=truncated,

sourcecode/serializer.py CHANGED Viewed

@@ -957,9 +957,21 @@ def standard_view(sm: SourceMap, *, include_tree: bool = False) -> dict[str, Any
     if sm.semantic_summary is not None and sm.semantic_summary.requested:
         result["semantic_summary"] = asdict(sm.semantic_summary)
-        result["semantic_calls"] = [asdict(c) for c in sm.semantic_calls]
-        result["semantic_symbols"] = [asdict(s) for s in sm.semantic_symbols]
-        result["semantic_links"] = [asdict(lnk) for lnk in sm.semantic_links]
+        # Defensive filter: never emit objects with null required fields.
+        # A null entry in these arrays is worse than a shorter array — it causes
+        # agents to misinterpret the analysis as valid when it is not.
+        result["semantic_calls"] = [
+            asdict(c) for c in sm.semantic_calls
+            if c.caller_path and c.callee_path
+        ]
+        result["semantic_symbols"] = [
+            asdict(s) for s in sm.semantic_symbols
+            if s.symbol and s.kind and s.language and s.path
+        ]
+        result["semantic_links"] = [
+            asdict(lnk) for lnk in sm.semantic_links
+            if lnk.importer_path and lnk.symbol
+        ]
     if sm.metrics_summary is not None and sm.metrics_summary.requested:
         result["metrics_summary"] = asdict(sm.metrics_summary)

{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 0.41.0
+Version: 0.42.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004

{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
-sourcecode/__init__.py,sha256=Z0LOxVp01ZH1jSUmGwFp1S832KRn_Hq6x3bcAaQ-10c,103
+sourcecode/__init__.py,sha256=K7shxEMemP2ulUio4YBuziIbKkDcIuDkcsLEFth5CwM,103
 sourcecode/adaptive_scanner.py,sha256=6dh34C2qZXyRbw-8xBhbEwDdXanM6CRFRWayVoYITnA,10190
-sourcecode/architecture_analyzer.py,sha256=H6noGgVArUJ25z1qC0fFA0KvJJeHZYyhKvKSkOyWHUk,23096
+sourcecode/architecture_analyzer.py,sha256=O4AXc7l_WTzIXrcAzstqZy-TGKNaFa6p3MzpgVjaO8g,27749
 sourcecode/architecture_summary.py,sha256=rSY5MRiaz4N1YdG0pqDTDuFjSN7PO_Zplx-dtNzv2Yo,19985
 sourcecode/ast_extractor.py,sha256=0OHQwTUBBc9lmqPLryVeB1z8dGIC6NhLlar800CD9oI,41129
 sourcecode/classifier.py,sha256=GKTMN8qKZX7ponSwDJfN08RrasI4CVpq1_gFBgEopps,7093
@@ -9,26 +9,26 @@ sourcecode/code_notes_analyzer.py,sha256=rRd8bFYV0krjlxxQV0wenwE9K7pVpUQSR7KvSvU
 sourcecode/confidence_analyzer.py,sha256=HxJMPLI5ulqtkncnv98W4iVO6yMbpQo87VuxiuNbDmY,12167
 sourcecode/context_summarizer.py,sha256=CiQrfBEzun949bWvmLabWoj2HhPn6Lw62ofqnsy0FlQ,6503
 sourcecode/contract_model.py,sha256=wpYNWGzHAVnyGxniGqNMk96TCmWbVVOqNSc3Kauajrg,3348
-sourcecode/contract_pipeline.py,sha256=m2xPFLYWkTRvEv9L7iV9gqE0JRDxYhnx_IcQNo5P9es,22793
+sourcecode/contract_pipeline.py,sha256=af30z1l4LiSOngawYkrpzQC-8huIJOgbQ8EJrq_PDSc,22967
 sourcecode/coverage_parser.py,sha256=q0LeZJaX1bnntLu-ImksdBsMlpsVmk_iUfSaB4eaJGo,19702
 sourcecode/dependency_analyzer.py,sha256=Exq0BfInvfS5iAg9xAr6WI2uPNuotkIudTKcYJcRhB8,52757
-sourcecode/doc_analyzer.py,sha256=KLQ8g5cFTLEnZfH2xh7Z1t936oS6N6fP5L6YplhbtzM,20182
+sourcecode/doc_analyzer.py,sha256=TttdS7mndKQhyJCfJnnAsyGCJrf-TIL7oXxDlTLUFKE,21248
 sourcecode/entrypoint_classifier.py,sha256=a69dMGyxCTd_LOm3oqj-EXWpRmbmeujN7T1mr2eJ1as,3877
-sourcecode/env_analyzer.py,sha256=slvq-eT24RVMNczLNDlZbe0hU8JXIIPxybqubvrrnSQ,14409
+sourcecode/env_analyzer.py,sha256=NFV4PSeBH5GEONOIo2SY5iJRXuuqhAOlRDtTZMqOZTI,18452
 sourcecode/file_classifier.py,sha256=_KfFIIolharaIxbSTrCkaWauQIqNHCyor_n47RGyDh8,8577
-sourcecode/git_analyzer.py,sha256=khF1AOT8dL5RP9d_tDqDpE8FXEvCa6Ns14L4BXjFcs0,11179
+sourcecode/git_analyzer.py,sha256=PD3eNWydznQ6KLNpxGzBqizIHoPIKevfwz9Xyf_pDt4,11600
 sourcecode/graph_analyzer.py,sha256=hMOsLLz9B0UnQ4xwbHdgr3bFvqpw0bQ8kN-xmEn3Krk,64156
 sourcecode/metrics_analyzer.py,sha256=e2cFwB9XubFq_dIVsP2PLjpr4wX0N6ulb3ol3sGDUeo,20777
-sourcecode/prepare_context.py,sha256=zYRcRFc9OXN_V-3eKcVmA6wwO9A8uhUjM2cqkkp1dV0,30892
+sourcecode/prepare_context.py,sha256=a0_ThVNJ8v98UTrgnrnjacovvCd-2HWJug1scenUtEU,31044
 sourcecode/ranking_engine.py,sha256=XdhzahKGleYNW3N0GqGW9salPOXx2BNp8KqXpaeHHmw,8247
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
 sourcecode/relevance_scorer.py,sha256=E74w7nlsNVobO3LqKHiMtBd84ONwGp8uDpwXJEjRtLA,8330
 sourcecode/repo_classifier.py,sha256=FG1vaWKdWXsWdl-S8hjVMiTqcwgaRXkDyvK4rPcOGtQ,22681
 sourcecode/runtime_classifier.py,sha256=zWX3r3HCKHc-qtIobErOa8aKMmaoPYREtJKvPcBGPjQ,14792
 sourcecode/scanner.py,sha256=aM3h9-DCQ3xKpeHpHYdo2vX6T5P95HA_YwZbkAVNwmo,8288
-sourcecode/schema.py,sha256=dVA-3EbHBakHLkgeZF-LfjKClEFRgPZkzblXpDTshFA,20796
-sourcecode/semantic_analyzer.py,sha256=CBRRt92AFucf8vhKbly24132sM3EEIaZZpzFsUDpsUI,79617
-sourcecode/serializer.py,sha256=1wWmBUTY1SoRBedVnE4_mPEzEL8xYsoZ8hamvpQiTvc,56477
+sourcecode/schema.py,sha256=ofEge9hTWHOTjeWt7ceCDQWzP-uhhenrYX2usjW2KVU,22759
+sourcecode/semantic_analyzer.py,sha256=16EFTgM7ooW0m5gNUKOlTSn7IEMLSzKmzQn-cWaSqjs,82604
+sourcecode/serializer.py,sha256=VUiBxA2w9CqlblXqhHQMXEUvysxTaNljgiATbw6MJ4A,56927
 sourcecode/summarizer.py,sha256=ZuzIdm3t8A-d5MuQL0TSNLrd-L0IQIuguIxeNXMNJf8,16070
 sourcecode/tree_utils.py,sha256=Fj9OIuUksBvgibNd3feog0sMDjVypJzPexp5lvMoYWI,1424
 sourcecode/workspace.py,sha256=fQlVoNx8S-fSHpKoJ0JBvEHCFkxszH0KZVJed1i3TRk,6845
@@ -59,8 +59,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-0.41.0.dist-info/METADATA,sha256=NinjVy-jlbAy-be1L-ejAtO5j7HiAZwi5B3C4CbOCqk,25209
-sourcecode-0.41.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-0.41.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-0.41.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-0.41.0.dist-info/RECORD,,
+sourcecode-0.42.0.dist-info/METADATA,sha256=-H--yzWSnQ5wpiUOXDmKirFowuaAGWb-LhUMSLYiTQ8,25209
+sourcecode-0.42.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-0.42.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-0.42.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-0.42.0.dist-info/RECORD,,

{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-0.41.0.dist-info → sourcecode-0.42.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

sourcecode 0.41.0-py3-none-any.whl → 0.42.0-py3-none-any.whl