PyPI - sourcecode - Versions diffs - 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl - Mend

sourcecode 1.20.0 (py3-none-any.whl) → 1.22.0 (py3-none-any.whl)

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (8) hide show

sourcecode/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "1.20.0"
+__version__ = "1.22.0"

sourcecode/cli.py CHANGED Viewed

@@ -1727,9 +1727,9 @@ def prepare_context_cmd(
             "changed_files": False, "affected_entry_points": False,
         },
         "delta": {
-            "project_summary": True, "architecture_summary": False,
+            "project_summary": False, "architecture_summary": False,
             "relevant_files": True, "key_dependencies": False,
-            "gaps": False, "confidence": True,
+            "gaps": True, "confidence": True,
             "suspected_areas": False, "improvement_opportunities": False,
             "test_gaps": False, "code_notes_summary": False,
             "changed_files": True, "affected_entry_points": True,
@@ -1771,6 +1771,18 @@ def prepare_context_cmd(
         out["changed_files"] = output.changed_files
     if _task_include("affected_entry_points") and output.affected_entry_points:
         out["affected_entry_points"] = output.affected_entry_points
+    # Delta-specific impact fields
+    if task == "delta":
+        if output.since:
+            out["since"] = output.since
+        if output.impact_summary:
+            out["impact_summary"] = output.impact_summary
+        if output.affected_modules:
+            out["affected_modules"] = output.affected_modules
+        if output.risk_areas:
+            out["risk_areas"] = output.risk_areas
+        if output.why_these_files:
+            out["reasoning"] = output.why_these_files
     if output.limitations:
         out["limitations"] = output.limitations
     if output.symptom:

sourcecode/prepare_context.py CHANGED Viewed

@@ -324,6 +324,11 @@ class TaskOutput:
     symptom: Optional[str] = None                                  # fix-bug only
     related_notes: list[dict] = field(default_factory=list)        # fix-bug + symptom only
     symptom_note: Optional[str] = None                             # fix-bug: cross-layer synonym note
+    # delta-specific impact fields
+    impact_summary: Optional[str] = None
+    affected_modules: list[str] = field(default_factory=list)
+    risk_areas: list[dict] = field(default_factory=list)
+    since: Optional[str] = None
 # ─────────────────────────────────────────────────────────────────────────────
@@ -640,14 +645,37 @@ class TaskContextBuilder:
         test_set = {p for p in all_paths if self._is_test(p)}
         source_set = {p for p in all_paths if not self._is_test(p) and self._is_source(p)}
-        relevant_files = self._rank_files(
-            task_name, spec, all_paths, entry_set, test_set,
-            monorepo_packages=sm.monorepo_packages if sm.monorepo_packages else None,
-            git_hotspots=git_hotspots,
-            uncommitted_files=uncommitted_files,
-            code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
-            delta_files=_delta_files,
-        )
+        # Delta uses a dedicated impact-analysis path — never the generic ranker.
+        _delta_impact_summary: Optional[str] = None
+        _delta_affected_modules: list[str] = []
+        _delta_risk_areas: list[dict] = []
+        _delta_why: dict[str, str] = {}
+        _delta_analysis_gaps: list[str] = []
+        if task_name == "delta":
+            _delta_changed_list: list[str] = sorted(_delta_files) if _delta_files else []
+            (
+                relevant_files,
+                _delta_impact_summary,
+                _delta_affected_modules,
+                _delta_risk_areas,
+                _delta_why,
+                _delta_analysis_gaps,
+            ) = self._build_delta_impact(
+                changed_files=_delta_changed_list,
+                all_paths=all_paths,
+                entry_points=entry_points,
+                since=since,
+            )
+        else:
+            relevant_files = self._rank_files(
+                task_name, spec, all_paths, entry_set, test_set,
+                monorepo_packages=sm.monorepo_packages if sm.monorepo_packages else None,
+                git_hotspots=git_hotspots,
+                uncommitted_files=uncommitted_files,
+                code_notes=cn_notes_for_ranking if cn_notes_for_ranking else None,
+                delta_files=None,
+            )
         # ── 6b. Symptom keyword boost + related notes (fix-bug + --symptom) ──
         symptom_keywords: list[str] = []
@@ -805,22 +833,37 @@ class TaskContextBuilder:
         conf_summary, analysis_gaps = ConfidenceAnalyzer().analyze(sm_for_conf)
         confidence = conf_summary.overall
-        gaps = [g.reason for g in analysis_gaps]
-        if _mybatis_warning:
-            gaps.append(_mybatis_warning["reason"])
+        if task_name == "delta":
+            # Use delta-specific gaps; ConfidenceAnalyzer gaps are about full-repo
+            # detection quality and are not meaningful for an incremental diff.
+            gaps = _delta_analysis_gaps
+            if _mybatis_warning:
+                gaps.append(_mybatis_warning["reason"])
+        else:
+            gaps = [g.reason for g in analysis_gaps]
+            if _mybatis_warning:
+                gaps.append(_mybatis_warning["reason"])
         # ── 9. why_these_files ────────────────────────────────────────────────
-        why_these_files: dict[str, str] = {
-            rf.path: rf.reason for rf in relevant_files
-        }
+        if task_name == "delta":
+            why_these_files = _delta_why
+        else:
+            why_these_files = {rf.path: rf.reason for rf in relevant_files}
-        # ── 10. Delta: git changed files (reuse pre-computed set from step 5c) ──
+        # ── 10. Delta: git changed files + entry points ───────────────────────
         changed_files: list[str] = []
         affected_entry_points: list[str] = []
         if task_name == "delta":
             changed_files = sorted(_delta_files) if _delta_files else self._get_git_changed_files(since=since)
-            ep_set = {ep.path for ep in entry_points}
-            affected_entry_points = [f for f in changed_files if f in ep_set]
+            _ep_set = {ep.path for ep in entry_points}
+            # include framework-detected entry points AND files classified as
+            # entrypoint/controller by artifact taxonomy (CLI mains, Spring controllers)
+            _EP_ARTIFACT_TYPES = frozenset({"entrypoint", "controller"})
+            affected_entry_points = sorted({
+                f for f in changed_files
+                if f in _ep_set
+                or self._classify_changed_file(f)["artifact_type"] in _EP_ARTIFACT_TYPES
+            })
         return TaskOutput(
             task=task_name,
@@ -842,6 +885,10 @@ class TaskContextBuilder:
             symptom=symptom if task_name == "fix-bug" and symptom else None,
             related_notes=related_notes,
             symptom_note=symptom_note,
+            impact_summary=_delta_impact_summary,
+            affected_modules=_delta_affected_modules,
+            risk_areas=_delta_risk_areas,
+            since=since if task_name == "delta" else None,
         )
     def render_prompt(self, output: TaskOutput) -> str:
@@ -1133,6 +1180,509 @@ class TaskContextBuilder:
     def _is_source(self, path: str) -> bool:
         return Path(path).suffix.lower() in _SOURCE_EXTENSIONS
+    # ── Delta impact analysis ─────────────────────────────────────────────────
+    @staticmethod
+    def _classify_changed_file(path: str) -> dict[str, Any]:
+        """Classify a changed file by artifact type, risk areas, impact level, and confidence.
+        Returns dict: artifact_type, risk_areas, impact_level, is_noise, module, confidence.
+        Pure path/name heuristics — no file reads, fully deterministic.
+        Closed taxonomy (no unknown_* values ever emitted):
+          entrypoint | controller | service | repository | mapper | config |
+          spring_config | spring_profile | security | domain_model | dto |
+          test | build_manifest | documentation | ide_noise | db_migration |
+          generic_source
+        """
+        norm = path.replace("\\", "/")
+        name = Path(path).name
+        stem = Path(path).stem
+        suffix = Path(path).suffix.lower()
+        norm_lower = norm.lower()
+        stem_lower = stem.lower()
+        name_lower = name.lower()
+        _CODE_EXTS = frozenset({
+            ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".kt", ".go",
+            ".rs", ".rb", ".php", ".cs", ".dart", ".mjs", ".cjs", ".scala",
+        })
+        _CONFIG_EXTS = frozenset({
+            ".yml", ".yaml", ".json", ".xml", ".toml", ".properties",
+            ".env", ".cfg", ".ini", ".conf",
+        })
+        # IDE/hidden-tool directories → noise, skip impact analysis
+        _IDE_DIR_NAMES = frozenset({
+            ".idea", ".vscode", ".eclipse", ".fleet", ".git", ".github",
+            ".circleci", ".travis", ".teamcity", ".gradle", ".mvn",
+        })
+        path_dir_parts = norm_lower.split("/")[:-1]  # all components except filename
+        if any(part in _IDE_DIR_NAMES for part in path_dir_parts):
+            return {
+                "artifact_type": "ide_noise",
+                "risk_areas": [],
+                "impact_level": "noise",
+                "is_noise": True,
+                "module": "",
+                "confidence": "high",
+            }
+        module = _extract_ddd_domain(path)
+        # Tests (before other checks to avoid misclassifying TestFoo as service etc.)
+        _is_test = (
+            (stem_lower.startswith("test") and len(stem_lower) > 4)
+            or (stem_lower.endswith("test") and len(stem_lower) > 4)
+            or stem_lower.endswith("tests")
+            or stem_lower.endswith("spec")
+            or any(t in f"/{norm_lower}/" for t in (
+                "/test/", "/tests/", "/spec/", "/specs/", "/__tests__/", "/it/",
+            ))
+        )
+        if _is_test:
+            return {"artifact_type": "test", "risk_areas": ["tests"], "impact_level": "low", "is_noise": False, "module": module, "confidence": "high"}
+        # Entrypoints: Spring Boot Application, CLI mains, framework entry files
+        _ENTRYPOINT_NAMES = frozenset({
+            "main.py", "app.py", "run.py", "server.py", "wsgi.py", "asgi.py",
+            "__main__.py", "index.js", "index.ts", "server.js", "server.ts",
+            "app.js", "app.ts", "main.js", "main.ts",
+        })
+        if (
+            name_lower in _ENTRYPOINT_NAMES
+            or (suffix in _CODE_EXTS and stem_lower in ("cli", "manage", "entrypoint", "startup", "launcher"))
+            or (suffix in (".java", ".kt") and stem_lower.endswith("application"))
+        ):
+            return {"artifact_type": "entrypoint", "risk_areas": ["api", "config"], "impact_level": "critical", "is_noise": False, "module": module, "confidence": "high"}
+        # Security surface (extended: interceptor, filter, cors, acl)
+        _SECURITY_KW = ("security", "auth", "jwt", "token", "permission", "role",
+                         "credential", "encrypt", "decrypt", "oauth", "saml", "ldap",
+                         "password", "secret", "interceptor", "filter", "cors", "acl")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _SECURITY_KW):
+            impact = "critical" if any(kw in stem_lower for kw in ("security", "auth", "jwt")) else "high"
+            return {"artifact_type": "security", "risk_areas": ["security"], "impact_level": impact, "is_noise": False, "module": module, "confidence": "high"}
+        # API / controller layer
+        _API_KW = ("controller", "restcontroller", "resource", "handler",
+                   "router", "route", "endpoint", "servlet")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _API_KW):
+            return {"artifact_type": "controller", "risk_areas": ["api"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        # Business logic / services (extended: facade, usecase, aspect, listener, component)
+        _SERVICE_KW = ("service", "serviceimpl", "servicefacade", "facade", "usecase",
+                       "interactor", "aspect", "listener", "subscriber", "eventhandler", "component")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _SERVICE_KW):
+            return {"artifact_type": "service", "risk_areas": ["transactions", "business_logic"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        # Data access / repositories
+        _DAO_KW = ("repository", "repositoryimpl", "dao", "daoimpl", "store", "jparepository")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _DAO_KW):
+            return {"artifact_type": "repository", "risk_areas": ["persistence"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        # MyBatis / ORM mappers
+        if "mapper" in stem_lower:
+            return {"artifact_type": "mapper", "risk_areas": ["persistence"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        # Spring / app config files (by canonical name)
+        if name_lower in ("application.yml", "application.yaml", "application.properties",
+                           "bootstrap.yml", "bootstrap.yaml", "bootstrap.properties"):
+            return {"artifact_type": "spring_config", "risk_areas": ["config"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        if name_lower.startswith("application-") and suffix in (".yml", ".yaml", ".properties"):
+            return {"artifact_type": "spring_profile", "risk_areas": ["config"], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "high"}
+        _BUILD_MANIFEST_NAMES = frozenset({
+            "pom.xml", "build.gradle", "build.gradle.kts",
+            "settings.gradle", "settings.gradle.kts",
+            "pyproject.toml", "setup.py", "setup.cfg",
+            "package.json", "package-lock.json", "yarn.lock",
+            "cargo.toml", "go.mod", "go.sum",
+            "gemfile", "gemfile.lock", "build.sbt",
+            "requirements.txt", "requirements-dev.txt",
+        })
+        if name_lower in _BUILD_MANIFEST_NAMES:
+            return {"artifact_type": "build_manifest", "risk_areas": ["config", "dependencies"], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "high"}
+        # Configuration classes / files
+        _CONFIG_STEM_KW = ("config", "configuration", "properties", "settings")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _CONFIG_STEM_KW):
+            return {"artifact_type": "config", "risk_areas": ["config"], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "high"}
+        # DB migrations / SQL
+        if suffix == ".sql" or any(kw in norm_lower for kw in ("migration", "flyway", "liquibase", "changelog")):
+            return {"artifact_type": "db_migration", "risk_areas": ["persistence"], "impact_level": "high", "is_noise": False, "module": module, "confidence": "high"}
+        # Domain models / entities
+        _ENTITY_KW = ("entity", "model", "domain", "aggregate", "valueobject")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _ENTITY_KW):
+            return {"artifact_type": "domain_model", "risk_areas": ["persistence"], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "high"}
+        # DTOs / request-response objects
+        _DTO_KW = ("dto", "request", "response", "payload", "command", "query", "event")
+        if suffix in _CODE_EXTS and any(kw in stem_lower for kw in _DTO_KW):
+            return {"artifact_type": "dto", "risk_areas": [], "impact_level": "low", "is_noise": False, "module": module, "confidence": "high"}
+        # Generic source code — closed taxonomy, confidence=low signals uncertain classification
+        if suffix in _CODE_EXTS:
+            return {"artifact_type": "generic_source", "risk_areas": [], "impact_level": "medium", "is_noise": False, "module": module, "confidence": "low"}
+        # Generic config / data files — fold into config type
+        if suffix in _CONFIG_EXTS:
+            return {"artifact_type": "config", "risk_areas": ["config"], "impact_level": "low", "is_noise": False, "module": module, "confidence": "low"}
+        # Docs
+        if suffix in (".md", ".rst", ".txt", ".adoc"):
+            return {"artifact_type": "documentation", "risk_areas": [], "impact_level": "low", "is_noise": False, "module": module, "confidence": "high"}
+        # Binaries, images, lock files — treat as noise (closed taxonomy: no unknown_*)
+        return {"artifact_type": "ide_noise", "risk_areas": [], "impact_level": "noise", "is_noise": True, "module": module, "confidence": "low"}
+    def _build_delta_impact(
+        self,
+        changed_files: list[str],
+        all_paths: list[str],
+        entry_points: list,
+        since: Optional[str],
+    ) -> tuple[list[RelevantFile], str, list[str], list[dict[str, Any]], dict[str, str], list[str]]:
+        """Build incremental impact analysis for changed files.
+        Returns:
+            (relevant_files, impact_summary, affected_modules, risk_areas,
+             why_these_files, analysis_gaps)
+        Changed files are always included in relevant_files (never dropped by score).
+        Related files are expanded type-aware: controller→service→repository→mapper chain.
+        Scoring is hierarchical by artifact_type, not by heuristic impact_level.
+        """
+        # Per-artifact deterministic scores — strictly ordered by semantic role
+        _ARTIFACT_SCORE: dict[str, float] = {
+            "entrypoint":     0.95,
+            "security":       0.90,
+            "controller":     0.85,
+            "service":        0.80,
+            "db_migration":   0.75,
+            "repository":     0.70,
+            "mapper":         0.65,
+            "spring_config":  0.60,
+            "config":         0.55,
+            "spring_profile": 0.50,
+            "domain_model":   0.50,
+            "build_manifest": 0.45,
+            "generic_source": 0.45,
+            "dto":            0.35,
+            "test":           0.30,
+            "documentation":  0.25,
+            "ide_noise":      0.10,
+        }
+        # impact_level per artifact_type — used for risk_areas severity ordering
+        _ARTIFACT_IMPACT: dict[str, str] = {
+            "entrypoint": "critical", "security": "critical",
+            "controller": "high", "service": "high", "repository": "high",
+            "mapper": "high", "db_migration": "high", "spring_config": "high",
+            "config": "medium", "spring_profile": "medium",
+            "build_manifest": "medium", "domain_model": "medium",
+            "generic_source": "medium",
+            "dto": "low", "test": "low", "documentation": "low", "ide_noise": "noise",
+        }
+        # propagation_risk per artifact_type
+        _PROPAGATION_RISK: dict[str, str] = {
+            "entrypoint": "high", "security": "high", "controller": "high",
+            "db_migration": "high", "spring_config": "high",
+            "service": "medium", "repository": "medium", "mapper": "medium",
+            "config": "medium", "domain_model": "medium",
+            "spring_profile": "low", "build_manifest": "low", "generic_source": "low",
+            "dto": "low", "test": "low", "documentation": "low", "ide_noise": "low",
+        }
+        # type-aware expansion: which artifact types a changed type should pull in
+        _EXPANSION_TARGETS: dict[str, frozenset[str]] = {
+            "controller":    frozenset({"service", "security", "dto"}),
+            "service":       frozenset({"repository", "mapper"}),
+            "repository":    frozenset({"mapper", "domain_model"}),
+            "mapper":        frozenset({"repository", "domain_model"}),
+            "security":      frozenset({"controller", "config", "spring_config"}),
+            "spring_config": frozenset({"service", "config", "repository"}),
+            "config":        frozenset({"service", "repository", "controller"}),
+            "entrypoint":    frozenset({"security", "config", "spring_config"}),
+            "dto":           frozenset({"controller", "service"}),
+            "domain_model":  frozenset({"repository", "service"}),
+            "db_migration":  frozenset({"repository", "mapper"}),
+            "spring_profile": frozenset({"service", "config"}),
+            "generic_source": frozenset({"service", "repository"}),
+            "test":          frozenset(),
+            "documentation": frozenset(),
+            "ide_noise":     frozenset(),
+            "build_manifest": frozenset(),
+        }
+        _SEV_ORDER = ["noise", "low", "medium", "high", "critical"]
+        # primary impact area used in structured reasoning
+        def _impact_area(risk_areas: list[str], atype: str) -> str:
+            if "security" in risk_areas:
+                return "security"
+            if "api" in risk_areas:
+                return "api"
+            if "persistence" in risk_areas or "transactions" in risk_areas:
+                return "persistence"
+            if "config" in risk_areas or "dependencies" in risk_areas:
+                return "config"
+            if "tests" in risk_areas:
+                return "tests"
+            return {
+                "controller": "api", "service": "business_logic",
+                "repository": "persistence", "mapper": "persistence",
+                "security": "security", "config": "config",
+                "spring_config": "config", "spring_profile": "config",
+                "build_manifest": "build", "domain_model": "persistence",
+                "dto": "api", "db_migration": "persistence",
+                "test": "tests", "entrypoint": "api",
+                "generic_source": "unknown",
+            }.get(atype, "unknown")
+        def _role_in_system(atype: str, in_ep_paths: bool) -> str:
+            if in_ep_paths or atype in ("entrypoint", "controller"):
+                return "entrypoint"
+            if atype in ("config", "spring_config", "spring_profile", "build_manifest"):
+                return "config"
+            if atype in ("dto", "domain_model", "test", "documentation"):
+                return "leaf"
+            return "dependency"
+        def _structured_why(atype: str, module: str, role: str, risk_areas: list[str]) -> str:
+            area = _impact_area(risk_areas, atype)
+            prop = _PROPAGATION_RISK.get(atype, "low")
+            parts = [
+                f"artifact_type: {atype}",
+                f"role_in_system: {role}",
+                f"impact_area: {area}",
+                f"propagation_risk: {prop}",
+            ]
+            if module:
+                parts.append(f"module: {module}")
+            return " | ".join(parts)
+        if not changed_files:
+            return (
+                [],
+                "No changes detected — verify the git ref passed to --since",
+                [],
+                [],
+                {},
+                ["No changed files found. Check that --since ref exists and the diff is non-empty."],
+            )
+        ep_paths = {ep.path for ep in entry_points}
+        # ── Step 1: classify every changed file ───────────────────────────────
+        classifications: dict[str, dict[str, Any]] = {
+            f: self._classify_changed_file(f) for f in changed_files
+        }
+        # ── Step 2: build relevant_files from the changed set ─────────────────
+        relevant: list[RelevantFile] = []
+        why: dict[str, str] = {}
+        affected_modules_set: set[str] = set()
+        changed_dirs: set[str] = set()
+        risk_acc: dict[str, dict[str, Any]] = {}  # area → {files, severity}
+        ref_label = since or "HEAD~1"
+        # union of expansion targets across all changed artifact types
+        wanted_expansion_types: frozenset[str] = frozenset()
+        for path, cls in classifications.items():
+            atype = cls["artifact_type"]
+            score = _ARTIFACT_SCORE.get(atype, 0.45)
+            module = cls["module"]
+            if module:
+                affected_modules_set.add(module)
+            if not cls["is_noise"]:
+                parent = str(Path(path).parent).replace("\\", "/")
+                if parent and parent != ".":
+                    changed_dirs.add(parent)
+            impact_level = _ARTIFACT_IMPACT.get(atype, "medium")
+            for area in cls["risk_areas"]:
+                if area not in risk_acc:
+                    risk_acc[area] = {"files": [], "severity": "noise"}
+                risk_acc[area]["files"].append(path)
+                cur_idx = _SEV_ORDER.index(risk_acc[area]["severity"])
+                new_idx = _SEV_ORDER.index(impact_level)
+                if new_idx > cur_idx:
+                    risk_acc[area]["severity"] = impact_level
+            wanted_expansion_types = wanted_expansion_types | _EXPANSION_TARGETS.get(atype, frozenset())
+            in_ep = path in ep_paths
+            role = _role_in_system(atype, in_ep)
+            why_str = _structured_why(atype, module, role, cls["risk_areas"])
+            reason = f"changed since {ref_label} | artifact: {atype} | score: {score:.2f}"
+            relevant.append(RelevantFile(path=path, role=role, score=round(score, 2), reason=reason, why=why_str))
+            why[path] = why_str
+        relevant.sort(key=lambda f: (-f.score, f.path))
+        # ── Step 3: type-aware expansion to related files ─────────────────────
+        existing_paths = {rf.path for rf in relevant}
+        related: list[tuple[float, str, RelevantFile]] = []
+        for path in all_paths:
+            if path in existing_paths:
+                continue
+            if Path(path).suffix.lower() not in _ALL_EXTENSIONS:
+                continue
+            rel_cls = self._classify_changed_file(path)
+            if rel_cls["is_noise"]:
+                continue
+            rel_atype = rel_cls["artifact_type"]
+            # only expand if this file's type is in the wanted expansion set
+            if rel_atype not in wanted_expansion_types:
+                continue
+            parent = str(Path(path).parent).replace("\\", "/")
+            path_module = _extract_ddd_domain(path)
+            in_same_module = bool(path_module and path_module in affected_modules_set)
+            in_same_dir = parent in changed_dirs
+            if not (in_same_module or in_same_dir):
+                continue
+            rel_base = _ARTIFACT_SCORE.get(rel_atype, 0.45)
+            rel_score = round(rel_base * 0.60, 2)
+            ctx_type = "module" if in_same_module else "directory"
+            ctx_val = path_module if in_same_module else parent
+            triggers = [
+                Path(f).name for f in changed_files
+                if (
+                    (_extract_ddd_domain(f) == path_module if in_same_module
+                     else str(Path(f).parent).replace("\\", "/") == parent)
+                )
+            ]
+            in_ep = path in ep_paths
+            role = _role_in_system(rel_atype, in_ep)
+            why_str = (
+                f"artifact_type: {rel_atype} | role_in_system: {role}"
+                f" | pulled_by: type-aware expansion from {ctx_type} '{ctx_val}'"
+                f" | triggered_by: {', '.join(triggers[:3])}"
+            )
+            reason = f"expansion: {ctx_type} '{ctx_val}' | artifact: {rel_atype} | score: {rel_score:.2f}"
+            related.append((rel_score, path, RelevantFile(
+                path=path, role=role, score=rel_score, reason=reason, why=why_str
+            )))
+            why[path] = why_str
+        related.sort(key=lambda x: (-x[0], x[1]))
+        relevant.extend(rf for _, _, rf in related[:10])
+        # ── Step 4: impact summary ─────────────────────────────────────────────
+        type_counts: dict[str, int] = {}
+        all_risk_areas: set[str] = set()
+        noise_count = 0
+        for cls in classifications.values():
+            t = cls["artifact_type"]
+            type_counts[t] = type_counts.get(t, 0) + 1
+            all_risk_areas.update(cls["risk_areas"])
+            if cls["is_noise"]:
+                noise_count += 1
+        meaningful = len(changed_files) - noise_count
+        _SUMMARY_LABELS: dict[str, str] = {
+            "entrypoint":     "entrypoint(s)",
+            "security":       "security file(s)",
+            "controller":     "controller(s)",
+            "service":        "service(s)",
+            "repository":     "repository/repositories",
+            "mapper":         "MyBatis mapper(s)",
+            "spring_config":  "Spring config file(s)",
+            "spring_profile": "Spring profile config(s)",
+            "config":         "configuration file(s)",
+            "build_manifest": "build manifest(s)",
+            "db_migration":   "database migration(s)",
+            "domain_model":   "domain model(s)",
+            "dto":            "DTO(s)",
+            "test":           "test file(s)",
+            "generic_source": "source file(s)",
+            "documentation":  "documentation file(s)",
+        }
+        if meaningful == 0:
+            impact_summary = (
+                f"{noise_count} IDE/tooling file(s) changed"
+                " — no semantic impact on application logic"
+            )
+        else:
+            _sev_rank = {"critical": 4, "high": 3, "medium": 2, "low": 1, "noise": 0}
+            parts = []
+            for atype, count in sorted(
+                type_counts.items(),
+                key=lambda kv: -_sev_rank.get(_ARTIFACT_IMPACT.get(kv[0], "medium"), 0),
+            ):
+                if atype == "ide_noise":
+                    continue
+                label = _SUMMARY_LABELS.get(atype, f"source file(s) ({atype})")
+                parts.append(f"{count} {label}")
+            impact_summary = "; ".join(parts) if parts else f"{meaningful} source file(s) changed"
+            if all_risk_areas:
+                impact_summary += f" — risk areas: {', '.join(sorted(all_risk_areas))}"
+            if noise_count > 0:
+                impact_summary += f" ({noise_count} IDE/tooling file(s) excluded)"
+        # ── Step 5: risk_areas output list ─────────────────────────────────────
+        risk_areas_out: list[dict[str, Any]] = sorted(
+            [
+                {
+                    "area": area,
+                    "severity": info["severity"],
+                    "affected_files": sorted(info["files"])[:5],
+                }
+                for area, info in risk_acc.items()
+            ],
+            key=lambda x: (-_SEV_ORDER.index(x["severity"]), x["area"]),
+        )
+        # ── Step 6: analysis gaps ──────────────────────────────────────────────
+        analysis_gaps: list[str] = [
+            "Related file expansion uses type-aware propagation chains + module/directory heuristics — import graph not traced",
+        ]
+        if noise_count > 0 and meaningful > 0:
+            analysis_gaps.append(
+                f"{noise_count} IDE/tooling file(s) in diff excluded from impact analysis"
+            )
+        elif noise_count > 0 and meaningful == 0:
+            analysis_gaps.append(
+                "All changed files are IDE/tooling — no actionable semantic impact detected"
+            )
+        low_confidence = [f for f, cls in classifications.items() if cls.get("confidence") == "low" and not cls["is_noise"]]
+        if low_confidence:
+            analysis_gaps.append(
+                f"{len(low_confidence)} file(s) classified with low confidence"
+                " (artifact type inferred from extension only)"
+                " — consider adding stem patterns to _classify_changed_file: "
+                + ", ".join(Path(f).name for f in low_confidence[:3])
+            )
+        if not affected_modules_set and any(not cls["is_noise"] for cls in classifications.values()):
+            analysis_gaps.append(
+                "DDD module/package structure not detected in changed paths"
+                " — related file expansion uses directory proximity only"
+            )
+        return (
+            relevant,
+            impact_summary,
+            sorted(affected_modules_set),
+            risk_areas_out,
+            why,
+            analysis_gaps,
+        )
     def _get_git_changed_files(self, since: Optional[str] = None) -> list[str]:
         """Get files changed since a git ref (default: HEAD~1) relative to self.root.

{sourcecode-1.20.0.dist-info → sourcecode-1.22.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 1.20.0
+Version: 1.22.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004
@@ -221,7 +221,7 @@ Description-Content-Type: text/markdown
 **Compressed AI-ready context for Java/Spring enterprise codebases.**
-![Version](https://img.shields.io/badge/version-1.20.0-blue)
+![Version](https://img.shields.io/badge/version-1.22.0-blue)
 ![Python](https://img.shields.io/badge/python-3.10%2B-green)
 ---
@@ -255,7 +255,7 @@ pipx install sourcecode
 ```bash
 sourcecode version
-# sourcecode 1.20.0
+# sourcecode 1.22.0
 ```
 ---

{sourcecode-1.20.0.dist-info → sourcecode-1.22.0.dist-info}/RECORD RENAMED Viewed

@@ -1,10 +1,10 @@
-sourcecode/__init__.py,sha256=QZACuNjk_A3P8zRH7TdSQLgIDesiUHq3J12ZlBSdjvo,103
+sourcecode/__init__.py,sha256=AYpzylZKC4FdV_cDIgkpP-gpSaG-icZE-DD43XcAFXA,103
 sourcecode/adaptive_scanner.py,sha256=RTNExwWPXzjgLaRueT7UuxkPj5ZEToWjGbx1j0LSZ9E,10250
 sourcecode/architecture_analyzer.py,sha256=MyBa0Hf5HmkudZQDLKrjcWDKETXETXl0mQX1swtTwAA,39091
 sourcecode/architecture_summary.py,sha256=z34_6v7cSwy98cof2UVciGho7SCrZ93tiqMmq5WNzRQ,20405
 sourcecode/ast_extractor.py,sha256=XgrZg2DcWcUm9r87cRG3KGO7IK2TIL_N-CvhSbUmmh4,49901
 sourcecode/classifier.py,sha256=pYve2J1LqtYssU3lYLMDz18PT-CjN5c18QYE7R_IG1Q,7507
-sourcecode/cli.py,sha256=5Nhv7GdhG1i76eWRzlv8RCn6UkUJHSYiq6953_bDOBM,75912
+sourcecode/cli.py,sha256=gcOs2FiimQi8uS-ORhmkDvAZf3IiJgfUyYutqO1ECaQ,76407
 sourcecode/code_notes_analyzer.py,sha256=y1MJBnPZHYp4i6cQCXUb9ATIyifS_qMQWjw_8lPkpsU,9215
 sourcecode/confidence_analyzer.py,sha256=xw_Jv8pAd0wd8t2vvQlorw8Ih0rSF3YCoFS8K-_4aXg,15762
 sourcecode/context_scorer.py,sha256=QpChSpsmaAYz91rXA4Ue5xzQmNz_ZboZN09YOHScq1U,14679
@@ -20,7 +20,7 @@ sourcecode/file_classifier.py,sha256=48ly5Z6exkzBy8lNy1AkdP4-oJqIA1zT3LZfffuTyDo
 sourcecode/git_analyzer.py,sha256=_pCg2V4d2aa17k9hayTzpexAj8syvyk4y9NYNvvgOAI,12802
 sourcecode/graph_analyzer.py,sha256=iUK-7pSV-cvGqqD2hENdYmhnm0wcXFEyK-xnu5ul8OU,62515
 sourcecode/metrics_analyzer.py,sha256=m0ENgtqKeBL17kUIK3fmGkgo7UfXBNHxCMj0H_Y5K7c,22750
-sourcecode/prepare_context.py,sha256=314QXlwlRj-cTKvKbyDZfaHLECZQBGpSKJJopXfXUQw,53217
+sourcecode/prepare_context.py,sha256=v9BMh1Ro2CssAPUwo3Ch7ml0R7X8c5c13eJs3e4m6FE,80841
 sourcecode/progress.py,sha256=qn30sWaHOkjTgXsSBmiPkz7Rsbwc5oSlIe6JNEMYp_k,3149
 sourcecode/ranking_engine.py,sha256=virVglafZufioHpZpwktjMvUiL0TZELWQCQnQNV8dFo,9360
 sourcecode/redactor.py,sha256=xuGcadGEHaPw4qZXlMDvzMCsr4VOkdp3oBQptHyJk8c,2884
@@ -61,8 +61,8 @@ sourcecode/telemetry/consent.py,sha256=wLMvGNJeSSyZoNkQXpoUioY6mMv4Qdvuw7S9jAEWn
 sourcecode/telemetry/events.py,sha256=oEvvulfsv5GIDWG2174gSS6tNB95w38AIYiYeifGKlE,2294
 sourcecode/telemetry/filters.py,sha256=Asa71oRl7q3Wt_FMwuufIZJFzSYdgRNKS8LHCIyFeYE,4805
 sourcecode/telemetry/transport.py,sha256=KJeIPCPWMdmbCP3ySGs2iUlia34U6vWne2dZsUezesw,1560
-sourcecode-1.20.0.dist-info/METADATA,sha256=-xA4je3NwLeknIlLOwmGFovavH8n4uy-rxNaTwgSieo,20626
-sourcecode-1.20.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
-sourcecode-1.20.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
-sourcecode-1.20.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
-sourcecode-1.20.0.dist-info/RECORD,,
+sourcecode-1.22.0.dist-info/METADATA,sha256=sMO13GIMA6DnMIxi80QFCAwZte4pDIyZ3MiIVeWiEag,20626
+sourcecode-1.22.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
+sourcecode-1.22.0.dist-info/entry_points.txt,sha256=ex3F9rmbXeyDIoFQHtkEqTsKSaJow8F0LrVu8XfIktQ,57
+sourcecode-1.22.0.dist-info/licenses/LICENSE,sha256=7DdHrU9Z_3e7dSvq4ISijZNjnuHo5NIHNiHDouMQ9JU,10491
+sourcecode-1.22.0.dist-info/RECORD,,

{sourcecode-1.20.0.dist-info → sourcecode-1.22.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{sourcecode-1.20.0.dist-info → sourcecode-1.22.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{sourcecode-1.20.0.dist-info → sourcecode-1.22.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sourcecode 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl

sourcecode 1.20.0py3-none-any.whl → 1.22.0py3-none-any.whl