PyPI - sourcecode - Versions diffs - 0.38.0__tar.gz → 0.41.0__tar.gz - Mend

sourcecode 0.38.0__tar.gz → 0.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (145) hide show

{sourcecode-0.38.0 → sourcecode-0.41.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sourcecode
-Version: 0.38.0
+Version: 0.41.0
 Summary: Deterministic codebase context for AI coding agents
 License:                                  Apache License
                                    Version 2.0, January 2004

{sourcecode-0.38.0 → sourcecode-0.41.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "sourcecode"
-version = "0.38.0"
+version = "0.41.0"
 description = "Deterministic codebase context for AI coding agents"
 readme = "README.md"
 requires-python = ">=3.9"

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """sourcecode — Deterministic codebase context maps for AI coding agents."""
-__version__ = "0.38.0"
+__version__ = "0.41.0"

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/cli.py RENAMED Viewed

@@ -220,6 +220,29 @@ def _preprocess_argv() -> None:
     _sys.argv = _sys.argv[:1] + modified
+def _copy_to_clipboard(content: str) -> bool:
+    """Copy text to system clipboard. Returns True on success, False otherwise (never raises)."""
+    import subprocess
+    import sys as _sys
+    try:
+        if _sys.platform == "darwin":
+            subprocess.run(["pbcopy"], input=content.encode("utf-8"), check=True, timeout=10)
+            return True
+        elif _sys.platform == "win32":
+            subprocess.run(["clip"], input=content.encode("utf-16"), check=True, timeout=10)
+            return True
+        else:
+            for cmd in (["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]):
+                try:
+                    subprocess.run(cmd, input=content.encode("utf-8"), check=True, timeout=10)
+                    return True
+                except (FileNotFoundError, subprocess.CalledProcessError):
+                    continue
+            return False
+    except Exception:
+        return False
 app = typer.Typer(
     name="sourcecode",
     help=_HELP,
@@ -543,7 +566,7 @@ def main(
     entrypoints_only: bool = typer.Option(
         False,
         "--entrypoints-only",
-        help="Contract mode: include only files that are entrypoints or have exported symbols.",
+        help="Contract mode: include only files that are runtime entrypoints or have exported symbols (public API surface). Note: 'entrypoints' here includes all files with exports, not strictly detected runtime entry points.",
     ),
     changed_only: bool = typer.Option(
         False,
@@ -571,6 +594,12 @@ def main(
         "--symbol",
         help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
     ),
+    copy: bool = typer.Option(
+        False,
+        "--copy",
+        "-c",
+        help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
+    ),
 ) -> None:
     """Analyze a repository and produce structured context for AI coding agents.
@@ -1127,11 +1156,15 @@ def main(
         _all_call_files = set(_fan_in) | set(_fan_out)
         _hotspots: list[dict] = []
-        # Filter test paths from hotspots — they dominate fan-in by calling many modules
+        # Filter test, noise, and auxiliary paths — they dominate fan-in but carry no signal
         _TEST_MARKERS = {"/test", "/tests", "/spec", "/specs", "_test.", ".test.", ".spec."}
+        from sourcecode.ranking_engine import RankingEngine as _RankingEngine
+        _sem_engine = _RankingEngine(sm.monorepo_packages)
         for _p in _all_call_files:
             if any(_m in _p for _m in _TEST_MARKERS) or _p.startswith("test"):
                 continue
+            if _sem_engine.is_noise(_p) or _sem_engine.is_auxiliary(_p):
+                continue
             _in = _fan_in[_p]
             _out = _fan_out[_p]
             _score = _in * 2.0 + _out * 1.0
@@ -1386,6 +1419,13 @@ def main(
     # 6. Write output (CLI-04)
     write_output(content, output=output)
+    # 7. Clipboard copy (--copy / -c)
+    if copy and output is None:
+        _trimmed = content.strip()
+        if _trimmed and _trimmed not in ("{}", "[]", "null"):
+            if _copy_to_clipboard(content):
+                typer.echo("✓ copied to clipboard", err=True)
 @app.command("prepare-context")
 def prepare_context_cmd(
@@ -1417,6 +1457,12 @@ def prepare_context_cmd(
         "--dry-run",
         help="Show what would be analyzed without running it",
     ),
+    copy: bool = typer.Option(
+        False,
+        "--copy",
+        "-c",
+        help="Copy output to system clipboard after a successful run. No-op when clipboard is unavailable.",
+    ),
 ) -> None:
     """Task-specific context for AI coding agents.
@@ -1514,7 +1560,14 @@ def prepare_context_cmd(
     if llm_prompt:
         out["llm_prompt"] = builder.render_prompt(output)
-    typer.echo(json.dumps(out, indent=2, ensure_ascii=False))
+    _pc_content = json.dumps(out, indent=2, ensure_ascii=False)
+    typer.echo(_pc_content)
+    if copy:
+        _trimmed = _pc_content.strip()
+        if _trimmed and _trimmed not in ("{}", "[]", "null"):
+            if _copy_to_clipboard(_pc_content):
+                typer.echo("✓ copied to clipboard", err=True)
 # ── Telemetry commands ────────────────────────────────────────────────────────

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_model.py RENAMED Viewed

@@ -91,6 +91,7 @@ class FileContract:
     fan_out: int = 0  # how many files this imports
     is_entrypoint: bool = False
     is_changed: bool = False
+    ranking_reasons: list[str] = field(default_factory=list)
     # Extraction quality
     extraction_method: str = "heuristic"  # ast | tree_sitter | heuristic

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_pipeline.py RENAMED Viewed

@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
 from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
 from sourcecode.contract_model import ContractSummary, FileContract
+from sourcecode.ranking_engine import RankingEngine
 from sourcecode.relevance_scorer import RelevanceScorer
 from sourcecode.schema import EntryPoint, MonorepoPackageInfo
@@ -27,22 +28,6 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
 _MAX_FILES = 500      # hard cap on files extracted per run
 _SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
-# Role-based score adjustments applied after contract extraction.
-# Runtime roles get a boost; config/util are neutral or penalized.
-_ROLE_SCORE: dict[str, float] = {
-    "entrypoint": 0.15,
-    "service":    0.10,
-    "route":      0.10,
-    "api":        0.08,
-    "middleware": 0.06,
-    "store":      0.05,
-    "model":      0.05,
-    "hook":       0.05,
-    "component":  0.03,
-    "util":       0.00,
-    "config":    -0.10,
-    "unknown":    0.00,
-}
 RankStrategy = Literal["relevance", "centrality", "git-churn"]
@@ -194,6 +179,7 @@ class ContractPipeline:
         """
         entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
         scorer = RelevanceScorer(monorepo_packages)
+        engine = RankingEngine(monorepo_packages)
         # 1. Changed files (for --changed-only and ranking)
         changed_files: set[str] = set()
@@ -267,9 +253,24 @@ class ContractPipeline:
         if rank_by == "git-churn":
             churn = _get_git_churn(root, [c.path for c in contracts])
-        # 6. Compute relevance scores
+        # 6. Compute relevance scores via unified ranking engine
+        max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
+        max_churn_val = max(churn.values(), default=1) if churn else 1
         for c in contracts:
-            c.relevance_score = self._score(c, scorer, churn)
+            fs = engine.score(
+                c.path,
+                fan_in=c.fan_in,
+                fan_out=c.fan_out,
+                max_fan_in=max_fan_in,
+                git_churn=churn.get(c.path, 0),
+                max_churn=max_churn_val,
+                is_entrypoint=c.is_entrypoint,
+                is_changed=c.is_changed,
+                export_count=len(c.exports),
+                task="default",
+            )
+            c.relevance_score = fs.display_score
+            c.ranking_reasons = fs.reasons
         # 7. Rank
         contracts = self._rank(contracts, rank_by)
@@ -285,7 +286,7 @@ class ContractPipeline:
                     known_paths=set(src_paths),
                     entry_paths=entry_paths,
                     changed_files=changed_files,
-                    scorer=scorer,
+                    engine=engine,
                 )
         # 9. Entrypoints-only filter
@@ -312,45 +313,13 @@ class ContractPipeline:
         )
         return contracts, summary
-    def _score(
-        self,
-        c: FileContract,
-        scorer: RelevanceScorer,
-        churn: dict[str, int],
-    ) -> float:
-        base = scorer.score(c.path)
-        if c.is_entrypoint:
-            base += 0.3
-        if c.is_changed:
-            base += 0.2
-        # Fan-in is the strongest signal: many callers = critical contract
-        fi_score = min(c.fan_in / 10.0, 0.3)
-        fo_score = min(c.fan_out / 15.0, 0.15)
-        base += fi_score + fo_score
-        # Exported API value
-        export_count = len(c.exports)
-        base += min(export_count / 20.0, 0.1)
-        # Churn
-        churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
-        base += churn_score
-        # Role-based boost: runtime roles score higher than auxiliary
-        base += _ROLE_SCORE.get(c.role, 0.0)
-        return min(1.0, base)
     def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
         if rank_by == "centrality":
-            # Approximate centrality: fan_in + fan_out
-            return sorted(contracts, key=lambda c: -(c.fan_in + c.fan_out))
+            return sorted(contracts, key=lambda c: (-(c.fan_in + c.fan_out), c.path))
         if rank_by == "git-churn":
-            return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score))
-        # Default: relevance
-        return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
+            return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score, c.path))
+        # Default: relevance — path breaks ties deterministically
+        return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score, c.path))
     def _symbol_deep_scan(
         self,
@@ -359,7 +328,7 @@ class ContractPipeline:
         known_paths: set[str],
         entry_paths: set[str],
         changed_files: set[str],
-        scorer: RelevanceScorer,
+        engine: RankingEngine,
     ) -> list[FileContract]:
         """Grep-based fallback when the shallow scan missed the defining files.
@@ -367,7 +336,7 @@ class ContractPipeline:
         extracts contracts for candidates not already processed, then re-applies
         the symbol filter. Fan-in/fan-out are not computed for these contracts.
         """
-        candidates = _find_symbol_files(root, symbol, known_paths, scorer)
+        candidates = _find_symbol_files(root, symbol, known_paths, engine)
         if not candidates:
             return []
@@ -379,7 +348,9 @@ class ContractPipeline:
                 continue
             contract.is_entrypoint = rel_path in entry_paths
             contract.is_changed = rel_path in changed_files
-            contract.relevance_score = scorer.score(rel_path)
+            fs = engine.score(rel_path, is_entrypoint=contract.is_entrypoint, is_changed=contract.is_changed)
+            contract.relevance_score = fs.display_score
+            contract.ranking_reasons = fs.reasons
             extra.append(contract)
         return _filter_by_symbol(extra, symbol)
@@ -531,7 +502,7 @@ def _find_symbol_files(
     root: Path,
     symbol: str,
     known_paths: set[str],
-    scorer: RelevanceScorer,
+    engine: RankingEngine,
 ) -> list[str]:
     """Find source files outside *known_paths* that contain *symbol* as text.
@@ -560,7 +531,7 @@ def _find_symbol_files(
             if line.startswith("./"):
                 line = line[2:]
             line = line.replace("\\", "/")
-            if line and line not in known_paths and not scorer.is_noise(line):
+            if line and line not in known_paths and not engine.is_noise(line):
                 found.append(line)
         return found
     except Exception:
@@ -578,7 +549,7 @@ def _find_symbol_files(
                 rel_str = str(rel).replace("\\", "/")
             except ValueError:
                 continue
-            if rel_str in known_paths or scorer.is_noise(rel_str):
+            if rel_str in known_paths or engine.is_noise(rel_str):
                 continue
             try:
                 content = Path(full).read_text(encoding="utf-8", errors="replace")

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/doc_analyzer.py RENAMED Viewed

@@ -185,6 +185,13 @@ class DocAnalyzer:
         if any(r.doc_text and r.doc_text.endswith(self._TRUNCATION_SUFFIX) for r in records):
             truncated = True
+        # Explicit absence signal: scanned files but found nothing
+        if total_count == 0 and file_paths:
+            limitations.append(
+                f"no_docs_found: {len(file_paths)} file(s) scanned, "
+                "no docstrings or JSDoc comments found"
+            )
         summary = DocSummary(
             requested=True,
             total_count=total_count,

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/git_analyzer.py RENAMED Viewed

@@ -20,12 +20,13 @@ _RELEASE_COMMIT_RE = re.compile(
 )
 # Matches version-bump phrases anywhere in the commit subject (multilingual)
 _RELEASE_COMMIT_CONTAINS_RE = re.compile(
-    r"subiendo a v?[\d.]"          # Spanish: "subiendo a v.0.28.0"
+    r"subiendo a v?[\d.]"              # Spanish: "subiendo a 0.38.0", "subiendo a v.0.31.0"
+    r"|actualizando a v?[\d.]"         # Spanish: "actualizando a 0.15.1"
     r"|bumping to v?[\d.]"
     r"|preparing (?:v|release)[\d. ]"
     r"|releasing v?[\d.]"
     r"|cut v?[\d.]"
-    r"|\bv\d+\.\d+\.\d+\b",       # bare version tag in middle of message
+    r"|\bv\d+\.\d+\.\d+\b",           # bare version tag in middle of message
     re.IGNORECASE,
 )
@@ -34,12 +35,25 @@ _HOTSPOT_ADMIN_FILENAMES: frozenset[str] = frozenset({
     "CHANGELOG.md", "CHANGELOG", "CHANGES.md", "CHANGES", "HISTORY.md",
     "RELEASE.md", "RELEASES.md", "RELEASE_NOTES.md", "CHANGELOG.rst", "NEWS.md", "NEWS.rst",
     "VERSION", "VERSION.txt", "version.txt", ".version",
+    "_version.py", "__version__.py", "version.py",
+    "pyproject.toml", "setup.cfg",
     "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb",
     "Cargo.lock", "poetry.lock", "Pipfile.lock", "composer.lock",
     "go.sum", "Gemfile.lock",
 })
 _HOTSPOT_ADMIN_SUFFIXES: tuple[str, ...] = (".lock", ".snap", ".min.js", ".min.css")
+# Auxiliary directory names whose files should be excluded from hotspots —
+# docs, examples, benchmarks etc. are high-commit but low operational signal.
+_HOTSPOT_AUX_DIRS: frozenset[str] = frozenset({
+    "docs", "doc", "benchmark", "benchmarks", "example", "examples",
+    "demo", "demos", "playground", "playgrounds", "fixture", "fixtures",
+    "generated", "generate", "storybook", ".storybook", "stories",
+    "sandbox", "sandboxes",
+    "ci", "translations", "locales", "locale", "i18n", "l10n",
+    ".planning",
+})
 def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
     result = subprocess.run(
@@ -191,7 +205,7 @@ def _parse_commits(output: str) -> list:
 def _is_hotspot_admin(path: str) -> bool:
-    """True for files that are noisy from release/bot commits, not semantic changes."""
+    """True for files that are noisy from release/bot commits or auxiliary dirs."""
     filename = path.rsplit("/", 1)[-1]
     if filename in _HOTSPOT_ADMIN_FILENAMES:
         return True
@@ -202,9 +216,15 @@ def _is_hotspot_admin(path: str) -> bool:
     _lower = filename.lower()
     if _lower.startswith("changelog.") or _lower.startswith("changes."):
         return True
-    # lerna.json and root-level package.json are modified by version bumps, not dev work
+    # lerna.json is modified by version bumps, not dev work
     if filename in ("lerna.json",):
         return True
+    # Auxiliary directory parts — docs, benchmarks, examples, demos, etc.
+    # These may have high commit counts but are not operational signal for agents.
+    parts = path.split("/")
+    for part in parts[:-1]:  # check directory components, not the filename itself
+        if part.lower() in _HOTSPOT_AUX_DIRS:
+            return True
     return False
@@ -231,6 +251,9 @@ def _parse_hotspots(output: str) -> list:
             continue
         if skip_commit:
             continue
+        # Skip git artifact lines that are not file paths: flags (-o, --), separators, etc.
+        if line.startswith("-") or not ("/" in line or "." in line):
+            continue
         if _is_hotspot_admin(line):
             continue
         file_counts[line] += 1

{sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/prepare_context.py RENAMED Viewed

@@ -627,94 +627,81 @@ class TaskContextBuilder:
         git_hotspots: Optional[dict[str, int]] = None,
         uncommitted_files: Optional[set[str]] = None,
     ) -> list[RelevantFile]:
-        from sourcecode.relevance_scorer import RelevanceScorer
+        from sourcecode.ranking_engine import RankingEngine
         from sourcecode.file_classifier import FileClassifier
-        scorer = RelevanceScorer(monorepo_packages or [])
-        file_classifier = FileClassifier(self.root, [
-            # _rank_files only needs production path evidence; EntryPoint objects
-            # are not available here, so category evidence is best-effort below.
-        ], monorepo_packages or [])
-        # Auxiliary entry points (benchmark, docs, examples) must not get
-        # the production entry boost — they are not runtime signals.
-        runtime_entry_set = {ep for ep in entry_set if not scorer.is_auxiliary(ep)}
+        engine = RankingEngine(monorepo_packages or [])
+        file_classifier = FileClassifier(self.root, [], monorepo_packages or [])
+        # Auxiliary entry points (benchmark, docs, examples) are not runtime
+        runtime_entry_set = {ep for ep in entry_set if not engine.is_auxiliary(ep)}
         _hotspots = git_hotspots or {}
         _uncommitted = uncommitted_files or set()
         _max_churn = max(_hotspots.values(), default=1)
-        scored: list[tuple[float, RelevantFile]] = []
+        scored: list[tuple[float, str, RelevantFile]] = []
         for path in all_paths:
             if Path(path).suffix.lower() not in _ALL_EXTENSIONS:
                 continue
             if any(pen in path for pen in spec.ranking_penalties):
                 continue
-            # Hard filter: tooling/config noise
-            if scorer.is_noise(path):
+            if engine.is_noise(path):
                 continue
             is_test = path in test_set
             if is_test and task_name != "generate-tests":
                 continue
-            score = 0.0
-            reasons: list[str] = []
+            # Structural + git signals from unified engine (task-weighted)
+            fs = engine.score(
+                path,
+                is_entrypoint=(path in runtime_entry_set),
+                git_churn=_hotspots.get(path, 0),
+                max_churn=_max_churn,
+                is_changed=(path in _uncommitted),
+                task=task_name,
+            )
-            # Only runtime entry points get the production boost
-            if path in runtime_entry_set:
-                score += 3.0
-                reasons.append("entry point")
+            if fs.score < -50:  # hard noise
+                continue
+            # Content classification boost (reads file imports)
+            content_boost = 0.0
+            content_reasons: list[str] = []
             file_class = file_classifier.classify(path)
             if file_class is not None:
-                score += file_class.relevance * 2.0
-                reasons.append(f"{file_class.category}: {file_class.reason}")
+                content_boost = file_class.relevance * 2.0
+                content_reasons.append(f"{file_class.category}: {file_class.reason}")
             if is_test:
-                score += 2.0
-                reasons.append("existing test")
-            elif self._is_source(path):
-                score += 0.5
-                if not reasons:
-                    reasons.append("source file with supported extension")
-            # Operational relevance boost/penalty from package role
-            rel = scorer.score(path)
-            score += (rel - 0.3) * 2.0  # center around 0.3 baseline
-            # Suppress auxiliary dirs (benchmarks, docs, examples, demos)
-            if scorer.is_auxiliary(path):
-                score -= 2.0
-            # Git churn: frequently changed files are high-signal for active work
-            churn = _hotspots.get(path, 0)
-            if churn > 0:
-                score += (churn / _max_churn) * 1.5
-                reasons.append(f"git churn ({churn})")
-            # Uncommitted changes: files actively being edited rank highest
-            if path in _uncommitted:
-                score += 1.0
-                reasons.append("uncommitted changes")
-            if score <= 0:
+                content_boost += 2.0
+                content_reasons.append("existing test")
+            elif self._is_source(path) and not content_reasons:
+                content_boost += 0.5
+            total = fs.score + content_boost
+            if total <= 0:
                 continue
             role = (
                 "entrypoint" if path in runtime_entry_set
                 else ("test" if is_test else "source")
             )
-            scored.append((score, RelevantFile(
+            all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
+            reason_str = ", ".join(all_reasons) if all_reasons else "source file"
+            scored.append((total, path, RelevantFile(
                 path=path,
                 role=role,
-                score=round(score, 1),
-                reason=", ".join(reasons) if reasons else "source file",
+                score=round(min(total / 3.0, 1.0), 2),
+                reason=reason_str,
             )))
-        scored.sort(key=lambda x: -x[0])
-        return [f for _, f in scored[:15]]
+        # Deterministic: score desc, then path asc as tiebreaker
+        scored.sort(key=lambda x: (-x[0], x[1]))
+        return [f for _, _, f in scored[:15]]
     def _is_test(self, path: str) -> bool:
         name = Path(path).name.lower()

sourcecode 0.38.0__tar.gz → 0.41.0__tar.gz

sourcecode 0.38.0__tar.gz → 0.41.0__tar.gz