sourcecode 0.38.0__tar.gz → 0.41.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sourcecode-0.38.0 → sourcecode-0.41.0}/PKG-INFO +1 -1
- {sourcecode-0.38.0 → sourcecode-0.41.0}/pyproject.toml +1 -1
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/__init__.py +1 -1
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/cli.py +56 -3
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_model.py +1 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_pipeline.py +32 -61
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/doc_analyzer.py +7 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/git_analyzer.py +27 -4
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/prepare_context.py +40 -53
- sourcecode-0.41.0/src/sourcecode/ranking_engine.py +231 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/relevance_scorer.py +4 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/semantic_analyzer.py +8 -2
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/serializer.py +136 -12
- {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/.gitignore +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/.ruff.toml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/CONTRIBUTING.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/LICENSE +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/README.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/SECURITY.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/docs/privacy.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/docs/schema.md +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/raw +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/adaptive_scanner.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/architecture_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/architecture_summary.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/ast_extractor.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/code_notes_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/confidence_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/context_summarizer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/coverage_parser.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/dependency_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/__init__.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/base.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/dart.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/dotnet.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/elixir.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/go.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/heuristic.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/hybrid.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/java.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/nodejs.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/parsers.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/php.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/project.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/python.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/ruby.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/rust.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/systems.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/terraform.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/tooling.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/entrypoint_classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/env_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/file_classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/graph_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/metrics_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/redactor.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/repo_classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/runtime_classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/scanner.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/schema.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/summarizer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/__init__.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/config.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/consent.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/events.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/filters.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/transport.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/tree_utils.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/workspace.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/__init__.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/conftest.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/coverage.xml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/go_service/go.mod +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/jacoco.xml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/lcov.info +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/package.json +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_architecture_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_architecture_summary.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_ast_extractor.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_classifier.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_cli.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_code_notes_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_contract_pipeline.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_coverage_parser.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_cross_consistency.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_analyzer_node_python.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_schema.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_dotnet.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_go_rust_java.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_nodejs.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_php_ruby_dart.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_python.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_universal_managed.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_universal_systems.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detectors_base.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_doc_analyzer_jsdom.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_doc_analyzer_python.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_analyzer_polyglot.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_analyzer_python_node.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_schema.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_hybrid_inference.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_dependencies.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_detection.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_docs.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_graph_modules.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_lqn.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_metrics.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_multistack.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_semantics.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_universal.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_metrics_analyzer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_packaging.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_phase1_improvements.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_pipeline_integrity.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_real_projects.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_redactor.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_scanner.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_schema.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_schema_normalization.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_analyzer_node.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_analyzer_python.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_import_resolution.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_schema.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_signal_hierarchy.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_summarizer.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_telemetry.py +0 -0
- {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_workspace_analyzer.py +0 -0
|
@@ -220,6 +220,29 @@ def _preprocess_argv() -> None:
|
|
|
220
220
|
_sys.argv = _sys.argv[:1] + modified
|
|
221
221
|
|
|
222
222
|
|
|
223
|
+
def _copy_to_clipboard(content: str) -> bool:
    """Copy *content* to the system clipboard via a platform command.

    The text is piped to the stdin of ``pbcopy`` on macOS, ``clip`` on
    Windows, and on any other platform ``xclip`` followed by ``xsel`` as a
    fallback. Each command is given at most 10 seconds.

    Args:
        content: Text to place on the clipboard.

    Returns:
        True as soon as one clipboard command exits successfully; False when
        every candidate fails or any unexpected error occurs. Never raises.
    """
    import subprocess
    import sys as _sys

    try:
        if _sys.platform == "darwin":
            candidates = [(["pbcopy"], "utf-8")]
        elif _sys.platform == "win32":
            # NOTE(review): utf-16 is presumably chosen so clip.exe handles
            # non-ASCII text — confirm on a real Windows console.
            candidates = [(["clip"], "utf-16")]
        else:
            candidates = [
                (["xclip", "-selection", "clipboard"], "utf-8"),
                (["xsel", "--clipboard", "--input"], "utf-8"),
            ]

        for cmd, encoding in candidates:
            payload = content.encode(encoding)
            try:
                subprocess.run(cmd, input=payload, check=True, timeout=10)
            except (OSError, subprocess.SubprocessError):
                # Missing binary, non-executable file, non-zero exit or a
                # timeout: this tool is unusable, so move on to the next
                # candidate instead of abandoning the whole copy.
                continue
            return True
    except Exception:
        # Best-effort feature: a clipboard failure must never break the CLI.
        return False
    return False
|
|
244
|
+
|
|
245
|
+
|
|
223
246
|
app = typer.Typer(
|
|
224
247
|
name="sourcecode",
|
|
225
248
|
help=_HELP,
|
|
@@ -543,7 +566,7 @@ def main(
|
|
|
543
566
|
entrypoints_only: bool = typer.Option(
|
|
544
567
|
False,
|
|
545
568
|
"--entrypoints-only",
|
|
546
|
-
help="Contract mode: include only files that are entrypoints or have exported symbols.",
|
|
569
|
+
help="Contract mode: include only files that are runtime entrypoints or have exported symbols (public API surface). Note: 'entrypoints' here includes all files with exports, not strictly detected runtime entry points.",
|
|
547
570
|
),
|
|
548
571
|
changed_only: bool = typer.Option(
|
|
549
572
|
False,
|
|
@@ -571,6 +594,12 @@ def main(
|
|
|
571
594
|
"--symbol",
|
|
572
595
|
help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
|
|
573
596
|
),
|
|
597
|
+
copy: bool = typer.Option(
|
|
598
|
+
False,
|
|
599
|
+
"--copy",
|
|
600
|
+
"-c",
|
|
601
|
+
help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
|
|
602
|
+
),
|
|
574
603
|
) -> None:
|
|
575
604
|
"""Analyze a repository and produce structured context for AI coding agents.
|
|
576
605
|
|
|
@@ -1127,11 +1156,15 @@ def main(
|
|
|
1127
1156
|
|
|
1128
1157
|
_all_call_files = set(_fan_in) | set(_fan_out)
|
|
1129
1158
|
_hotspots: list[dict] = []
|
|
1130
|
-
# Filter test
|
|
1159
|
+
# Filter test, noise, and auxiliary paths — they dominate fan-in but carry no signal
|
|
1131
1160
|
_TEST_MARKERS = {"/test", "/tests", "/spec", "/specs", "_test.", ".test.", ".spec."}
|
|
1161
|
+
from sourcecode.ranking_engine import RankingEngine as _RankingEngine
|
|
1162
|
+
_sem_engine = _RankingEngine(sm.monorepo_packages)
|
|
1132
1163
|
for _p in _all_call_files:
|
|
1133
1164
|
if any(_m in _p for _m in _TEST_MARKERS) or _p.startswith("test"):
|
|
1134
1165
|
continue
|
|
1166
|
+
if _sem_engine.is_noise(_p) or _sem_engine.is_auxiliary(_p):
|
|
1167
|
+
continue
|
|
1135
1168
|
_in = _fan_in[_p]
|
|
1136
1169
|
_out = _fan_out[_p]
|
|
1137
1170
|
_score = _in * 2.0 + _out * 1.0
|
|
@@ -1386,6 +1419,13 @@ def main(
|
|
|
1386
1419
|
# 6. Write output (CLI-04)
|
|
1387
1420
|
write_output(content, output=output)
|
|
1388
1421
|
|
|
1422
|
+
# 7. Clipboard copy (--copy / -c)
|
|
1423
|
+
if copy and output is None:
|
|
1424
|
+
_trimmed = content.strip()
|
|
1425
|
+
if _trimmed and _trimmed not in ("{}", "[]", "null"):
|
|
1426
|
+
if _copy_to_clipboard(content):
|
|
1427
|
+
typer.echo("✓ copied to clipboard", err=True)
|
|
1428
|
+
|
|
1389
1429
|
|
|
1390
1430
|
@app.command("prepare-context")
|
|
1391
1431
|
def prepare_context_cmd(
|
|
@@ -1417,6 +1457,12 @@ def prepare_context_cmd(
|
|
|
1417
1457
|
"--dry-run",
|
|
1418
1458
|
help="Show what would be analyzed without running it",
|
|
1419
1459
|
),
|
|
1460
|
+
copy: bool = typer.Option(
|
|
1461
|
+
False,
|
|
1462
|
+
"--copy",
|
|
1463
|
+
"-c",
|
|
1464
|
+
help="Copy output to system clipboard after a successful run. No-op when clipboard is unavailable.",
|
|
1465
|
+
),
|
|
1420
1466
|
) -> None:
|
|
1421
1467
|
"""Task-specific context for AI coding agents.
|
|
1422
1468
|
|
|
@@ -1514,7 +1560,14 @@ def prepare_context_cmd(
|
|
|
1514
1560
|
if llm_prompt:
|
|
1515
1561
|
out["llm_prompt"] = builder.render_prompt(output)
|
|
1516
1562
|
|
|
1517
|
-
|
|
1563
|
+
_pc_content = json.dumps(out, indent=2, ensure_ascii=False)
|
|
1564
|
+
typer.echo(_pc_content)
|
|
1565
|
+
|
|
1566
|
+
if copy:
|
|
1567
|
+
_trimmed = _pc_content.strip()
|
|
1568
|
+
if _trimmed and _trimmed not in ("{}", "[]", "null"):
|
|
1569
|
+
if _copy_to_clipboard(_pc_content):
|
|
1570
|
+
typer.echo("✓ copied to clipboard", err=True)
|
|
1518
1571
|
|
|
1519
1572
|
|
|
1520
1573
|
# ── Telemetry commands ────────────────────────────────────────────────────────
|
|
@@ -91,6 +91,7 @@ class FileContract:
|
|
|
91
91
|
fan_out: int = 0 # how many files this imports
|
|
92
92
|
is_entrypoint: bool = False
|
|
93
93
|
is_changed: bool = False
|
|
94
|
+
ranking_reasons: list[str] = field(default_factory=list)
|
|
94
95
|
|
|
95
96
|
# Extraction quality
|
|
96
97
|
extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
|
|
@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
|
|
|
17
17
|
|
|
18
18
|
from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
|
|
19
19
|
from sourcecode.contract_model import ContractSummary, FileContract
|
|
20
|
+
from sourcecode.ranking_engine import RankingEngine
|
|
20
21
|
from sourcecode.relevance_scorer import RelevanceScorer
|
|
21
22
|
from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
22
23
|
|
|
@@ -27,22 +28,6 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
|
|
|
27
28
|
_MAX_FILES = 500 # hard cap on files extracted per run
|
|
28
29
|
_SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
|
|
29
30
|
|
|
30
|
-
# Role-based score adjustments applied after contract extraction.
|
|
31
|
-
# Runtime roles get a boost; config/util are neutral or penalized.
|
|
32
|
-
_ROLE_SCORE: dict[str, float] = {
|
|
33
|
-
"entrypoint": 0.15,
|
|
34
|
-
"service": 0.10,
|
|
35
|
-
"route": 0.10,
|
|
36
|
-
"api": 0.08,
|
|
37
|
-
"middleware": 0.06,
|
|
38
|
-
"store": 0.05,
|
|
39
|
-
"model": 0.05,
|
|
40
|
-
"hook": 0.05,
|
|
41
|
-
"component": 0.03,
|
|
42
|
-
"util": 0.00,
|
|
43
|
-
"config": -0.10,
|
|
44
|
-
"unknown": 0.00,
|
|
45
|
-
}
|
|
46
31
|
|
|
47
32
|
RankStrategy = Literal["relevance", "centrality", "git-churn"]
|
|
48
33
|
|
|
@@ -194,6 +179,7 @@ class ContractPipeline:
|
|
|
194
179
|
"""
|
|
195
180
|
entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
|
|
196
181
|
scorer = RelevanceScorer(monorepo_packages)
|
|
182
|
+
engine = RankingEngine(monorepo_packages)
|
|
197
183
|
|
|
198
184
|
# 1. Changed files (for --changed-only and ranking)
|
|
199
185
|
changed_files: set[str] = set()
|
|
@@ -267,9 +253,24 @@ class ContractPipeline:
|
|
|
267
253
|
if rank_by == "git-churn":
|
|
268
254
|
churn = _get_git_churn(root, [c.path for c in contracts])
|
|
269
255
|
|
|
270
|
-
# 6. Compute relevance scores
|
|
256
|
+
# 6. Compute relevance scores via unified ranking engine
|
|
257
|
+
max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
|
|
258
|
+
max_churn_val = max(churn.values(), default=1) if churn else 1
|
|
271
259
|
for c in contracts:
|
|
272
|
-
|
|
260
|
+
fs = engine.score(
|
|
261
|
+
c.path,
|
|
262
|
+
fan_in=c.fan_in,
|
|
263
|
+
fan_out=c.fan_out,
|
|
264
|
+
max_fan_in=max_fan_in,
|
|
265
|
+
git_churn=churn.get(c.path, 0),
|
|
266
|
+
max_churn=max_churn_val,
|
|
267
|
+
is_entrypoint=c.is_entrypoint,
|
|
268
|
+
is_changed=c.is_changed,
|
|
269
|
+
export_count=len(c.exports),
|
|
270
|
+
task="default",
|
|
271
|
+
)
|
|
272
|
+
c.relevance_score = fs.display_score
|
|
273
|
+
c.ranking_reasons = fs.reasons
|
|
273
274
|
|
|
274
275
|
# 7. Rank
|
|
275
276
|
contracts = self._rank(contracts, rank_by)
|
|
@@ -285,7 +286,7 @@ class ContractPipeline:
|
|
|
285
286
|
known_paths=set(src_paths),
|
|
286
287
|
entry_paths=entry_paths,
|
|
287
288
|
changed_files=changed_files,
|
|
288
|
-
|
|
289
|
+
engine=engine,
|
|
289
290
|
)
|
|
290
291
|
|
|
291
292
|
# 9. Entrypoints-only filter
|
|
@@ -312,45 +313,13 @@ class ContractPipeline:
|
|
|
312
313
|
)
|
|
313
314
|
return contracts, summary
|
|
314
315
|
|
|
315
|
-
def _score(
|
|
316
|
-
self,
|
|
317
|
-
c: FileContract,
|
|
318
|
-
scorer: RelevanceScorer,
|
|
319
|
-
churn: dict[str, int],
|
|
320
|
-
) -> float:
|
|
321
|
-
base = scorer.score(c.path)
|
|
322
|
-
|
|
323
|
-
if c.is_entrypoint:
|
|
324
|
-
base += 0.3
|
|
325
|
-
if c.is_changed:
|
|
326
|
-
base += 0.2
|
|
327
|
-
|
|
328
|
-
# Fan-in is the strongest signal: many callers = critical contract
|
|
329
|
-
fi_score = min(c.fan_in / 10.0, 0.3)
|
|
330
|
-
fo_score = min(c.fan_out / 15.0, 0.15)
|
|
331
|
-
base += fi_score + fo_score
|
|
332
|
-
|
|
333
|
-
# Exported API value
|
|
334
|
-
export_count = len(c.exports)
|
|
335
|
-
base += min(export_count / 20.0, 0.1)
|
|
336
|
-
|
|
337
|
-
# Churn
|
|
338
|
-
churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
|
|
339
|
-
base += churn_score
|
|
340
|
-
|
|
341
|
-
# Role-based boost: runtime roles score higher than auxiliary
|
|
342
|
-
base += _ROLE_SCORE.get(c.role, 0.0)
|
|
343
|
-
|
|
344
|
-
return min(1.0, base)
|
|
345
|
-
|
|
346
316
|
def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
|
|
347
317
|
if rank_by == "centrality":
|
|
348
|
-
|
|
349
|
-
return sorted(contracts, key=lambda c: -(c.fan_in + c.fan_out))
|
|
318
|
+
return sorted(contracts, key=lambda c: (-(c.fan_in + c.fan_out), c.path))
|
|
350
319
|
if rank_by == "git-churn":
|
|
351
|
-
return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score))
|
|
352
|
-
# Default: relevance
|
|
353
|
-
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
|
|
320
|
+
return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score, c.path))
|
|
321
|
+
# Default: relevance — path breaks ties deterministically
|
|
322
|
+
return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score, c.path))
|
|
354
323
|
|
|
355
324
|
def _symbol_deep_scan(
|
|
356
325
|
self,
|
|
@@ -359,7 +328,7 @@ class ContractPipeline:
|
|
|
359
328
|
known_paths: set[str],
|
|
360
329
|
entry_paths: set[str],
|
|
361
330
|
changed_files: set[str],
|
|
362
|
-
|
|
331
|
+
engine: RankingEngine,
|
|
363
332
|
) -> list[FileContract]:
|
|
364
333
|
"""Grep-based fallback when the shallow scan missed the defining files.
|
|
365
334
|
|
|
@@ -367,7 +336,7 @@ class ContractPipeline:
|
|
|
367
336
|
extracts contracts for candidates not already processed, then re-applies
|
|
368
337
|
the symbol filter. Fan-in/fan-out are not computed for these contracts.
|
|
369
338
|
"""
|
|
370
|
-
candidates = _find_symbol_files(root, symbol, known_paths,
|
|
339
|
+
candidates = _find_symbol_files(root, symbol, known_paths, engine)
|
|
371
340
|
if not candidates:
|
|
372
341
|
return []
|
|
373
342
|
|
|
@@ -379,7 +348,9 @@ class ContractPipeline:
|
|
|
379
348
|
continue
|
|
380
349
|
contract.is_entrypoint = rel_path in entry_paths
|
|
381
350
|
contract.is_changed = rel_path in changed_files
|
|
382
|
-
|
|
351
|
+
fs = engine.score(rel_path, is_entrypoint=contract.is_entrypoint, is_changed=contract.is_changed)
|
|
352
|
+
contract.relevance_score = fs.display_score
|
|
353
|
+
contract.ranking_reasons = fs.reasons
|
|
383
354
|
extra.append(contract)
|
|
384
355
|
|
|
385
356
|
return _filter_by_symbol(extra, symbol)
|
|
@@ -531,7 +502,7 @@ def _find_symbol_files(
|
|
|
531
502
|
root: Path,
|
|
532
503
|
symbol: str,
|
|
533
504
|
known_paths: set[str],
|
|
534
|
-
|
|
505
|
+
engine: RankingEngine,
|
|
535
506
|
) -> list[str]:
|
|
536
507
|
"""Find source files outside *known_paths* that contain *symbol* as text.
|
|
537
508
|
|
|
@@ -560,7 +531,7 @@ def _find_symbol_files(
|
|
|
560
531
|
if line.startswith("./"):
|
|
561
532
|
line = line[2:]
|
|
562
533
|
line = line.replace("\\", "/")
|
|
563
|
-
if line and line not in known_paths and not
|
|
534
|
+
if line and line not in known_paths and not engine.is_noise(line):
|
|
564
535
|
found.append(line)
|
|
565
536
|
return found
|
|
566
537
|
except Exception:
|
|
@@ -578,7 +549,7 @@ def _find_symbol_files(
|
|
|
578
549
|
rel_str = str(rel).replace("\\", "/")
|
|
579
550
|
except ValueError:
|
|
580
551
|
continue
|
|
581
|
-
if rel_str in known_paths or
|
|
552
|
+
if rel_str in known_paths or engine.is_noise(rel_str):
|
|
582
553
|
continue
|
|
583
554
|
try:
|
|
584
555
|
content = Path(full).read_text(encoding="utf-8", errors="replace")
|
|
@@ -185,6 +185,13 @@ class DocAnalyzer:
|
|
|
185
185
|
if any(r.doc_text and r.doc_text.endswith(self._TRUNCATION_SUFFIX) for r in records):
|
|
186
186
|
truncated = True
|
|
187
187
|
|
|
188
|
+
# Explicit absence signal: scanned files but found nothing
|
|
189
|
+
if total_count == 0 and file_paths:
|
|
190
|
+
limitations.append(
|
|
191
|
+
f"no_docs_found: {len(file_paths)} file(s) scanned, "
|
|
192
|
+
"no docstrings or JSDoc comments found"
|
|
193
|
+
)
|
|
194
|
+
|
|
188
195
|
summary = DocSummary(
|
|
189
196
|
requested=True,
|
|
190
197
|
total_count=total_count,
|
|
@@ -20,12 +20,13 @@ _RELEASE_COMMIT_RE = re.compile(
|
|
|
20
20
|
)
|
|
21
21
|
# Matches version-bump phrases anywhere in the commit subject (multilingual)
|
|
22
22
|
_RELEASE_COMMIT_CONTAINS_RE = re.compile(
|
|
23
|
-
r"subiendo a v?[\d.]"
|
|
23
|
+
r"subiendo a v?[\d.]" # Spanish: "subiendo a 0.38.0", "subiendo a v.0.31.0"
|
|
24
|
+
r"|actualizando a v?[\d.]" # Spanish: "actualizando a 0.15.1"
|
|
24
25
|
r"|bumping to v?[\d.]"
|
|
25
26
|
r"|preparing (?:v|release)[\d. ]"
|
|
26
27
|
r"|releasing v?[\d.]"
|
|
27
28
|
r"|cut v?[\d.]"
|
|
28
|
-
r"|\bv\d+\.\d+\.\d+\b",
|
|
29
|
+
r"|\bv\d+\.\d+\.\d+\b", # bare version tag in middle of message
|
|
29
30
|
re.IGNORECASE,
|
|
30
31
|
)
|
|
31
32
|
|
|
@@ -34,12 +35,25 @@ _HOTSPOT_ADMIN_FILENAMES: frozenset[str] = frozenset({
|
|
|
34
35
|
"CHANGELOG.md", "CHANGELOG", "CHANGES.md", "CHANGES", "HISTORY.md",
|
|
35
36
|
"RELEASE.md", "RELEASES.md", "RELEASE_NOTES.md", "CHANGELOG.rst", "NEWS.md", "NEWS.rst",
|
|
36
37
|
"VERSION", "VERSION.txt", "version.txt", ".version",
|
|
38
|
+
"_version.py", "__version__.py", "version.py",
|
|
39
|
+
"pyproject.toml", "setup.cfg",
|
|
37
40
|
"package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb",
|
|
38
41
|
"Cargo.lock", "poetry.lock", "Pipfile.lock", "composer.lock",
|
|
39
42
|
"go.sum", "Gemfile.lock",
|
|
40
43
|
})
|
|
41
44
|
_HOTSPOT_ADMIN_SUFFIXES: tuple[str, ...] = (".lock", ".snap", ".min.js", ".min.css")
|
|
42
45
|
|
|
46
|
+
# Auxiliary directory names whose files should be excluded from hotspots —
|
|
47
|
+
# docs, examples, benchmarks etc. are high-commit but low operational signal.
|
|
48
|
+
_HOTSPOT_AUX_DIRS: frozenset[str] = frozenset({
|
|
49
|
+
"docs", "doc", "benchmark", "benchmarks", "example", "examples",
|
|
50
|
+
"demo", "demos", "playground", "playgrounds", "fixture", "fixtures",
|
|
51
|
+
"generated", "generate", "storybook", ".storybook", "stories",
|
|
52
|
+
"sandbox", "sandboxes",
|
|
53
|
+
"ci", "translations", "locales", "locale", "i18n", "l10n",
|
|
54
|
+
".planning",
|
|
55
|
+
})
|
|
56
|
+
|
|
43
57
|
|
|
44
58
|
def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
|
|
45
59
|
result = subprocess.run(
|
|
@@ -191,7 +205,7 @@ def _parse_commits(output: str) -> list:
|
|
|
191
205
|
|
|
192
206
|
|
|
193
207
|
def _is_hotspot_admin(path: str) -> bool:
|
|
194
|
-
"""True for files that are noisy from release/bot commits
|
|
208
|
+
"""True for files that are noisy from release/bot commits or auxiliary dirs."""
|
|
195
209
|
filename = path.rsplit("/", 1)[-1]
|
|
196
210
|
if filename in _HOTSPOT_ADMIN_FILENAMES:
|
|
197
211
|
return True
|
|
@@ -202,9 +216,15 @@ def _is_hotspot_admin(path: str) -> bool:
|
|
|
202
216
|
_lower = filename.lower()
|
|
203
217
|
if _lower.startswith("changelog.") or _lower.startswith("changes."):
|
|
204
218
|
return True
|
|
205
|
-
# lerna.json
|
|
219
|
+
# lerna.json is modified by version bumps, not dev work
|
|
206
220
|
if filename in ("lerna.json",):
|
|
207
221
|
return True
|
|
222
|
+
# Auxiliary directory parts — docs, benchmarks, examples, demos, etc.
|
|
223
|
+
# These may have high commit counts but are not operational signal for agents.
|
|
224
|
+
parts = path.split("/")
|
|
225
|
+
for part in parts[:-1]: # check directory components, not the filename itself
|
|
226
|
+
if part.lower() in _HOTSPOT_AUX_DIRS:
|
|
227
|
+
return True
|
|
208
228
|
return False
|
|
209
229
|
|
|
210
230
|
|
|
@@ -231,6 +251,9 @@ def _parse_hotspots(output: str) -> list:
|
|
|
231
251
|
continue
|
|
232
252
|
if skip_commit:
|
|
233
253
|
continue
|
|
254
|
+
# Skip git artifact lines that are not file paths: flags (-o, --), separators, etc.
|
|
255
|
+
if line.startswith("-") or not ("/" in line or "." in line):
|
|
256
|
+
continue
|
|
234
257
|
if _is_hotspot_admin(line):
|
|
235
258
|
continue
|
|
236
259
|
file_counts[line] += 1
|
|
@@ -627,94 +627,81 @@ class TaskContextBuilder:
|
|
|
627
627
|
git_hotspots: Optional[dict[str, int]] = None,
|
|
628
628
|
uncommitted_files: Optional[set[str]] = None,
|
|
629
629
|
) -> list[RelevantFile]:
|
|
630
|
-
from sourcecode.
|
|
630
|
+
from sourcecode.ranking_engine import RankingEngine
|
|
631
631
|
from sourcecode.file_classifier import FileClassifier
|
|
632
|
-
scorer = RelevanceScorer(monorepo_packages or [])
|
|
633
|
-
file_classifier = FileClassifier(self.root, [
|
|
634
|
-
# _rank_files only needs production path evidence; EntryPoint objects
|
|
635
|
-
# are not available here, so category evidence is best-effort below.
|
|
636
|
-
], monorepo_packages or [])
|
|
637
632
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
633
|
+
engine = RankingEngine(monorepo_packages or [])
|
|
634
|
+
file_classifier = FileClassifier(self.root, [], monorepo_packages or [])
|
|
635
|
+
|
|
636
|
+
# Auxiliary entry points (benchmark, docs, examples) are not runtime
|
|
637
|
+
runtime_entry_set = {ep for ep in entry_set if not engine.is_auxiliary(ep)}
|
|
641
638
|
|
|
642
639
|
_hotspots = git_hotspots or {}
|
|
643
640
|
_uncommitted = uncommitted_files or set()
|
|
644
641
|
_max_churn = max(_hotspots.values(), default=1)
|
|
645
642
|
|
|
646
|
-
scored: list[tuple[float, RelevantFile]] = []
|
|
643
|
+
scored: list[tuple[float, str, RelevantFile]] = []
|
|
647
644
|
|
|
648
645
|
for path in all_paths:
|
|
649
646
|
if Path(path).suffix.lower() not in _ALL_EXTENSIONS:
|
|
650
647
|
continue
|
|
651
648
|
if any(pen in path for pen in spec.ranking_penalties):
|
|
652
649
|
continue
|
|
653
|
-
|
|
654
|
-
# Hard filter: tooling/config noise
|
|
655
|
-
if scorer.is_noise(path):
|
|
650
|
+
if engine.is_noise(path):
|
|
656
651
|
continue
|
|
657
652
|
|
|
658
653
|
is_test = path in test_set
|
|
659
654
|
if is_test and task_name != "generate-tests":
|
|
660
655
|
continue
|
|
661
656
|
|
|
662
|
-
|
|
663
|
-
|
|
657
|
+
# Structural + git signals from unified engine (task-weighted)
|
|
658
|
+
fs = engine.score(
|
|
659
|
+
path,
|
|
660
|
+
is_entrypoint=(path in runtime_entry_set),
|
|
661
|
+
git_churn=_hotspots.get(path, 0),
|
|
662
|
+
max_churn=_max_churn,
|
|
663
|
+
is_changed=(path in _uncommitted),
|
|
664
|
+
task=task_name,
|
|
665
|
+
)
|
|
664
666
|
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
score += 3.0
|
|
668
|
-
reasons.append("entry point")
|
|
667
|
+
if fs.score < -50: # hard noise
|
|
668
|
+
continue
|
|
669
669
|
|
|
670
|
+
# Content classification boost (reads file imports)
|
|
671
|
+
content_boost = 0.0
|
|
672
|
+
content_reasons: list[str] = []
|
|
670
673
|
file_class = file_classifier.classify(path)
|
|
671
674
|
if file_class is not None:
|
|
672
|
-
|
|
673
|
-
|
|
675
|
+
content_boost = file_class.relevance * 2.0
|
|
676
|
+
content_reasons.append(f"{file_class.category}: {file_class.reason}")
|
|
674
677
|
|
|
675
678
|
if is_test:
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
elif self._is_source(path):
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
# Operational relevance boost/penalty from package role
|
|
684
|
-
rel = scorer.score(path)
|
|
685
|
-
score += (rel - 0.3) * 2.0 # center around 0.3 baseline
|
|
686
|
-
|
|
687
|
-
# Suppress auxiliary dirs (benchmarks, docs, examples, demos)
|
|
688
|
-
if scorer.is_auxiliary(path):
|
|
689
|
-
score -= 2.0
|
|
690
|
-
|
|
691
|
-
# Git churn: frequently changed files are high-signal for active work
|
|
692
|
-
churn = _hotspots.get(path, 0)
|
|
693
|
-
if churn > 0:
|
|
694
|
-
score += (churn / _max_churn) * 1.5
|
|
695
|
-
reasons.append(f"git churn ({churn})")
|
|
696
|
-
|
|
697
|
-
# Uncommitted changes: files actively being edited rank highest
|
|
698
|
-
if path in _uncommitted:
|
|
699
|
-
score += 1.0
|
|
700
|
-
reasons.append("uncommitted changes")
|
|
701
|
-
|
|
702
|
-
if score <= 0:
|
|
679
|
+
content_boost += 2.0
|
|
680
|
+
content_reasons.append("existing test")
|
|
681
|
+
elif self._is_source(path) and not content_reasons:
|
|
682
|
+
content_boost += 0.5
|
|
683
|
+
|
|
684
|
+
total = fs.score + content_boost
|
|
685
|
+
if total <= 0:
|
|
703
686
|
continue
|
|
704
687
|
|
|
705
688
|
role = (
|
|
706
689
|
"entrypoint" if path in runtime_entry_set
|
|
707
690
|
else ("test" if is_test else "source")
|
|
708
691
|
)
|
|
709
|
-
|
|
692
|
+
all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
|
|
693
|
+
reason_str = ", ".join(all_reasons) if all_reasons else "source file"
|
|
694
|
+
|
|
695
|
+
scored.append((total, path, RelevantFile(
|
|
710
696
|
path=path,
|
|
711
697
|
role=role,
|
|
712
|
-
score=round(
|
|
713
|
-
reason=
|
|
698
|
+
score=round(min(total / 3.0, 1.0), 2),
|
|
699
|
+
reason=reason_str,
|
|
714
700
|
)))
|
|
715
701
|
|
|
716
|
-
|
|
717
|
-
|
|
702
|
+
# Deterministic: score desc, then path asc as tiebreaker
|
|
703
|
+
scored.sort(key=lambda x: (-x[0], x[1]))
|
|
704
|
+
return [f for _, _, f in scored[:15]]
|
|
718
705
|
|
|
719
706
|
def _is_test(self, path: str) -> bool:
|
|
720
707
|
name = Path(path).name.lower()
|