sourcecode 0.38.0__tar.gz → 0.41.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. {sourcecode-0.38.0 → sourcecode-0.41.0}/PKG-INFO +1 -1
  2. {sourcecode-0.38.0 → sourcecode-0.41.0}/pyproject.toml +1 -1
  3. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/__init__.py +1 -1
  4. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/cli.py +56 -3
  5. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_model.py +1 -0
  6. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/contract_pipeline.py +32 -61
  7. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/doc_analyzer.py +7 -0
  8. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/git_analyzer.py +27 -4
  9. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/prepare_context.py +40 -53
  10. sourcecode-0.41.0/src/sourcecode/ranking_engine.py +231 -0
  11. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/relevance_scorer.py +4 -0
  12. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/semantic_analyzer.py +8 -2
  13. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/serializer.py +136 -12
  14. {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  15. {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  16. {sourcecode-0.38.0 → sourcecode-0.41.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  17. {sourcecode-0.38.0 → sourcecode-0.41.0}/.gitignore +0 -0
  18. {sourcecode-0.38.0 → sourcecode-0.41.0}/.ruff.toml +0 -0
  19. {sourcecode-0.38.0 → sourcecode-0.41.0}/CONTRIBUTING.md +0 -0
  20. {sourcecode-0.38.0 → sourcecode-0.41.0}/LICENSE +0 -0
  21. {sourcecode-0.38.0 → sourcecode-0.41.0}/README.md +0 -0
  22. {sourcecode-0.38.0 → sourcecode-0.41.0}/SECURITY.md +0 -0
  23. {sourcecode-0.38.0 → sourcecode-0.41.0}/docs/privacy.md +0 -0
  24. {sourcecode-0.38.0 → sourcecode-0.41.0}/docs/schema.md +0 -0
  25. {sourcecode-0.38.0 → sourcecode-0.41.0}/raw +0 -0
  26. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/adaptive_scanner.py +0 -0
  27. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/architecture_analyzer.py +0 -0
  28. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/architecture_summary.py +0 -0
  29. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/ast_extractor.py +0 -0
  30. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/classifier.py +0 -0
  31. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  32. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/confidence_analyzer.py +0 -0
  33. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/context_summarizer.py +0 -0
  34. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/coverage_parser.py +0 -0
  35. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/dependency_analyzer.py +0 -0
  36. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/__init__.py +0 -0
  37. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/base.py +0 -0
  38. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  39. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/dart.py +0 -0
  40. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/dotnet.py +0 -0
  41. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/elixir.py +0 -0
  42. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/go.py +0 -0
  43. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/heuristic.py +0 -0
  44. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/hybrid.py +0 -0
  45. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/java.py +0 -0
  46. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  47. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/nodejs.py +0 -0
  48. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/parsers.py +0 -0
  49. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/php.py +0 -0
  50. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/project.py +0 -0
  51. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/python.py +0 -0
  52. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/ruby.py +0 -0
  53. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/rust.py +0 -0
  54. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/systems.py +0 -0
  55. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/terraform.py +0 -0
  56. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/detectors/tooling.py +0 -0
  57. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  58. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/env_analyzer.py +0 -0
  59. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/file_classifier.py +0 -0
  60. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/graph_analyzer.py +0 -0
  61. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/metrics_analyzer.py +0 -0
  62. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/redactor.py +0 -0
  63. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/repo_classifier.py +0 -0
  64. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/runtime_classifier.py +0 -0
  65. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/scanner.py +0 -0
  66. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/schema.py +0 -0
  67. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/summarizer.py +0 -0
  68. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/__init__.py +0 -0
  69. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/config.py +0 -0
  70. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/consent.py +0 -0
  71. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/events.py +0 -0
  72. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/filters.py +0 -0
  73. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/telemetry/transport.py +0 -0
  74. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/tree_utils.py +0 -0
  75. {sourcecode-0.38.0 → sourcecode-0.41.0}/src/sourcecode/workspace.py +0 -0
  76. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/__init__.py +0 -0
  77. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/conftest.py +0 -0
  78. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/coverage.xml +0 -0
  79. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  80. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  81. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  82. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/go_service/go.mod +0 -0
  83. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/jacoco.xml +0 -0
  84. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/lcov.info +0 -0
  85. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  86. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/package.json +0 -0
  87. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  88. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  89. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  90. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  91. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  92. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  93. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_architecture_analyzer.py +0 -0
  94. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_architecture_summary.py +0 -0
  95. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_ast_extractor.py +0 -0
  96. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_classifier.py +0 -0
  97. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_cli.py +0 -0
  98. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_code_notes_analyzer.py +0 -0
  99. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_contract_pipeline.py +0 -0
  100. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_coverage_parser.py +0 -0
  101. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_cross_consistency.py +0 -0
  102. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  103. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  104. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_dependency_schema.py +0 -0
  105. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_dotnet.py +0 -0
  106. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_go_rust_java.py +0 -0
  107. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_nodejs.py +0 -0
  108. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_php_ruby_dart.py +0 -0
  109. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_python.py +0 -0
  110. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_universal_managed.py +0 -0
  111. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detector_universal_systems.py +0 -0
  112. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_detectors_base.py +0 -0
  113. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  114. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_doc_analyzer_python.py +0 -0
  115. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  116. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_analyzer_python_node.py +0 -0
  117. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_graph_schema.py +0 -0
  118. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_hybrid_inference.py +0 -0
  119. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration.py +0 -0
  120. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_dependencies.py +0 -0
  121. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_detection.py +0 -0
  122. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_docs.py +0 -0
  123. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_graph_modules.py +0 -0
  124. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_lqn.py +0 -0
  125. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_metrics.py +0 -0
  126. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_multistack.py +0 -0
  127. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_semantics.py +0 -0
  128. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_integration_universal.py +0 -0
  129. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_metrics_analyzer.py +0 -0
  130. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_packaging.py +0 -0
  131. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_phase1_improvements.py +0 -0
  132. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_pipeline_integrity.py +0 -0
  133. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_real_projects.py +0 -0
  134. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_redactor.py +0 -0
  135. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_scanner.py +0 -0
  136. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_schema.py +0 -0
  137. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_schema_normalization.py +0 -0
  138. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_analyzer_node.py +0 -0
  139. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_analyzer_python.py +0 -0
  140. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_import_resolution.py +0 -0
  141. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_semantic_schema.py +0 -0
  142. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_signal_hierarchy.py +0 -0
  143. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_summarizer.py +0 -0
  144. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_telemetry.py +0 -0
  145. {sourcecode-0.38.0 → sourcecode-0.41.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.38.0
3
+ Version: 0.41.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.38.0"
7
+ version = "0.41.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.38.0"
3
+ __version__ = "0.41.0"
@@ -220,6 +220,29 @@ def _preprocess_argv() -> None:
220
220
  _sys.argv = _sys.argv[:1] + modified
221
221
 
222
222
 
223
+ def _copy_to_clipboard(content: str) -> bool:
224
+ """Copy text to system clipboard. Returns True on success, False otherwise (never raises)."""
225
+ import subprocess
226
+ import sys as _sys
227
+ try:
228
+ if _sys.platform == "darwin":
229
+ subprocess.run(["pbcopy"], input=content.encode("utf-8"), check=True, timeout=10)
230
+ return True
231
+ elif _sys.platform == "win32":
232
+ subprocess.run(["clip"], input=content.encode("utf-16"), check=True, timeout=10)
233
+ return True
234
+ else:
235
+ for cmd in (["xclip", "-selection", "clipboard"], ["xsel", "--clipboard", "--input"]):
236
+ try:
237
+ subprocess.run(cmd, input=content.encode("utf-8"), check=True, timeout=10)
238
+ return True
239
+ except (FileNotFoundError, subprocess.CalledProcessError):
240
+ continue
241
+ return False
242
+ except Exception:
243
+ return False
244
+
245
+
223
246
  app = typer.Typer(
224
247
  name="sourcecode",
225
248
  help=_HELP,
@@ -543,7 +566,7 @@ def main(
543
566
  entrypoints_only: bool = typer.Option(
544
567
  False,
545
568
  "--entrypoints-only",
546
- help="Contract mode: include only files that are entrypoints or have exported symbols.",
569
+ help="Contract mode: include only files that are runtime entrypoints or have exported symbols (public API surface). Note: 'entrypoints' here includes all files with exports, not strictly detected runtime entry points.",
547
570
  ),
548
571
  changed_only: bool = typer.Option(
549
572
  False,
@@ -571,6 +594,12 @@ def main(
571
594
  "--symbol",
572
595
  help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
573
596
  ),
597
+ copy: bool = typer.Option(
598
+ False,
599
+ "--copy",
600
+ "-c",
601
+ help="Copy output to system clipboard after a successful run. No-op when --output is used or clipboard is unavailable.",
602
+ ),
574
603
  ) -> None:
575
604
  """Analyze a repository and produce structured context for AI coding agents.
576
605
 
@@ -1127,11 +1156,15 @@ def main(
1127
1156
 
1128
1157
  _all_call_files = set(_fan_in) | set(_fan_out)
1129
1158
  _hotspots: list[dict] = []
1130
- # Filter test paths from hotspots — they dominate fan-in by calling many modules
1159
+ # Filter test, noise, and auxiliary paths — they dominate fan-in but carry no signal
1131
1160
  _TEST_MARKERS = {"/test", "/tests", "/spec", "/specs", "_test.", ".test.", ".spec."}
1161
+ from sourcecode.ranking_engine import RankingEngine as _RankingEngine
1162
+ _sem_engine = _RankingEngine(sm.monorepo_packages)
1132
1163
  for _p in _all_call_files:
1133
1164
  if any(_m in _p for _m in _TEST_MARKERS) or _p.startswith("test"):
1134
1165
  continue
1166
+ if _sem_engine.is_noise(_p) or _sem_engine.is_auxiliary(_p):
1167
+ continue
1135
1168
  _in = _fan_in[_p]
1136
1169
  _out = _fan_out[_p]
1137
1170
  _score = _in * 2.0 + _out * 1.0
@@ -1386,6 +1419,13 @@ def main(
1386
1419
  # 6. Write output (CLI-04)
1387
1420
  write_output(content, output=output)
1388
1421
 
1422
+ # 7. Clipboard copy (--copy / -c)
1423
+ if copy and output is None:
1424
+ _trimmed = content.strip()
1425
+ if _trimmed and _trimmed not in ("{}", "[]", "null"):
1426
+ if _copy_to_clipboard(content):
1427
+ typer.echo("✓ copied to clipboard", err=True)
1428
+
1389
1429
 
1390
1430
  @app.command("prepare-context")
1391
1431
  def prepare_context_cmd(
@@ -1417,6 +1457,12 @@ def prepare_context_cmd(
1417
1457
  "--dry-run",
1418
1458
  help="Show what would be analyzed without running it",
1419
1459
  ),
1460
+ copy: bool = typer.Option(
1461
+ False,
1462
+ "--copy",
1463
+ "-c",
1464
+ help="Copy output to system clipboard after a successful run. No-op when clipboard is unavailable.",
1465
+ ),
1420
1466
  ) -> None:
1421
1467
  """Task-specific context for AI coding agents.
1422
1468
 
@@ -1514,7 +1560,14 @@ def prepare_context_cmd(
1514
1560
  if llm_prompt:
1515
1561
  out["llm_prompt"] = builder.render_prompt(output)
1516
1562
 
1517
- typer.echo(json.dumps(out, indent=2, ensure_ascii=False))
1563
+ _pc_content = json.dumps(out, indent=2, ensure_ascii=False)
1564
+ typer.echo(_pc_content)
1565
+
1566
+ if copy:
1567
+ _trimmed = _pc_content.strip()
1568
+ if _trimmed and _trimmed not in ("{}", "[]", "null"):
1569
+ if _copy_to_clipboard(_pc_content):
1570
+ typer.echo("✓ copied to clipboard", err=True)
1518
1571
 
1519
1572
 
1520
1573
  # ── Telemetry commands ────────────────────────────────────────────────────────
@@ -91,6 +91,7 @@ class FileContract:
91
91
  fan_out: int = 0 # how many files this imports
92
92
  is_entrypoint: bool = False
93
93
  is_changed: bool = False
94
+ ranking_reasons: list[str] = field(default_factory=list)
94
95
 
95
96
  # Extraction quality
96
97
  extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
17
17
 
18
18
  from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
19
19
  from sourcecode.contract_model import ContractSummary, FileContract
20
+ from sourcecode.ranking_engine import RankingEngine
20
21
  from sourcecode.relevance_scorer import RelevanceScorer
21
22
  from sourcecode.schema import EntryPoint, MonorepoPackageInfo
22
23
 
@@ -27,22 +28,6 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
27
28
  _MAX_FILES = 500 # hard cap on files extracted per run
28
29
  _SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
29
30
 
30
- # Role-based score adjustments applied after contract extraction.
31
- # Runtime roles get a boost; config/util are neutral or penalized.
32
- _ROLE_SCORE: dict[str, float] = {
33
- "entrypoint": 0.15,
34
- "service": 0.10,
35
- "route": 0.10,
36
- "api": 0.08,
37
- "middleware": 0.06,
38
- "store": 0.05,
39
- "model": 0.05,
40
- "hook": 0.05,
41
- "component": 0.03,
42
- "util": 0.00,
43
- "config": -0.10,
44
- "unknown": 0.00,
45
- }
46
31
 
47
32
  RankStrategy = Literal["relevance", "centrality", "git-churn"]
48
33
 
@@ -194,6 +179,7 @@ class ContractPipeline:
194
179
  """
195
180
  entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
196
181
  scorer = RelevanceScorer(monorepo_packages)
182
+ engine = RankingEngine(monorepo_packages)
197
183
 
198
184
  # 1. Changed files (for --changed-only and ranking)
199
185
  changed_files: set[str] = set()
@@ -267,9 +253,24 @@ class ContractPipeline:
267
253
  if rank_by == "git-churn":
268
254
  churn = _get_git_churn(root, [c.path for c in contracts])
269
255
 
270
- # 6. Compute relevance scores
256
+ # 6. Compute relevance scores via unified ranking engine
257
+ max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
258
+ max_churn_val = max(churn.values(), default=1) if churn else 1
271
259
  for c in contracts:
272
- c.relevance_score = self._score(c, scorer, churn)
260
+ fs = engine.score(
261
+ c.path,
262
+ fan_in=c.fan_in,
263
+ fan_out=c.fan_out,
264
+ max_fan_in=max_fan_in,
265
+ git_churn=churn.get(c.path, 0),
266
+ max_churn=max_churn_val,
267
+ is_entrypoint=c.is_entrypoint,
268
+ is_changed=c.is_changed,
269
+ export_count=len(c.exports),
270
+ task="default",
271
+ )
272
+ c.relevance_score = fs.display_score
273
+ c.ranking_reasons = fs.reasons
273
274
 
274
275
  # 7. Rank
275
276
  contracts = self._rank(contracts, rank_by)
@@ -285,7 +286,7 @@ class ContractPipeline:
285
286
  known_paths=set(src_paths),
286
287
  entry_paths=entry_paths,
287
288
  changed_files=changed_files,
288
- scorer=scorer,
289
+ engine=engine,
289
290
  )
290
291
 
291
292
  # 9. Entrypoints-only filter
@@ -312,45 +313,13 @@ class ContractPipeline:
312
313
  )
313
314
  return contracts, summary
314
315
 
315
- def _score(
316
- self,
317
- c: FileContract,
318
- scorer: RelevanceScorer,
319
- churn: dict[str, int],
320
- ) -> float:
321
- base = scorer.score(c.path)
322
-
323
- if c.is_entrypoint:
324
- base += 0.3
325
- if c.is_changed:
326
- base += 0.2
327
-
328
- # Fan-in is the strongest signal: many callers = critical contract
329
- fi_score = min(c.fan_in / 10.0, 0.3)
330
- fo_score = min(c.fan_out / 15.0, 0.15)
331
- base += fi_score + fo_score
332
-
333
- # Exported API value
334
- export_count = len(c.exports)
335
- base += min(export_count / 20.0, 0.1)
336
-
337
- # Churn
338
- churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
339
- base += churn_score
340
-
341
- # Role-based boost: runtime roles score higher than auxiliary
342
- base += _ROLE_SCORE.get(c.role, 0.0)
343
-
344
- return min(1.0, base)
345
-
346
316
  def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
347
317
  if rank_by == "centrality":
348
- # Approximate centrality: fan_in + fan_out
349
- return sorted(contracts, key=lambda c: -(c.fan_in + c.fan_out))
318
+ return sorted(contracts, key=lambda c: (-(c.fan_in + c.fan_out), c.path))
350
319
  if rank_by == "git-churn":
351
- return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score))
352
- # Default: relevance
353
- return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
320
+ return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score, c.path))
321
+ # Default: relevance — path breaks ties deterministically
322
+ return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score, c.path))
354
323
 
355
324
  def _symbol_deep_scan(
356
325
  self,
@@ -359,7 +328,7 @@ class ContractPipeline:
359
328
  known_paths: set[str],
360
329
  entry_paths: set[str],
361
330
  changed_files: set[str],
362
- scorer: RelevanceScorer,
331
+ engine: RankingEngine,
363
332
  ) -> list[FileContract]:
364
333
  """Grep-based fallback when the shallow scan missed the defining files.
365
334
 
@@ -367,7 +336,7 @@ class ContractPipeline:
367
336
  extracts contracts for candidates not already processed, then re-applies
368
337
  the symbol filter. Fan-in/fan-out are not computed for these contracts.
369
338
  """
370
- candidates = _find_symbol_files(root, symbol, known_paths, scorer)
339
+ candidates = _find_symbol_files(root, symbol, known_paths, engine)
371
340
  if not candidates:
372
341
  return []
373
342
 
@@ -379,7 +348,9 @@ class ContractPipeline:
379
348
  continue
380
349
  contract.is_entrypoint = rel_path in entry_paths
381
350
  contract.is_changed = rel_path in changed_files
382
- contract.relevance_score = scorer.score(rel_path)
351
+ fs = engine.score(rel_path, is_entrypoint=contract.is_entrypoint, is_changed=contract.is_changed)
352
+ contract.relevance_score = fs.display_score
353
+ contract.ranking_reasons = fs.reasons
383
354
  extra.append(contract)
384
355
 
385
356
  return _filter_by_symbol(extra, symbol)
@@ -531,7 +502,7 @@ def _find_symbol_files(
531
502
  root: Path,
532
503
  symbol: str,
533
504
  known_paths: set[str],
534
- scorer: RelevanceScorer,
505
+ engine: RankingEngine,
535
506
  ) -> list[str]:
536
507
  """Find source files outside *known_paths* that contain *symbol* as text.
537
508
 
@@ -560,7 +531,7 @@ def _find_symbol_files(
560
531
  if line.startswith("./"):
561
532
  line = line[2:]
562
533
  line = line.replace("\\", "/")
563
- if line and line not in known_paths and not scorer.is_noise(line):
534
+ if line and line not in known_paths and not engine.is_noise(line):
564
535
  found.append(line)
565
536
  return found
566
537
  except Exception:
@@ -578,7 +549,7 @@ def _find_symbol_files(
578
549
  rel_str = str(rel).replace("\\", "/")
579
550
  except ValueError:
580
551
  continue
581
- if rel_str in known_paths or scorer.is_noise(rel_str):
552
+ if rel_str in known_paths or engine.is_noise(rel_str):
582
553
  continue
583
554
  try:
584
555
  content = Path(full).read_text(encoding="utf-8", errors="replace")
@@ -185,6 +185,13 @@ class DocAnalyzer:
185
185
  if any(r.doc_text and r.doc_text.endswith(self._TRUNCATION_SUFFIX) for r in records):
186
186
  truncated = True
187
187
 
188
+ # Explicit absence signal: scanned files but found nothing
189
+ if total_count == 0 and file_paths:
190
+ limitations.append(
191
+ f"no_docs_found: {len(file_paths)} file(s) scanned, "
192
+ "no docstrings or JSDoc comments found"
193
+ )
194
+
188
195
  summary = DocSummary(
189
196
  requested=True,
190
197
  total_count=total_count,
@@ -20,12 +20,13 @@ _RELEASE_COMMIT_RE = re.compile(
20
20
  )
21
21
  # Matches version-bump phrases anywhere in the commit subject (multilingual)
22
22
  _RELEASE_COMMIT_CONTAINS_RE = re.compile(
23
- r"subiendo a v?[\d.]" # Spanish: "subiendo a v.0.28.0"
23
+ r"subiendo a v?[\d.]" # Spanish: "subiendo a 0.38.0", "subiendo a v.0.31.0"
24
+ r"|actualizando a v?[\d.]" # Spanish: "actualizando a 0.15.1"
24
25
  r"|bumping to v?[\d.]"
25
26
  r"|preparing (?:v|release)[\d. ]"
26
27
  r"|releasing v?[\d.]"
27
28
  r"|cut v?[\d.]"
28
- r"|\bv\d+\.\d+\.\d+\b", # bare version tag in middle of message
29
+ r"|\bv\d+\.\d+\.\d+\b", # bare version tag in middle of message
29
30
  re.IGNORECASE,
30
31
  )
31
32
 
@@ -34,12 +35,25 @@ _HOTSPOT_ADMIN_FILENAMES: frozenset[str] = frozenset({
34
35
  "CHANGELOG.md", "CHANGELOG", "CHANGES.md", "CHANGES", "HISTORY.md",
35
36
  "RELEASE.md", "RELEASES.md", "RELEASE_NOTES.md", "CHANGELOG.rst", "NEWS.md", "NEWS.rst",
36
37
  "VERSION", "VERSION.txt", "version.txt", ".version",
38
+ "_version.py", "__version__.py", "version.py",
39
+ "pyproject.toml", "setup.cfg",
37
40
  "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "bun.lockb",
38
41
  "Cargo.lock", "poetry.lock", "Pipfile.lock", "composer.lock",
39
42
  "go.sum", "Gemfile.lock",
40
43
  })
41
44
  _HOTSPOT_ADMIN_SUFFIXES: tuple[str, ...] = (".lock", ".snap", ".min.js", ".min.css")
42
45
 
46
+ # Auxiliary directory names whose files should be excluded from hotspots —
47
+ # docs, examples, benchmarks etc. are high-commit but low operational signal.
48
+ _HOTSPOT_AUX_DIRS: frozenset[str] = frozenset({
49
+ "docs", "doc", "benchmark", "benchmarks", "example", "examples",
50
+ "demo", "demos", "playground", "playgrounds", "fixture", "fixtures",
51
+ "generated", "generate", "storybook", ".storybook", "stories",
52
+ "sandbox", "sandboxes",
53
+ "ci", "translations", "locales", "locale", "i18n", "l10n",
54
+ ".planning",
55
+ })
56
+
43
57
 
44
58
  def _run_git(args: list[str], cwd: Path, timeout: int = 15) -> tuple[str, int]:
45
59
  result = subprocess.run(
@@ -191,7 +205,7 @@ def _parse_commits(output: str) -> list:
191
205
 
192
206
 
193
207
  def _is_hotspot_admin(path: str) -> bool:
194
- """True for files that are noisy from release/bot commits, not semantic changes."""
208
+ """True for files that are noisy from release/bot commits or auxiliary dirs."""
195
209
  filename = path.rsplit("/", 1)[-1]
196
210
  if filename in _HOTSPOT_ADMIN_FILENAMES:
197
211
  return True
@@ -202,9 +216,15 @@ def _is_hotspot_admin(path: str) -> bool:
202
216
  _lower = filename.lower()
203
217
  if _lower.startswith("changelog.") or _lower.startswith("changes."):
204
218
  return True
205
- # lerna.json and root-level package.json are modified by version bumps, not dev work
219
+ # lerna.json is modified by version bumps, not dev work
206
220
  if filename in ("lerna.json",):
207
221
  return True
222
+ # Auxiliary directory parts — docs, benchmarks, examples, demos, etc.
223
+ # These may have high commit counts but are not operational signal for agents.
224
+ parts = path.split("/")
225
+ for part in parts[:-1]: # check directory components, not the filename itself
226
+ if part.lower() in _HOTSPOT_AUX_DIRS:
227
+ return True
208
228
  return False
209
229
 
210
230
 
@@ -231,6 +251,9 @@ def _parse_hotspots(output: str) -> list:
231
251
  continue
232
252
  if skip_commit:
233
253
  continue
254
+ # Skip git artifact lines that are not file paths: flags (-o, --), separators, etc.
255
+ if line.startswith("-") or not ("/" in line or "." in line):
256
+ continue
234
257
  if _is_hotspot_admin(line):
235
258
  continue
236
259
  file_counts[line] += 1
@@ -627,94 +627,81 @@ class TaskContextBuilder:
627
627
  git_hotspots: Optional[dict[str, int]] = None,
628
628
  uncommitted_files: Optional[set[str]] = None,
629
629
  ) -> list[RelevantFile]:
630
- from sourcecode.relevance_scorer import RelevanceScorer
630
+ from sourcecode.ranking_engine import RankingEngine
631
631
  from sourcecode.file_classifier import FileClassifier
632
- scorer = RelevanceScorer(monorepo_packages or [])
633
- file_classifier = FileClassifier(self.root, [
634
- # _rank_files only needs production path evidence; EntryPoint objects
635
- # are not available here, so category evidence is best-effort below.
636
- ], monorepo_packages or [])
637
632
 
638
- # Auxiliary entry points (benchmark, docs, examples) must not get
639
- # the production entry boost they are not runtime signals.
640
- runtime_entry_set = {ep for ep in entry_set if not scorer.is_auxiliary(ep)}
633
+ engine = RankingEngine(monorepo_packages or [])
634
+ file_classifier = FileClassifier(self.root, [], monorepo_packages or [])
635
+
636
+ # Auxiliary entry points (benchmark, docs, examples) are not runtime
637
+ runtime_entry_set = {ep for ep in entry_set if not engine.is_auxiliary(ep)}
641
638
 
642
639
  _hotspots = git_hotspots or {}
643
640
  _uncommitted = uncommitted_files or set()
644
641
  _max_churn = max(_hotspots.values(), default=1)
645
642
 
646
- scored: list[tuple[float, RelevantFile]] = []
643
+ scored: list[tuple[float, str, RelevantFile]] = []
647
644
 
648
645
  for path in all_paths:
649
646
  if Path(path).suffix.lower() not in _ALL_EXTENSIONS:
650
647
  continue
651
648
  if any(pen in path for pen in spec.ranking_penalties):
652
649
  continue
653
-
654
- # Hard filter: tooling/config noise
655
- if scorer.is_noise(path):
650
+ if engine.is_noise(path):
656
651
  continue
657
652
 
658
653
  is_test = path in test_set
659
654
  if is_test and task_name != "generate-tests":
660
655
  continue
661
656
 
662
- score = 0.0
663
- reasons: list[str] = []
657
+ # Structural + git signals from unified engine (task-weighted)
658
+ fs = engine.score(
659
+ path,
660
+ is_entrypoint=(path in runtime_entry_set),
661
+ git_churn=_hotspots.get(path, 0),
662
+ max_churn=_max_churn,
663
+ is_changed=(path in _uncommitted),
664
+ task=task_name,
665
+ )
664
666
 
665
- # Only runtime entry points get the production boost
666
- if path in runtime_entry_set:
667
- score += 3.0
668
- reasons.append("entry point")
667
+ if fs.score < -50: # hard noise
668
+ continue
669
669
 
670
+ # Content classification boost (reads file imports)
671
+ content_boost = 0.0
672
+ content_reasons: list[str] = []
670
673
  file_class = file_classifier.classify(path)
671
674
  if file_class is not None:
672
- score += file_class.relevance * 2.0
673
- reasons.append(f"{file_class.category}: {file_class.reason}")
675
+ content_boost = file_class.relevance * 2.0
676
+ content_reasons.append(f"{file_class.category}: {file_class.reason}")
674
677
 
675
678
  if is_test:
676
- score += 2.0
677
- reasons.append("existing test")
678
- elif self._is_source(path):
679
- score += 0.5
680
- if not reasons:
681
- reasons.append("source file with supported extension")
682
-
683
- # Operational relevance boost/penalty from package role
684
- rel = scorer.score(path)
685
- score += (rel - 0.3) * 2.0 # center around 0.3 baseline
686
-
687
- # Suppress auxiliary dirs (benchmarks, docs, examples, demos)
688
- if scorer.is_auxiliary(path):
689
- score -= 2.0
690
-
691
- # Git churn: frequently changed files are high-signal for active work
692
- churn = _hotspots.get(path, 0)
693
- if churn > 0:
694
- score += (churn / _max_churn) * 1.5
695
- reasons.append(f"git churn ({churn})")
696
-
697
- # Uncommitted changes: files actively being edited rank highest
698
- if path in _uncommitted:
699
- score += 1.0
700
- reasons.append("uncommitted changes")
701
-
702
- if score <= 0:
679
+ content_boost += 2.0
680
+ content_reasons.append("existing test")
681
+ elif self._is_source(path) and not content_reasons:
682
+ content_boost += 0.5
683
+
684
+ total = fs.score + content_boost
685
+ if total <= 0:
703
686
  continue
704
687
 
705
688
  role = (
706
689
  "entrypoint" if path in runtime_entry_set
707
690
  else ("test" if is_test else "source")
708
691
  )
709
- scored.append((score, RelevantFile(
692
+ all_reasons = [r for r in fs.reasons if r != "source file"] + content_reasons
693
+ reason_str = ", ".join(all_reasons) if all_reasons else "source file"
694
+
695
+ scored.append((total, path, RelevantFile(
710
696
  path=path,
711
697
  role=role,
712
- score=round(score, 1),
713
- reason=", ".join(reasons) if reasons else "source file",
698
+ score=round(min(total / 3.0, 1.0), 2),
699
+ reason=reason_str,
714
700
  )))
715
701
 
716
- scored.sort(key=lambda x: -x[0])
717
- return [f for _, f in scored[:15]]
702
+ # Deterministic: score desc, then path asc as tiebreaker
703
+ scored.sort(key=lambda x: (-x[0], x[1]))
704
+ return [f for _, _, f in scored[:15]]
718
705
 
719
706
  def _is_test(self, path: str) -> bool:
720
707
  name = Path(path).name.lower()