sourcecode 0.42.0__tar.gz → 0.44.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {sourcecode-0.42.0 → sourcecode-0.44.0}/PKG-INFO +1 -1
  2. {sourcecode-0.42.0 → sourcecode-0.44.0}/pyproject.toml +1 -1
  3. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/__init__.py +1 -1
  4. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/cli.py +30 -0
  5. sourcecode-0.44.0/src/sourcecode/context_scorer.py +404 -0
  6. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/contract_model.py +1 -0
  7. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/contract_pipeline.py +59 -25
  8. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/prepare_context.py +27 -1
  9. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/ranking_engine.py +29 -7
  10. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/serializer.py +49 -5
  11. sourcecode-0.44.0/tests/test_block5_quality.py +302 -0
  12. sourcecode-0.44.0/tests/test_context_scorer.py +449 -0
  13. {sourcecode-0.42.0 → sourcecode-0.44.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  14. {sourcecode-0.42.0 → sourcecode-0.44.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  15. {sourcecode-0.42.0 → sourcecode-0.44.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  16. {sourcecode-0.42.0 → sourcecode-0.44.0}/.gitignore +0 -0
  17. {sourcecode-0.42.0 → sourcecode-0.44.0}/.ruff.toml +0 -0
  18. {sourcecode-0.42.0 → sourcecode-0.44.0}/CONTRIBUTING.md +0 -0
  19. {sourcecode-0.42.0 → sourcecode-0.44.0}/LICENSE +0 -0
  20. {sourcecode-0.42.0 → sourcecode-0.44.0}/README.md +0 -0
  21. {sourcecode-0.42.0 → sourcecode-0.44.0}/SECURITY.md +0 -0
  22. {sourcecode-0.42.0 → sourcecode-0.44.0}/docs/privacy.md +0 -0
  23. {sourcecode-0.42.0 → sourcecode-0.44.0}/docs/schema.md +0 -0
  24. {sourcecode-0.42.0 → sourcecode-0.44.0}/raw +0 -0
  25. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/adaptive_scanner.py +0 -0
  26. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/architecture_analyzer.py +0 -0
  27. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/architecture_summary.py +0 -0
  28. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/ast_extractor.py +0 -0
  29. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/classifier.py +0 -0
  30. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  31. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/confidence_analyzer.py +0 -0
  32. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/context_summarizer.py +0 -0
  33. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/coverage_parser.py +0 -0
  34. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/dependency_analyzer.py +0 -0
  35. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/__init__.py +0 -0
  36. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/base.py +0 -0
  37. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  38. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/dart.py +0 -0
  39. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/dotnet.py +0 -0
  40. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/elixir.py +0 -0
  41. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/go.py +0 -0
  42. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/heuristic.py +0 -0
  43. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/hybrid.py +0 -0
  44. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/java.py +0 -0
  45. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  46. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/nodejs.py +0 -0
  47. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/parsers.py +0 -0
  48. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/php.py +0 -0
  49. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/project.py +0 -0
  50. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/python.py +0 -0
  51. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/ruby.py +0 -0
  52. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/rust.py +0 -0
  53. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/systems.py +0 -0
  54. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/terraform.py +0 -0
  55. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/detectors/tooling.py +0 -0
  56. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/doc_analyzer.py +0 -0
  57. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  58. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/env_analyzer.py +0 -0
  59. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/file_classifier.py +0 -0
  60. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/git_analyzer.py +0 -0
  61. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/graph_analyzer.py +0 -0
  62. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/metrics_analyzer.py +0 -0
  63. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/redactor.py +0 -0
  64. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/relevance_scorer.py +0 -0
  65. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/repo_classifier.py +0 -0
  66. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/runtime_classifier.py +0 -0
  67. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/scanner.py +0 -0
  68. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/schema.py +0 -0
  69. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/semantic_analyzer.py +0 -0
  70. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/summarizer.py +0 -0
  71. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/__init__.py +0 -0
  72. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/config.py +0 -0
  73. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/consent.py +0 -0
  74. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/events.py +0 -0
  75. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/filters.py +0 -0
  76. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/telemetry/transport.py +0 -0
  77. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/tree_utils.py +0 -0
  78. {sourcecode-0.42.0 → sourcecode-0.44.0}/src/sourcecode/workspace.py +0 -0
  79. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/__init__.py +0 -0
  80. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/conftest.py +0 -0
  81. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/coverage.xml +0 -0
  82. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  83. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  84. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  85. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/go_service/go.mod +0 -0
  86. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/jacoco.xml +0 -0
  87. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/lcov.info +0 -0
  88. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  89. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/nextjs_app/package.json +0 -0
  90. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  91. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  92. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  93. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  94. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  95. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  96. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_architecture_analyzer.py +0 -0
  97. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_architecture_summary.py +0 -0
  98. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_ast_extractor.py +0 -0
  99. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_block1_reliability.py +0 -0
  100. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_block2_coverage.py +0 -0
  101. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_classifier.py +0 -0
  102. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_cli.py +0 -0
  103. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_code_notes_analyzer.py +0 -0
  104. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_contract_pipeline.py +0 -0
  105. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_coverage_parser.py +0 -0
  106. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_cross_consistency.py +0 -0
  107. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  108. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  109. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_dependency_schema.py +0 -0
  110. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_dotnet.py +0 -0
  111. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_go_rust_java.py +0 -0
  112. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_nodejs.py +0 -0
  113. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_php_ruby_dart.py +0 -0
  114. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_python.py +0 -0
  115. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_universal_managed.py +0 -0
  116. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detector_universal_systems.py +0 -0
  117. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_detectors_base.py +0 -0
  118. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  119. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_doc_analyzer_python.py +0 -0
  120. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  121. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_graph_analyzer_python_node.py +0 -0
  122. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_graph_schema.py +0 -0
  123. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_hybrid_inference.py +0 -0
  124. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration.py +0 -0
  125. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_dependencies.py +0 -0
  126. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_detection.py +0 -0
  127. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_docs.py +0 -0
  128. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_graph_modules.py +0 -0
  129. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_lqn.py +0 -0
  130. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_metrics.py +0 -0
  131. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_multistack.py +0 -0
  132. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_semantics.py +0 -0
  133. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_integration_universal.py +0 -0
  134. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_metrics_analyzer.py +0 -0
  135. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_packaging.py +0 -0
  136. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_phase1_improvements.py +0 -0
  137. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_pipeline_integrity.py +0 -0
  138. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_real_projects.py +0 -0
  139. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_redactor.py +0 -0
  140. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_scanner.py +0 -0
  141. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_schema.py +0 -0
  142. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_schema_normalization.py +0 -0
  143. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_semantic_analyzer_node.py +0 -0
  144. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_semantic_analyzer_python.py +0 -0
  145. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_semantic_import_resolution.py +0 -0
  146. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_semantic_schema.py +0 -0
  147. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_signal_hierarchy.py +0 -0
  148. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_summarizer.py +0 -0
  149. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_telemetry.py +0 -0
  150. {sourcecode-0.42.0 → sourcecode-0.44.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.42.0
3
+ Version: 0.44.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.42.0"
7
+ version = "0.44.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.42.0"
3
+ __version__ = "0.44.0"
@@ -181,6 +181,7 @@ _OPTIONS_WITH_VALUE: frozenset[str] = frozenset({
181
181
  "--dependency-depth",
182
182
  "--rank-by",
183
183
  "--symbol",
184
+ "--max-importers",
184
185
  })
185
186
 
186
187
 
@@ -594,6 +595,17 @@ def main(
594
595
  "--symbol",
595
596
  help="Contract mode: extract localized context for a specific symbol name. Returns defining file + all importers.",
596
597
  ),
598
+ max_importers: int = typer.Option(
599
+ 50,
600
+ "--max-importers",
601
+ help=(
602
+ "Maximum importer files returned by --symbol (default: 50). "
603
+ "Popular symbols can have hundreds of importers — this prevents output explosion. "
604
+ "Defining files are never truncated. Override: --symbol Foo --max-importers 200."
605
+ ),
606
+ min=1,
607
+ max=10000,
608
+ ),
597
609
  copy: bool = typer.Option(
598
610
  False,
599
611
  "--copy",
@@ -770,6 +782,21 @@ def main(
770
782
  code_notes = True
771
783
  no_tree = True # agents never need the raw file tree
772
784
  typer.echo("[agent] dependencies env-map code-notes (no-tree)", err=True)
785
+ # Warn about flags that are computed but excluded from agent_view output
786
+ _agent_suppressed: list[str] = []
787
+ if full_metrics:
788
+ _agent_suppressed.append("--full-metrics")
789
+ if graph_modules:
790
+ _agent_suppressed.append("--graph-modules")
791
+ if docs:
792
+ _agent_suppressed.append("--docs")
793
+ if _agent_suppressed:
794
+ typer.echo(
795
+ f"[agent] warning: {', '.join(_agent_suppressed)} computed but excluded "
796
+ "from --agent output — agent_view does not include these sections. "
797
+ "Remove these flags to skip unnecessary computation.",
798
+ err=True,
799
+ )
773
800
 
774
801
  scanner = AdaptiveScanner(target, topology=_topology, base_depth=effective_depth)
775
802
  raw_tree = scanner.scan_tree()
@@ -1343,6 +1370,9 @@ def main(
1343
1370
  changed_only=changed_only,
1344
1371
  symbol=symbol,
1345
1372
  compress_types=compress_types,
1373
+ max_importers=max_importers,
1374
+ semantic_calls=sm.semantic_calls or None,
1375
+ code_notes=sm.code_notes or None,
1346
1376
  )
1347
1377
  sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
1348
1378
  if symbol is not None and len(_contracts) == 0:
@@ -0,0 +1,404 @@
1
+ """context_scorer.py — Unified node scoring and minimum-sufficient subgraph selection.
2
+
3
+ Aggregates all available signals (structural, semantic, git, annotations, proximity)
4
+ into a NodeScore per file, then uses greedy selection to produce the minimum-sufficient
5
+ subgraph that maximises explanatory value within a context budget.
6
+
7
+ Design invariants:
8
+ - Deterministic: sort key is always (-score, path). Path breaks all ties.
9
+ - No LLMs, no randomness, no external I/O.
10
+ - All signals optional: degrades gracefully when data is absent.
11
+ - SCORER_VERSION: bump on any formula change so callers can detect drift.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from collections import Counter, deque
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any, Optional
19
+
20
+ SCORER_VERSION = "1"
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Edge weight tables
24
+ # ---------------------------------------------------------------------------
25
+
26
+ _EDGE_BASE_WEIGHTS: dict[str, float] = {
27
+ "imports": 1.00, # structural dependency — strongest signal
28
+ "extends": 0.90, # inheritance / implementation — tight coupling
29
+ "calls": 0.80, # behavioral dependency
30
+ "contains": 0.30, # membership — low marginal information
31
+ }
32
+
33
+ _CONFIDENCE_MULT: dict[str, float] = {
34
+ "high": 1.0,
35
+ "medium": 0.7,
36
+ "low": 0.3,
37
+ }
38
+
39
+ # Annotation kinds weighted at 2× (actionable defects vs informational notes)
40
+ _HIGH_SEVERITY_NOTES: frozenset[str] = frozenset({"BUG", "FIXME", "HACK", "XXX"})
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Data model
45
+ # ---------------------------------------------------------------------------
46
+
47
@dataclass
class NodeScore:
    """Score record for one file node, with a per-signal breakdown.

    Ranking and selection use ``score`` (and ``display_score`` for output).
    The remaining numeric fields expose the individual signals so a caller
    can see which of them dominated the final value.
    """

    # Path of the scored file.
    path: str
    # Final weighted score; higher means more relevant.
    score: float
    # ``score`` clamped to [0.0, 1.0] for output fields.
    display_score: float
    # Contribution reported by RankingEngine.
    structural: float
    # Call-graph centrality, in [0.0, 1.0].
    semantic: float
    # Code-note density, in [0.0, 1.0].
    annotation: float
    # BFS closeness to the focus file, in [0.0, 1.0].
    proximity: float
    # Human-readable explanations for the score.
    reasons: list[str]
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Core scorer
67
+ # ---------------------------------------------------------------------------
68
+
69
class ContextScorer:
    """Score files and pick a small, diverse subset of them.

    The only instance state is the ``RankingEngine`` built in ``__init__``;
    none of the methods below assign to ``self`` afterwards.
    """

    def __init__(
        self,
        monorepo_packages: Optional[list] = None,
    ) -> None:
        """Create the underlying ranking engine.

        Parameters
        ----------
        monorepo_packages   forwarded to ``RankingEngine``; ``None`` (or any
                            falsy value) is replaced by an empty list.
        """
        from sourcecode.ranking_engine import RankingEngine

        packages = monorepo_packages or []
        self._engine = RankingEngine(packages)

    def score_nodes(
        self,
        contracts: list[Any],
        *,
        semantic_calls: Optional[list] = None,
        git_hotspots: Optional[dict[str, int]] = None,
        code_notes: Optional[list] = None,
        focus_path: Optional[str] = None,
        task: str = "default",
    ) -> dict[str, NodeScore]:
        """Build one NodeScore per contract.

        Parameters
        ----------
        contracts       FileContract list. Each item is read for ``path``,
                        ``fan_in``, ``fan_out``, ``is_entrypoint``,
                        ``is_changed`` and ``exports``.
        semantic_calls  call records used for centrality and as extra
                        proximity edges (optional).
        git_hotspots    {path: commit_count} (optional).
        code_notes      code notes used for annotation density (optional).
        focus_path      anchor file for the proximity BFS (optional).
        task            key into ``TASK_WEIGHTS``; unknown keys fall back to
                        the ``"default"`` profile.

        Returns
        -------
        dict mapping each contract path to its NodeScore.
        """
        from sourcecode.ranking_engine import TASK_WEIGHTS

        weights = TASK_WEIGHTS.get(task, TASK_WEIGHTS["default"])
        churn_by_path = git_hotspots or {}
        fan_in_ceiling = max((contract.fan_in for contract in contracts), default=1)
        churn_ceiling = max(churn_by_path.values(), default=1)

        # Optional signals: each map stays empty when its input is absent, so
        # the per-file lookups below fall back to 0.0.
        centrality: dict[str, float] = (
            _semantic_centrality(semantic_calls, contracts) if semantic_calls else {}
        )
        # ``or 1.0`` also replaces an all-zero maximum.
        semantic_ceiling = max(centrality.values(), default=1.0) or 1.0

        density: dict[str, float] = (
            _annotation_density(code_notes, contracts) if code_notes else {}
        )

        closeness: dict[str, float] = (
            _proximity_bfs(focus_path, contracts, semantic_calls or [])
            if focus_path
            else {}
        )

        scored: dict[str, NodeScore] = {}
        for contract in contracts:
            sem = centrality.get(contract.path, 0.0)
            ann = density.get(contract.path, 0.0)
            prox = closeness.get(contract.path, 0.0)

            # NOTE(review): ``ann`` is stored on the NodeScore below but is not
            # passed to the engine call — confirm whether annotation density
            # is meant to influence the score.
            engine_result = self._engine.score(
                contract.path,
                fan_in=contract.fan_in,
                fan_out=contract.fan_out,
                max_fan_in=fan_in_ceiling,
                git_churn=churn_by_path.get(contract.path, 0),
                max_churn=churn_ceiling,
                is_entrypoint=contract.is_entrypoint,
                is_changed=contract.is_changed,
                export_count=len(contract.exports),
                task=task,
                semantic_centrality=sem,
                max_semantic=semantic_ceiling,
            )

            # Proximity is added on top of the engine's score.
            proximity_bonus = prox * 0.50 * weights.proximity
            total = engine_result.score + proximity_bonus

            explanations = [*engine_result.reasons]
            if proximity_bonus > 0:
                if prox >= 0.80:
                    explanations.append("close to focus")
                elif prox >= 0.50:
                    explanations.append("near focus")

            scored[contract.path] = NodeScore(
                path=contract.path,
                score=total,
                display_score=max(0.0, min(1.0, total)),
                structural=engine_result.score,
                semantic=sem,
                annotation=ann,
                proximity=prox,
                reasons=explanations,
            )

        return scored

    def select_subgraph(
        self,
        node_scores: dict[str, NodeScore],
        contracts: list[Any],
        *,
        budget: int = 30,
        min_score: float = 0.05,
    ) -> list[str]:
        """Greedily pick up to ``budget`` paths, discounting crowded directories.

        Every round re-evaluates each remaining candidate as
        ``score * (1 - redundancy_penalty)`` and takes the largest value, so
        a file from a directory not yet represented can overtake a sibling
        of an already-selected file that has a higher raw score.

        Selection ends when ``budget`` paths were chosen, when no candidates
        remain, or when the best effective score is below ``min_score``.
        Equal effective scores are resolved in favour of the smaller path.

        Parameters
        ----------
        node_scores   output of score_nodes()
        contracts     FileContract list, forwarded (as a path-keyed map) to
                      the redundancy helper; may be empty
        budget        maximum number of paths to return
        min_score     candidates whose raw or effective score is below this
                      value are not selected

        Returns
        -------
        Selected paths in the order they were chosen.
        """
        by_path = {contract.path: contract for contract in contracts}
        candidates: dict[str, NodeScore] = dict(node_scores)
        chosen: list[str] = []
        chosen_lookup: set[str] = set()

        while candidates and len(chosen) < budget:
            top_path: str | None = None
            top_value: float = -1.0

            for path, node in candidates.items():
                if node.score < min_score:
                    continue
                discount = _redundancy_penalty(path, chosen_lookup, by_path)
                effective = node.score * (1.0 - discount)
                is_better = effective > top_value
                # Ties go to the lexicographically smaller path.
                wins_tie = (
                    effective == top_value
                    and top_path is not None
                    and path < top_path
                )
                if is_better or wins_tie:
                    top_value = effective
                    top_path = path

            if top_path is None or top_value < min_score:
                break

            chosen.append(top_path)
            chosen_lookup.add(top_path)
            candidates.pop(top_path)

        return chosen

    @staticmethod
    def edge_weight(kind: str, confidence: str) -> float:
        """Return the weight of a graph edge.

        The result is the base weight for ``kind`` multiplied by the
        multiplier for ``confidence``. An unknown kind or an unknown
        confidence each contributes 0.50.
        """
        return _EDGE_BASE_WEIGHTS.get(kind, 0.50) * _CONFIDENCE_MULT.get(confidence, 0.50)
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # Signal computers (module-level, pure functions)
250
+ # ---------------------------------------------------------------------------
251
+
252
+ def _semantic_centrality(
253
+ semantic_calls: list,
254
+ contracts: list,
255
+ ) -> dict[str, float]:
256
+ """Per-file centrality from the call graph.
257
+
258
+ centrality(path) = (weighted_fan_in × 2 + weighted_fan_out) / max
259
+ where weight = confidence multiplier (high=1.0, medium=0.7, low=0.3).
260
+
261
+ Returns a dict normalised to [0.0, 1.0] across the contract set.
262
+ """
263
+ path_set = {c.path for c in contracts}
264
+ fan_in: Counter[str] = Counter()
265
+ fan_out: Counter[str] = Counter()
266
+
267
+ for call in semantic_calls:
268
+ w = _CONFIDENCE_MULT.get(getattr(call, "confidence", "medium"), 0.7)
269
+ callee = getattr(call, "callee_path", None)
270
+ caller = getattr(call, "caller_path", None)
271
+ if callee and callee in path_set:
272
+ fan_in[callee] += w
273
+ if caller and caller in path_set:
274
+ fan_out[caller] += w
275
+
276
+ raw = {p: fan_in[p] * 2.0 + fan_out[p] for p in path_set}
277
+ max_val = max(raw.values(), default=0.0)
278
+ if max_val <= 0.0:
279
+ return {p: 0.0 for p in path_set}
280
+ return {p: v / max_val for p, v in raw.items()}
281
+
282
+
283
+ def _proximity_bfs(
284
+ focus_path: str,
285
+ contracts: list,
286
+ semantic_calls: list,
287
+ ) -> dict[str, float]:
288
+ """BFS from focus_path through import + call edges.
289
+
290
+ Traversal is bidirectional (imports and calls traversed in both directions)
291
+ so the proximity score reflects reachability in any direction from the focus.
292
+
293
+ proximity(path) = 1.0 / (2 ** distance)
294
+ distance=0 → 1.00 (the focus itself)
295
+ distance=1 → 0.50
296
+ distance=2 → 0.25
297
+ distance=3 → 0.125
298
+ distance=4 → 0.0625 (max depth)
299
+
300
+ BFS neighbours are sorted before enqueuing to ensure determinism.
301
+ """
302
+ path_set = {c.path for c in contracts}
303
+
304
+ # Build bidirectional adjacency from import graph
305
+ adj: dict[str, set[str]] = {p: set() for p in path_set}
306
+ for c in contracts:
307
+ base_dir = str(Path(c.path).parent).replace("\\", "/")
308
+ for imp in c.imports:
309
+ src = getattr(imp, "source", "")
310
+ if not src.startswith("."):
311
+ continue
312
+ for t in _resolve_import(base_dir, src, path_set):
313
+ adj[c.path].add(t)
314
+ adj[t].add(c.path)
315
+
316
+ # Augment with call graph edges
317
+ for call in semantic_calls:
318
+ caller = getattr(call, "caller_path", None)
319
+ callee = getattr(call, "callee_path", None)
320
+ if caller in adj and callee in adj:
321
+ adj[caller].add(callee)
322
+ adj[callee].add(caller)
323
+
324
+ if focus_path not in adj:
325
+ return {}
326
+
327
+ distances: dict[str, int] = {focus_path: 0}
328
+ queue: deque[str] = deque([focus_path])
329
+ while queue:
330
+ node = queue.popleft()
331
+ d = distances[node]
332
+ if d >= 4:
333
+ continue
334
+ for neighbor in sorted(adj.get(node, set())):
335
+ if neighbor not in distances:
336
+ distances[neighbor] = d + 1
337
+ queue.append(neighbor)
338
+
339
+ return {p: 1.0 / (2 ** d) for p, d in distances.items()}
340
+
341
+
342
+ def _annotation_density(
343
+ code_notes: list,
344
+ contracts: list,
345
+ ) -> dict[str, float]:
346
+ """Severity-weighted annotation density per file, normalised [0.0, 1.0].
347
+
348
+ BUG / FIXME / HACK / XXX count 2×; all other kinds count 1×.
349
+ """
350
+ path_set = {c.path for c in contracts}
351
+ weighted: Counter[str] = Counter()
352
+ for note in code_notes:
353
+ path = getattr(note, "path", None)
354
+ if path not in path_set:
355
+ continue
356
+ kind = getattr(note, "kind", "").upper()
357
+ weighted[path] += 2.0 if kind in _HIGH_SEVERITY_NOTES else 1.0
358
+
359
+ max_val = max(weighted.values(), default=1.0)
360
+ return {p: min(weighted.get(p, 0.0) / max_val, 1.0) for p in path_set}
361
+
362
+
363
+ def _redundancy_penalty(
364
+ path: str,
365
+ selected_set: set[str],
366
+ contract_map: dict,
367
+ ) -> float:
368
+ """Penalty for adding a file from the same directory as already-selected files.
369
+
370
+ Rationale: files in the same directory address the same concern; the
371
+ marginal explanatory gain of the n-th file from a directory is lower than
372
+ that of the first file from a new directory.
373
+
374
+ Penalty grows by 0.10 per same-directory sibling, capped at 0.40.
375
+ The 0.40 cap ensures no node is ever fully excluded by proximity alone.
376
+ """
377
+ if not selected_set:
378
+ return 0.0
379
+ path_dir = str(Path(path).parent)
380
+ same_dir_count = sum(
381
+ 1 for s in selected_set
382
+ if str(Path(s).parent) == path_dir
383
+ )
384
+ return min(same_dir_count * 0.10, 0.40)
385
+
386
+
387
+ def _resolve_import(base_dir: str, src: str, path_set: set[str]) -> list[str]:
388
+ """Approximate resolution of a relative import specifier to known paths.
389
+
390
+ Mirrors the logic in contract_pipeline._resolve_relative without importing
391
+ from that module (avoids circular import).
392
+ """
393
+ src = src.lstrip("./")
394
+ if not src:
395
+ return []
396
+ exts = (".ts", ".tsx", ".js", ".jsx", ".py", "/index.ts", "/index.js", "/index.tsx")
397
+ for ext in exts:
398
+ candidate = f"{base_dir}/{src}{ext}".replace("//", "/")
399
+ if candidate in path_set:
400
+ return [candidate]
401
+ candidate = f"{base_dir}/{src}".replace("//", "/")
402
+ if candidate in path_set:
403
+ return [candidate]
404
+ return []
@@ -109,3 +109,4 @@ class ContractSummary:
109
109
  method_breakdown: dict[str, int] = field(default_factory=dict)
110
110
  ranked_by: str = "relevance"
111
111
  limitations: list[str] = field(default_factory=list)
112
+ symbol_truncation: Optional[dict] = None # set when --symbol truncates importers
@@ -175,6 +175,9 @@ class ContractPipeline:
175
175
  changed_only: bool = False,
176
176
  symbol: Optional[str] = None,
177
177
  compress_types: bool = False,
178
+ max_importers: int = 50,
179
+ semantic_calls: Optional[list] = None,
180
+ code_notes: Optional[list] = None,
178
181
  ) -> tuple[list[FileContract], ContractSummary]:
179
182
  """Run the full extraction pipeline.
180
183
 
@@ -256,40 +259,42 @@ class ContractPipeline:
256
259
  if rank_by == "git-churn":
257
260
  churn = _get_git_churn(root, [c.path for c in contracts])
258
261
 
259
- # 6. Compute relevance scores via unified ranking engine
260
- max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
261
- max_churn_val = max(churn.values(), default=1) if churn else 1
262
+ # 6. Compute relevance scores via unified scoring engine.
263
+ # ContextScorer wraps RankingEngine and enriches scores with semantic
264
+ # centrality (when semantic_calls available) and annotation density
265
+ # (when code_notes available). Falls back to structural signals only
266
+ # when neither is present — identical to the old behaviour.
267
+ from sourcecode.context_scorer import ContextScorer
268
+ _ctx_scorer = ContextScorer(monorepo_packages)
269
+ _node_scores = _ctx_scorer.score_nodes(
270
+ contracts,
271
+ semantic_calls=semantic_calls,
272
+ code_notes=code_notes,
273
+ git_hotspots=churn,
274
+ task="default",
275
+ )
262
276
  for c in contracts:
263
- fs = engine.score(
264
- c.path,
265
- fan_in=c.fan_in,
266
- fan_out=c.fan_out,
267
- max_fan_in=max_fan_in,
268
- git_churn=churn.get(c.path, 0),
269
- max_churn=max_churn_val,
270
- is_entrypoint=c.is_entrypoint,
271
- is_changed=c.is_changed,
272
- export_count=len(c.exports),
273
- task="default",
274
- )
275
- c.relevance_score = fs.display_score
276
- c.ranking_reasons = fs.reasons
277
+ ns = _node_scores[c.path]
278
+ c.relevance_score = ns.display_score
279
+ c.ranking_reasons = ns.reasons
277
280
 
278
281
  # 7. Rank
279
282
  contracts = self._rank(contracts, rank_by)
280
283
 
281
284
  # 8. Symbol filter — keep files that define or import the symbol
285
+ _symbol_truncation: Optional[dict] = None
282
286
  if symbol:
283
- contracts = _filter_by_symbol(contracts, symbol)
287
+ contracts, _symbol_truncation = _filter_by_symbol(contracts, symbol, max_importers=max_importers)
284
288
  # When shallow scan missed the defining file (deep monorepo), fall back
285
289
  # to a grep-based filesystem search over the full directory tree.
286
290
  if not contracts:
287
- contracts = self._symbol_deep_scan(
291
+ contracts, _symbol_truncation = self._symbol_deep_scan(
288
292
  root, symbol,
289
293
  known_paths=set(src_paths),
290
294
  entry_paths=entry_paths,
291
295
  changed_files=changed_files,
292
296
  engine=engine,
297
+ max_importers=max_importers,
293
298
  )
294
299
 
295
300
  # 9. Entrypoints-only filter
@@ -313,6 +318,7 @@ class ContractPipeline:
313
318
  method_breakdown=dict(method_counts),
314
319
  ranked_by=rank_by,
315
320
  limitations=limitations,
321
+ symbol_truncation=_symbol_truncation,
316
322
  )
317
323
  return contracts, summary
318
324
 
@@ -332,7 +338,8 @@ class ContractPipeline:
332
338
  entry_paths: set[str],
333
339
  changed_files: set[str],
334
340
  engine: RankingEngine,
335
- ) -> list[FileContract]:
341
+ max_importers: int = 50,
342
+ ) -> tuple[list[FileContract], dict]:
336
343
  """Grep-based fallback when the shallow scan missed the defining files.
337
344
 
338
345
  Searches the full directory tree for source files containing *symbol*,
@@ -356,7 +363,7 @@ class ContractPipeline:
356
363
  contract.ranking_reasons = fs.reasons
357
364
  extra.append(contract)
358
365
 
359
- return _filter_by_symbol(extra, symbol)
366
+ return _filter_by_symbol(extra, symbol, max_importers=max_importers)
360
367
 
361
368
 
362
369
  # ---------------------------------------------------------------------------
@@ -412,7 +419,11 @@ def _limit_symbols(contracts: list[FileContract], max_symbols: int) -> list[File
412
419
  # Symbol-aware filter
413
420
  # ---------------------------------------------------------------------------
414
421
 
415
- def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileContract]:
422
+ def _filter_by_symbol(
423
+ contracts: list[FileContract],
424
+ symbol: str,
425
+ max_importers: int = 50,
426
+ ) -> tuple[list[FileContract], dict]:
416
427
  """Return contracts that define, import, or structurally reference *symbol*.
417
428
 
418
429
  Four tiers applied in order:
@@ -423,6 +434,8 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
423
434
  function signatures (word-boundary). Only used when tiers 1-3 fail.
424
435
 
425
436
  Defining contracts are ranked first; importers and references follow.
437
+ max_importers caps tier 3 results to prevent output explosion on popular symbols.
438
+ Returns (contracts, truncation_metadata).
426
439
  """
427
440
  sym_l = symbol.lower()
428
441
  word_re = re.compile(
@@ -466,8 +479,14 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
466
479
 
467
480
  # Tier 3: import matching (case-insensitive when no definers found)
468
481
  ci_imports = len(defining) == 0
469
- importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
470
- importers = [c for c in contracts if c.path in importer_paths and c.path not in defining_paths]
482
+ all_importer_paths = {c.path for c in contracts if _imports_sym(c, case=ci_imports)}
483
+ all_importers = [c for c in contracts if c.path in all_importer_paths and c.path not in defining_paths]
484
+
485
+ # Apply importer cap — definers are never truncated
486
+ total_importers = len(all_importers)
487
+ truncated = total_importers > max_importers
488
+ importers = all_importers[:max_importers] if truncated else all_importers
489
+ importer_paths = {c.path for c in importers}
471
490
 
472
491
  # Tier 4: type-reference matching (only when tiers 1-3 yield nothing)
473
492
  references: list[FileContract] = []
@@ -483,12 +502,27 @@ def _filter_by_symbol(contracts: list[FileContract], symbol: str) -> list[FileCo
483
502
  seen.add(c.path)
484
503
  merged.append(c)
485
504
 
486
- return sorted(merged, key=lambda c: (
505
+ result = sorted(merged, key=lambda c: (
487
506
  c.path not in defining_paths,
488
507
  c.path not in importer_paths,
489
508
  -c.relevance_score,
490
509
  ))
491
510
 
511
+ truncation: dict = {
512
+ "symbol": symbol,
513
+ "definers_found": len(defining),
514
+ "importers_found": total_importers,
515
+ "importers_returned": len(importers),
516
+ "references_found": len(references),
517
+ "total_returned": len(result),
518
+ "truncated": truncated,
519
+ }
520
+ if truncated:
521
+ truncation["truncation_reason"] = "max_importers_limit"
522
+ truncation["override_hint"] = f"--symbol {symbol} --max-importers {total_importers}"
523
+
524
+ return result, truncation
525
+
492
526
 
493
527
  # ---------------------------------------------------------------------------
494
528
  # Deep symbol scan — grep-based fallback for shallow-scanned repos