sourcecode 0.43.0__tar.gz → 0.45.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. {sourcecode-0.43.0 → sourcecode-0.45.0}/PKG-INFO +1 -1
  2. {sourcecode-0.43.0 → sourcecode-0.45.0}/pyproject.toml +1 -1
  3. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/__init__.py +1 -1
  4. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/cli.py +71 -22
  5. sourcecode-0.45.0/src/sourcecode/context_scorer.py +404 -0
  6. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/contract_pipeline.py +22 -18
  7. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/heuristic.py +9 -1
  8. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/env_analyzer.py +2 -2
  9. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/prepare_context.py +27 -1
  10. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/ranking_engine.py +29 -7
  11. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/serializer.py +29 -4
  12. sourcecode-0.45.0/tests/test_context_scorer.py +449 -0
  13. {sourcecode-0.43.0 → sourcecode-0.45.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  14. {sourcecode-0.43.0 → sourcecode-0.45.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  15. {sourcecode-0.43.0 → sourcecode-0.45.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  16. {sourcecode-0.43.0 → sourcecode-0.45.0}/.gitignore +0 -0
  17. {sourcecode-0.43.0 → sourcecode-0.45.0}/.ruff.toml +0 -0
  18. {sourcecode-0.43.0 → sourcecode-0.45.0}/CONTRIBUTING.md +0 -0
  19. {sourcecode-0.43.0 → sourcecode-0.45.0}/LICENSE +0 -0
  20. {sourcecode-0.43.0 → sourcecode-0.45.0}/README.md +0 -0
  21. {sourcecode-0.43.0 → sourcecode-0.45.0}/SECURITY.md +0 -0
  22. {sourcecode-0.43.0 → sourcecode-0.45.0}/docs/privacy.md +0 -0
  23. {sourcecode-0.43.0 → sourcecode-0.45.0}/docs/schema.md +0 -0
  24. {sourcecode-0.43.0 → sourcecode-0.45.0}/raw +0 -0
  25. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/adaptive_scanner.py +0 -0
  26. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/architecture_analyzer.py +0 -0
  27. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/architecture_summary.py +0 -0
  28. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/ast_extractor.py +0 -0
  29. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/classifier.py +0 -0
  30. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  31. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/confidence_analyzer.py +0 -0
  32. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/context_summarizer.py +0 -0
  33. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/contract_model.py +0 -0
  34. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/coverage_parser.py +0 -0
  35. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/dependency_analyzer.py +0 -0
  36. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/__init__.py +0 -0
  37. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/base.py +0 -0
  38. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  39. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/dart.py +0 -0
  40. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/dotnet.py +0 -0
  41. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/elixir.py +0 -0
  42. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/go.py +0 -0
  43. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/hybrid.py +0 -0
  44. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/java.py +0 -0
  45. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  46. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/nodejs.py +0 -0
  47. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/parsers.py +0 -0
  48. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/php.py +0 -0
  49. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/project.py +0 -0
  50. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/python.py +0 -0
  51. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/ruby.py +0 -0
  52. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/rust.py +0 -0
  53. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/systems.py +0 -0
  54. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/terraform.py +0 -0
  55. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/detectors/tooling.py +0 -0
  56. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/doc_analyzer.py +0 -0
  57. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  58. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/file_classifier.py +0 -0
  59. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/git_analyzer.py +0 -0
  60. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/graph_analyzer.py +0 -0
  61. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/metrics_analyzer.py +0 -0
  62. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/redactor.py +0 -0
  63. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/relevance_scorer.py +0 -0
  64. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/repo_classifier.py +0 -0
  65. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/runtime_classifier.py +0 -0
  66. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/scanner.py +0 -0
  67. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/schema.py +0 -0
  68. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/semantic_analyzer.py +0 -0
  69. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/summarizer.py +0 -0
  70. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/__init__.py +0 -0
  71. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/config.py +0 -0
  72. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/consent.py +0 -0
  73. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/events.py +0 -0
  74. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/filters.py +0 -0
  75. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/telemetry/transport.py +0 -0
  76. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/tree_utils.py +0 -0
  77. {sourcecode-0.43.0 → sourcecode-0.45.0}/src/sourcecode/workspace.py +0 -0
  78. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/__init__.py +0 -0
  79. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/conftest.py +0 -0
  80. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/coverage.xml +0 -0
  81. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  82. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  83. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  84. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/go_service/go.mod +0 -0
  85. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/jacoco.xml +0 -0
  86. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/lcov.info +0 -0
  87. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  88. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/nextjs_app/package.json +0 -0
  89. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  90. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  91. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  92. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  93. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  94. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  95. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_architecture_analyzer.py +0 -0
  96. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_architecture_summary.py +0 -0
  97. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_ast_extractor.py +0 -0
  98. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_block1_reliability.py +0 -0
  99. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_block2_coverage.py +0 -0
  100. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_block5_quality.py +0 -0
  101. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_classifier.py +0 -0
  102. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_cli.py +0 -0
  103. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_code_notes_analyzer.py +0 -0
  104. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_contract_pipeline.py +0 -0
  105. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_coverage_parser.py +0 -0
  106. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_cross_consistency.py +0 -0
  107. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  108. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  109. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_dependency_schema.py +0 -0
  110. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_dotnet.py +0 -0
  111. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_go_rust_java.py +0 -0
  112. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_nodejs.py +0 -0
  113. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_php_ruby_dart.py +0 -0
  114. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_python.py +0 -0
  115. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_universal_managed.py +0 -0
  116. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detector_universal_systems.py +0 -0
  117. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_detectors_base.py +0 -0
  118. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  119. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_doc_analyzer_python.py +0 -0
  120. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  121. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_graph_analyzer_python_node.py +0 -0
  122. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_graph_schema.py +0 -0
  123. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_hybrid_inference.py +0 -0
  124. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration.py +0 -0
  125. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_dependencies.py +0 -0
  126. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_detection.py +0 -0
  127. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_docs.py +0 -0
  128. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_graph_modules.py +0 -0
  129. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_lqn.py +0 -0
  130. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_metrics.py +0 -0
  131. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_multistack.py +0 -0
  132. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_semantics.py +0 -0
  133. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_integration_universal.py +0 -0
  134. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_metrics_analyzer.py +0 -0
  135. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_packaging.py +0 -0
  136. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_phase1_improvements.py +0 -0
  137. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_pipeline_integrity.py +0 -0
  138. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_real_projects.py +0 -0
  139. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_redactor.py +0 -0
  140. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_scanner.py +0 -0
  141. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_schema.py +0 -0
  142. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_schema_normalization.py +0 -0
  143. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_semantic_analyzer_node.py +0 -0
  144. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_semantic_analyzer_python.py +0 -0
  145. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_semantic_import_resolution.py +0 -0
  146. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_semantic_schema.py +0 -0
  147. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_signal_hierarchy.py +0 -0
  148. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_summarizer.py +0 -0
  149. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_telemetry.py +0 -0
  150. {sourcecode-0.43.0 → sourcecode-0.45.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.43.0
3
+ Version: 0.45.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.43.0"
7
+ version = "0.45.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.43.0"
3
+ __version__ = "0.45.0"
@@ -660,6 +660,21 @@ def main(
660
660
  )
661
661
  raise typer.Exit(code=1)
662
662
 
663
+ if symbol and mode not in ("contract", "standard"):
664
+ typer.echo(
665
+ f"Error: --symbol requires --mode contract or standard (got '{mode}'). "
666
+ "Symbol search uses the contract pipeline which does not run in raw mode.",
667
+ err=True,
668
+ )
669
+ raise typer.Exit(code=1)
670
+
671
+ if entrypoints_only and mode not in ("contract", "standard"):
672
+ typer.echo(
673
+ f"Error: --entrypoints-only requires --mode contract or standard (got '{mode}').",
674
+ err=True,
675
+ )
676
+ raise typer.Exit(code=1)
677
+
663
678
  if dependency_depth > 0:
664
679
  typer.echo(
665
680
  f"[warning] --dependency-depth {dependency_depth} has no effect: "
@@ -1257,7 +1272,13 @@ def main(
1257
1272
  eco_order = 0 if d.ecosystem == primary_ecosystem else 1
1258
1273
  return (role_order, eco_order, d.name.lower())
1259
1274
 
1260
- sm.key_dependencies = sorted(direct_deps, key=_dep_sort_key)[:15]
1275
+ _seen_dep_names: set[str] = set()
1276
+ _deduped_deps: list[Any] = []
1277
+ for d in sorted(direct_deps, key=_dep_sort_key):
1278
+ if d.name not in _seen_dep_names:
1279
+ _seen_dep_names.add(d.name)
1280
+ _deduped_deps.append(d)
1281
+ sm.key_dependencies = _deduped_deps[:15]
1261
1282
 
1262
1283
  # LQN-02: deterministic NL summary
1263
1284
  sm.project_summary = ProjectSummarizer(target).generate(sm)
@@ -1356,30 +1377,58 @@ def main(
1356
1377
  _is_contract_mode = mode in ("contract", "standard")
1357
1378
  if _is_contract_mode:
1358
1379
  from sourcecode.contract_pipeline import ContractPipeline
1380
+ from sourcecode.contract_model import ContractSummary as _ContractSummary
1359
1381
  _cp = ContractPipeline()
1360
- _contracts, _contract_summary = _cp.run(
1361
- target,
1362
- sm.file_paths,
1363
- entry_points=sm.entry_points,
1364
- monorepo_packages=sm.monorepo_packages,
1365
- mode=mode,
1366
- rank_by=rank_by, # type: ignore[arg-type]
1367
- max_symbols=max_symbols,
1368
- dependency_depth=dependency_depth,
1369
- entrypoints_only=entrypoints_only,
1370
- changed_only=changed_only,
1371
- symbol=symbol,
1372
- compress_types=compress_types,
1373
- max_importers=max_importers,
1374
- )
1382
+ try:
1383
+ _contracts, _contract_summary = _cp.run(
1384
+ target,
1385
+ sm.file_paths,
1386
+ entry_points=sm.entry_points,
1387
+ monorepo_packages=sm.monorepo_packages,
1388
+ mode=mode,
1389
+ rank_by=rank_by, # type: ignore[arg-type]
1390
+ max_symbols=max_symbols,
1391
+ dependency_depth=dependency_depth,
1392
+ entrypoints_only=entrypoints_only,
1393
+ changed_only=changed_only,
1394
+ symbol=symbol,
1395
+ compress_types=compress_types,
1396
+ max_importers=max_importers,
1397
+ semantic_calls=sm.semantic_calls or None,
1398
+ code_notes=sm.code_notes or None,
1399
+ )
1400
+ except Exception as _exc:
1401
+ typer.echo(f"[error] contract pipeline failed: {_exc}", err=True)
1402
+ _contracts = []
1403
+ _contract_summary = _ContractSummary(
1404
+ mode=mode,
1405
+ total_files=0,
1406
+ extracted_files=0,
1407
+ filtered_files=0,
1408
+ method_breakdown={},
1409
+ ranked_by=rank_by,
1410
+ limitations=[f"pipeline_error: {type(_exc).__name__}"],
1411
+ )
1375
1412
  sm = _replace(sm, file_contracts=_contracts, contract_summary=_contract_summary)
1376
1413
  if symbol is not None and len(_contracts) == 0:
1377
- typer.echo(
1378
- f"[warning] --symbol '{symbol}' matched 0 files. "
1379
- "The symbol may not exist at the current --depth, or the name may differ in case. "
1380
- "Try --depth 8 or verify the symbol name.",
1381
- err=True,
1382
- )
1414
+ _jvm_stacks = {"java", "kotlin", "scala", "groovy"}
1415
+ _is_jvm_repo = any(s.stack in _jvm_stacks for s in sm.stacks)
1416
+ if _is_jvm_repo:
1417
+ typer.echo(
1418
+ f"[warning] --symbol '{symbol}' matched 0 files. "
1419
+ "Per-file AST extraction is not available for Java/JVM repos — "
1420
+ "symbol search works only with Python, TypeScript, and JavaScript. "
1421
+ "Use --git-context or --code-notes for JVM navigation.",
1422
+ err=True,
1423
+ )
1424
+ else:
1425
+ typer.echo(
1426
+ f"[warning] --symbol '{symbol}' matched 0 files. "
1427
+ "The symbol may not exist, the name may differ in case, "
1428
+ "or the file may be outside the scanned depth. "
1429
+ "Try --depth 8 or verify the symbol name.",
1430
+ err=True,
1431
+ )
1383
1432
  if agent:
1384
1433
  typer.echo(f"[contract] {len(_contracts)} files extracted ({_contract_summary.method_breakdown})", err=True)
1385
1434
 
@@ -0,0 +1,404 @@
1
+ """context_scorer.py — Unified node scoring and minimum-sufficient subgraph selection.
2
+
3
+ Aggregates all available signals (structural, semantic, git, annotations, proximity)
4
+ into a NodeScore per file, then uses greedy selection to produce the minimum-sufficient
5
+ subgraph that maximises explanatory value within a context budget.
6
+
7
+ Design invariants:
8
+ - Deterministic: sort key is always (-score, path). Path breaks all ties.
9
+ - No LLMs, no randomness, no external I/O.
10
+ - All signals optional: degrades gracefully when data is absent.
11
+ - SCORER_VERSION: bump on any formula change so callers can detect drift.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from collections import Counter, deque
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any, Optional
19
+
20
+ SCORER_VERSION = "1"
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Edge weight tables
24
+ # ---------------------------------------------------------------------------
25
+
26
+ _EDGE_BASE_WEIGHTS: dict[str, float] = {
27
+ "imports": 1.00, # structural dependency — strongest signal
28
+ "extends": 0.90, # inheritance / implementation — tight coupling
29
+ "calls": 0.80, # behavioral dependency
30
+ "contains": 0.30, # membership — low marginal information
31
+ }
32
+
33
+ _CONFIDENCE_MULT: dict[str, float] = {
34
+ "high": 1.0,
35
+ "medium": 0.7,
36
+ "low": 0.3,
37
+ }
38
+
39
+ # Annotation kinds weighted at 2× (actionable defects vs informational notes)
40
+ _HIGH_SEVERITY_NOTES: frozenset[str] = frozenset({"BUG", "FIXME", "HACK", "XXX"})
41
+
42
+
43
+ # ---------------------------------------------------------------------------
44
+ # Data model
45
+ # ---------------------------------------------------------------------------
46
+
47
@dataclass
class NodeScore:
    """Per-file score record produced by ContextScorer.score_nodes.

    ``score`` is the value selection works with; ``display_score`` is the same
    number clamped for output. The remaining numeric fields expose the
    individual signals so a caller can see which one dominated.
    """

    # File path this record describes.
    path: str
    # Final weighted score (higher = more relevant).
    score: float
    # ``score`` clamped to [0.0, 1.0] for output fields.
    display_score: float
    # Contribution reported by RankingEngine.
    structural: float
    # Call-graph centrality, [0.0, 1.0].
    semantic: float
    # Code-note density, [0.0, 1.0].
    annotation: float
    # BFS closeness to the focus file, [0.0, 1.0].
    proximity: float
    # Human-readable explanations attached to the score.
    reasons: list[str]
63
+
64
+
65
+ # ---------------------------------------------------------------------------
66
+ # Core scorer
67
+ # ---------------------------------------------------------------------------
68
+
69
class ContextScorer:
    """Unified file scoring and minimum-sufficient subgraph selection.

    The only instance state is the RankingEngine created in ``__init__``; the
    methods below read it but never reassign it.
    """

    def __init__(
        self,
        monorepo_packages: Optional[list] = None,
    ) -> None:
        """Create the scorer.

        Parameters
        ----------
        monorepo_packages   forwarded to RankingEngine; ``None`` becomes ``[]``.
        """
        # Function-scope import (presumably to avoid an import cycle — confirm).
        from sourcecode.ranking_engine import RankingEngine

        self._engine = RankingEngine(monorepo_packages or [])

    def score_nodes(
        self,
        contracts: list[Any],
        *,
        semantic_calls: Optional[list] = None,
        git_hotspots: Optional[dict[str, int]] = None,
        code_notes: Optional[list] = None,
        focus_path: Optional[str] = None,
        task: str = "default",
    ) -> dict[str, NodeScore]:
        """Compute a NodeScore for every contract.

        Parameters
        ----------
        contracts       FileContract list. fan_in, fan_out, is_entrypoint,
                        is_changed, and exports must be set before calling.
        semantic_calls  list[CallRecord] from --semantics (optional).
        git_hotspots    {path: commit_count} from git analysis (optional).
        code_notes      list[CodeNote] from --code-notes (optional).
        focus_path      Anchor file for proximity BFS (optional).
        task            Task profile: fix-bug | refactor | explain | …
                        Unknown names fall back to the "default" profile.

        Returns
        -------
        dict mapping path → NodeScore for every contract path.
        """
        from sourcecode.ranking_engine import TASK_WEIGHTS

        w = TASK_WEIGHTS.get(task, TASK_WEIGHTS["default"])
        _hotspots = git_hotspots or {}
        # `or 1` keeps both normalisers non-zero when every fan_in / churn value
        # is 0, matching the guard already applied to max_semantic below.
        max_fan_in = max((c.fan_in for c in contracts), default=1) or 1
        max_churn = max(_hotspots.values(), default=1) or 1

        # Pre-compute optional signal maps
        sem_centrality: dict[str, float] = {}
        if semantic_calls:
            sem_centrality = _semantic_centrality(semantic_calls, contracts)
        max_semantic = max(sem_centrality.values(), default=1.0) or 1.0

        ann_density: dict[str, float] = {}
        if code_notes:
            ann_density = _annotation_density(code_notes, contracts)

        prox_scores: dict[str, float] = {}
        if focus_path:
            prox_scores = _proximity_bfs(focus_path, contracts, semantic_calls or [])

        result: dict[str, NodeScore] = {}
        for c in contracts:
            sem = sem_centrality.get(c.path, 0.0)
            ann = ann_density.get(c.path, 0.0)
            prox = prox_scores.get(c.path, 0.0)

            # Structural, git and semantic-centrality signals go through the
            # engine. Annotation density is not passed to this call; it is only
            # recorded on the NodeScore below.
            fs = self._engine.score(
                c.path,
                fan_in=c.fan_in,
                fan_out=c.fan_out,
                max_fan_in=max_fan_in,
                git_churn=_hotspots.get(c.path, 0),
                max_churn=max_churn,
                is_entrypoint=c.is_entrypoint,
                is_changed=c.is_changed,
                export_count=len(c.exports),
                task=task,
                semantic_centrality=sem,
                max_semantic=max_semantic,
            )

            # Proximity is a graph operation, computed here and added on top
            prox_contrib = prox * 0.50 * w.proximity

            final = fs.score + prox_contrib

            reasons = list(fs.reasons)
            if prox >= 0.80 and prox_contrib > 0:
                reasons.append("close to focus")
            elif prox >= 0.50 and prox_contrib > 0:
                reasons.append("near focus")

            result[c.path] = NodeScore(
                path=c.path,
                score=final,
                display_score=max(0.0, min(1.0, final)),
                structural=fs.score,
                semantic=sem,
                annotation=ann,
                proximity=prox,
                reasons=reasons,
            )

        return result

    def select_subgraph(
        self,
        node_scores: dict[str, NodeScore],
        contracts: list[Any],
        *,
        budget: int = 30,
        min_score: float = 0.05,
    ) -> list[str]:
        """Greedy minimum-sufficient subgraph selection with diversity re-ranking.

        At each round, recomputes effective scores for all remaining candidates
        (raw_score × (1 - redundancy_penalty)), then picks the highest. This
        allows a file from a new directory to beat a clustered sibling even if
        the sibling has a higher raw score.

        Stops when the budget is exhausted or no remaining candidate has an
        effective score of at least min_score.

        Each round scans every remaining candidate and the penalty helper scans
        the selected set, so the cost is roughly O(n × budget²).
        Deterministic: tie-break by path on every round.

        Parameters
        ----------
        node_scores   output of score_nodes()
        contracts     same FileContract list passed to score_nodes()
                      (forwarded to the redundancy helper; may be empty)
        budget        maximum number of nodes to select
        min_score     discard candidates whose raw or effective score is below this

        Returns
        -------
        Selected paths in pick order.
        """
        contract_map = {c.path: c for c in contracts}
        # A candidate whose raw score is below min_score is skipped in every
        # round, so filter those out once instead of re-testing them each time.
        remaining: dict[str, NodeScore] = {
            path: ns for path, ns in node_scores.items() if not ns.score < min_score
        }
        selected: list[str] = []
        selected_set: set[str] = set()

        while len(selected) < budget and remaining:
            best_path: Optional[str] = None
            best_effective: float = -1.0

            for path, ns in remaining.items():
                penalty = _redundancy_penalty(path, selected_set, contract_map)
                effective = ns.score * (1.0 - penalty)
                # Strict tie-break by path ensures determinism
                if effective > best_effective or (
                    effective == best_effective
                    and best_path is not None
                    and path < best_path
                ):
                    best_effective = effective
                    best_path = path

            if best_path is None or best_effective < min_score:
                break

            selected.append(best_path)
            selected_set.add(best_path)
            del remaining[best_path]

        return selected

    @staticmethod
    def edge_weight(kind: str, confidence: str) -> float:
        """Scalar weight for a graph edge based on relationship type and confidence.

        Returns the base weight for ``kind`` multiplied by the multiplier for
        ``confidence``. An unknown kind or an unknown confidence label each fall
        back to 0.50.
        """
        base = _EDGE_BASE_WEIGHTS.get(kind, 0.50)
        mult = _CONFIDENCE_MULT.get(confidence, 0.50)
        return base * mult
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # Signal computers (module-level, pure functions)
250
+ # ---------------------------------------------------------------------------
251
+
252
+ def _semantic_centrality(
253
+ semantic_calls: list,
254
+ contracts: list,
255
+ ) -> dict[str, float]:
256
+ """Per-file centrality from the call graph.
257
+
258
+ centrality(path) = (weighted_fan_in × 2 + weighted_fan_out) / max
259
+ where weight = confidence multiplier (high=1.0, medium=0.7, low=0.3).
260
+
261
+ Returns a dict normalised to [0.0, 1.0] across the contract set.
262
+ """
263
+ path_set = {c.path for c in contracts}
264
+ fan_in: Counter[str] = Counter()
265
+ fan_out: Counter[str] = Counter()
266
+
267
+ for call in semantic_calls:
268
+ w = _CONFIDENCE_MULT.get(getattr(call, "confidence", "medium"), 0.7)
269
+ callee = getattr(call, "callee_path", None)
270
+ caller = getattr(call, "caller_path", None)
271
+ if callee and callee in path_set:
272
+ fan_in[callee] += w
273
+ if caller and caller in path_set:
274
+ fan_out[caller] += w
275
+
276
+ raw = {p: fan_in[p] * 2.0 + fan_out[p] for p in path_set}
277
+ max_val = max(raw.values(), default=0.0)
278
+ if max_val <= 0.0:
279
+ return {p: 0.0 for p in path_set}
280
+ return {p: v / max_val for p, v in raw.items()}
281
+
282
+
283
+ def _proximity_bfs(
284
+ focus_path: str,
285
+ contracts: list,
286
+ semantic_calls: list,
287
+ ) -> dict[str, float]:
288
+ """BFS from focus_path through import + call edges.
289
+
290
+ Traversal is bidirectional (imports and calls traversed in both directions)
291
+ so the proximity score reflects reachability in any direction from the focus.
292
+
293
+ proximity(path) = 1.0 / (2 ** distance)
294
+ distance=0 → 1.00 (the focus itself)
295
+ distance=1 → 0.50
296
+ distance=2 → 0.25
297
+ distance=3 → 0.125
298
+ distance=4 → 0.0625 (max depth)
299
+
300
+ BFS neighbours are sorted before enqueuing to ensure determinism.
301
+ """
302
+ path_set = {c.path for c in contracts}
303
+
304
+ # Build bidirectional adjacency from import graph
305
+ adj: dict[str, set[str]] = {p: set() for p in path_set}
306
+ for c in contracts:
307
+ base_dir = str(Path(c.path).parent).replace("\\", "/")
308
+ for imp in c.imports:
309
+ src = getattr(imp, "source", "")
310
+ if not src.startswith("."):
311
+ continue
312
+ for t in _resolve_import(base_dir, src, path_set):
313
+ adj[c.path].add(t)
314
+ adj[t].add(c.path)
315
+
316
+ # Augment with call graph edges
317
+ for call in semantic_calls:
318
+ caller = getattr(call, "caller_path", None)
319
+ callee = getattr(call, "callee_path", None)
320
+ if caller in adj and callee in adj:
321
+ adj[caller].add(callee)
322
+ adj[callee].add(caller)
323
+
324
+ if focus_path not in adj:
325
+ return {}
326
+
327
+ distances: dict[str, int] = {focus_path: 0}
328
+ queue: deque[str] = deque([focus_path])
329
+ while queue:
330
+ node = queue.popleft()
331
+ d = distances[node]
332
+ if d >= 4:
333
+ continue
334
+ for neighbor in sorted(adj.get(node, set())):
335
+ if neighbor not in distances:
336
+ distances[neighbor] = d + 1
337
+ queue.append(neighbor)
338
+
339
+ return {p: 1.0 / (2 ** d) for p, d in distances.items()}
340
+
341
+
342
+ def _annotation_density(
343
+ code_notes: list,
344
+ contracts: list,
345
+ ) -> dict[str, float]:
346
+ """Severity-weighted annotation density per file, normalised [0.0, 1.0].
347
+
348
+ BUG / FIXME / HACK / XXX count 2×; all other kinds count 1×.
349
+ """
350
+ path_set = {c.path for c in contracts}
351
+ weighted: Counter[str] = Counter()
352
+ for note in code_notes:
353
+ path = getattr(note, "path", None)
354
+ if path not in path_set:
355
+ continue
356
+ kind = getattr(note, "kind", "").upper()
357
+ weighted[path] += 2.0 if kind in _HIGH_SEVERITY_NOTES else 1.0
358
+
359
+ max_val = max(weighted.values(), default=1.0)
360
+ return {p: min(weighted.get(p, 0.0) / max_val, 1.0) for p in path_set}
361
+
362
+
363
+ def _redundancy_penalty(
364
+ path: str,
365
+ selected_set: set[str],
366
+ contract_map: dict,
367
+ ) -> float:
368
+ """Penalty for adding a file from the same directory as already-selected files.
369
+
370
+ Rationale: files in the same directory address the same concern; the
371
+ marginal explanatory gain of the n-th file from a directory is lower than
372
+ that of the first file from a new directory.
373
+
374
+ Penalty grows by 0.10 per same-directory sibling, capped at 0.40.
375
+ The 0.40 cap ensures no node is ever fully excluded by proximity alone.
376
+ """
377
+ if not selected_set:
378
+ return 0.0
379
+ path_dir = str(Path(path).parent)
380
+ same_dir_count = sum(
381
+ 1 for s in selected_set
382
+ if str(Path(s).parent) == path_dir
383
+ )
384
+ return min(same_dir_count * 0.10, 0.40)
385
+
386
+
387
+ def _resolve_import(base_dir: str, src: str, path_set: set[str]) -> list[str]:
388
+ """Approximate resolution of a relative import specifier to known paths.
389
+
390
+ Mirrors the logic in contract_pipeline._resolve_relative without importing
391
+ from that module (avoids circular import).
392
+ """
393
+ src = src.lstrip("./")
394
+ if not src:
395
+ return []
396
+ exts = (".ts", ".tsx", ".js", ".jsx", ".py", "/index.ts", "/index.js", "/index.tsx")
397
+ for ext in exts:
398
+ candidate = f"{base_dir}/{src}{ext}".replace("//", "/")
399
+ if candidate in path_set:
400
+ return [candidate]
401
+ candidate = f"{base_dir}/{src}".replace("//", "/")
402
+ if candidate in path_set:
403
+ return [candidate]
404
+ return []
@@ -176,6 +176,8 @@ class ContractPipeline:
176
176
  symbol: Optional[str] = None,
177
177
  compress_types: bool = False,
178
178
  max_importers: int = 50,
179
+ semantic_calls: Optional[list] = None,
180
+ code_notes: Optional[list] = None,
179
181
  ) -> tuple[list[FileContract], ContractSummary]:
180
182
  """Run the full extraction pipeline.
181
183
 
@@ -239,7 +241,9 @@ class ContractPipeline:
239
241
  contracts.append(contract)
240
242
  method_counts[contract.extraction_method] += 1
241
243
 
242
- if not self._extractor.has_tree_sitter():
244
+ _js_ts_languages = {"typescript", "javascript", "tsx", "jsx"}
245
+ _has_js_ts = any(c.language in _js_ts_languages for c in contracts)
246
+ if _has_js_ts and not self._extractor.has_tree_sitter():
243
247
  limitations.append(
244
248
  "tree_sitter_unavailable: JS/TS extraction uses heuristics. "
245
249
  "Install with: pip install 'sourcecode[ast]'"
@@ -257,24 +261,24 @@ class ContractPipeline:
257
261
  if rank_by == "git-churn":
258
262
  churn = _get_git_churn(root, [c.path for c in contracts])
259
263
 
260
- # 6. Compute relevance scores via unified ranking engine
261
- max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
262
- max_churn_val = max(churn.values(), default=1) if churn else 1
264
+ # 6. Compute relevance scores via unified scoring engine.
265
+ # ContextScorer wraps RankingEngine and enriches scores with semantic
266
+ # centrality (when semantic_calls available) and annotation density
267
+ # (when code_notes available). Falls back to structural signals only
268
+ # when neither is present — identical to the old behaviour.
269
+ from sourcecode.context_scorer import ContextScorer
270
+ _ctx_scorer = ContextScorer(monorepo_packages)
271
+ _node_scores = _ctx_scorer.score_nodes(
272
+ contracts,
273
+ semantic_calls=semantic_calls,
274
+ code_notes=code_notes,
275
+ git_hotspots=churn,
276
+ task="default",
277
+ )
263
278
  for c in contracts:
264
- fs = engine.score(
265
- c.path,
266
- fan_in=c.fan_in,
267
- fan_out=c.fan_out,
268
- max_fan_in=max_fan_in,
269
- git_churn=churn.get(c.path, 0),
270
- max_churn=max_churn_val,
271
- is_entrypoint=c.is_entrypoint,
272
- is_changed=c.is_changed,
273
- export_count=len(c.exports),
274
- task="default",
275
- )
276
- c.relevance_score = fs.display_score
277
- c.ranking_reasons = fs.reasons
279
+ ns = _node_scores[c.path]
280
+ c.relevance_score = ns.display_score
281
+ c.ranking_reasons = ns.reasons
278
282
 
279
283
  # 7. Rank
280
284
  contracts = self._rank(contracts, rank_by)
@@ -66,6 +66,13 @@ class HeuristicDetector(AbstractDetector):
66
66
  counts[stack] += 1
67
67
  break
68
68
 
69
+ # Suppress minority stacks: if a language appears in fewer than 3 files
70
+ # AND represents less than 10% of detected source files, it is likely
71
+ # noise (stray config files, vendored snippets) rather than a real stack.
72
+ # The dominant language normally passes the relative threshold; note this is not guaranteed when counts are spread thinly across more than ten stacks.
73
+ total_detected = sum(counts.values())
74
+ _ABS_MIN = 3
75
+ _REL_MIN = 0.10
69
76
  stacks = [
70
77
  StackDetection(
71
78
  stack=stack,
@@ -73,7 +80,8 @@ class HeuristicDetector(AbstractDetector):
73
80
  confidence="low",
74
81
  manifests=[],
75
82
  )
76
- for stack, _count in counts.most_common()
83
+ for stack, count in counts.most_common()
84
+ if count >= _ABS_MIN or (total_detected > 0 and count / total_detected >= _REL_MIN)
77
85
  ]
78
86
 
79
87
  entry_points: list[EntryPoint] = []
@@ -11,8 +11,8 @@ _MAX_FILE_SIZE = 512 * 1024 # 512 KB
11
11
 
12
12
  _SKIP_DIRS = {
13
13
  "node_modules", ".git", "__pycache__", ".venv", "venv",
14
- ".mypy_cache", "dist", "build", ".tox", ".eggs", "coverage",
15
- ".next", ".nuxt", ".output", "vendor",
14
+ ".mypy_cache", "dist", "build", "target", ".gradle",
15
+ ".tox", ".eggs", "coverage", ".next", ".nuxt", ".output", "vendor",
16
16
  }
17
17
 
18
18
  _CODE_EXTENSIONS = {
@@ -701,7 +701,33 @@ class TaskContextBuilder:
701
701
 
702
702
  # Deterministic: score desc, then path asc as tiebreaker
703
703
  scored.sort(key=lambda x: (-x[0], x[1]))
704
- return [f for _, _, f in scored[:15]]
704
+
705
+ # Apply directory-diversity selection via ContextScorer.
706
+ # Files from the same directory share the same concern; the scorer
707
+ # applies a small redundancy penalty so the final set spans more of
708
+ # the codebase rather than clustering inside a single directory.
709
+ # Falls back to top-15 slice when scorer is unavailable.
710
+ try:
711
+ from sourcecode.context_scorer import ContextScorer, NodeScore
712
+ _ctx = ContextScorer()
713
+ _ns: dict[str, NodeScore] = {
714
+ path: NodeScore(
715
+ path=path,
716
+ score=total,
717
+ display_score=min(total / 3.0, 1.0),
718
+ structural=total,
719
+ semantic=0.0,
720
+ annotation=0.0,
721
+ proximity=0.0,
722
+ reasons=[rf.reason] if rf.reason else ["source file"],
723
+ )
724
+ for total, path, rf in scored
725
+ }
726
+ _selected = _ctx.select_subgraph(_ns, contracts=[], budget=15, min_score=0.05)
727
+ _rf_map = {path: rf for _, path, rf in scored}
728
+ return [_rf_map[p] for p in _selected if p in _rf_map]
729
+ except Exception:
730
+ return [f for _, _, f in scored[:15]]
705
731
 
706
732
  def _is_test(self, path: str) -> bool:
707
733
  name = Path(path).name.lower()