sourcecode 0.39.0__tar.gz → 0.42.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147)
  1. {sourcecode-0.39.0 → sourcecode-0.42.0}/PKG-INFO +1 -1
  2. {sourcecode-0.39.0 → sourcecode-0.42.0}/pyproject.toml +1 -1
  3. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/__init__.py +1 -1
  4. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/architecture_analyzer.py +94 -8
  5. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/cli.py +6 -2
  6. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/contract_model.py +1 -0
  7. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/contract_pipeline.py +41 -67
  8. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/doc_analyzer.py +29 -0
  9. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/env_analyzer.py +110 -22
  10. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/git_analyzer.py +40 -6
  11. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/prepare_context.py +46 -55
  12. sourcecode-0.42.0/src/sourcecode/ranking_engine.py +231 -0
  13. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/relevance_scorer.py +4 -0
  14. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/schema.py +29 -0
  15. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/semantic_analyzer.py +72 -2
  16. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/serializer.py +151 -15
  17. sourcecode-0.42.0/tests/test_block1_reliability.py +474 -0
  18. sourcecode-0.42.0/tests/test_block2_coverage.py +449 -0
  19. {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-join-discord/SKILL.md +0 -0
  20. {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-review-backlog/SKILL.md +0 -0
  21. {sourcecode-0.39.0 → sourcecode-0.42.0}/.agents/skills/source-command-gsd-workstreams/SKILL.md +0 -0
  22. {sourcecode-0.39.0 → sourcecode-0.42.0}/.gitignore +0 -0
  23. {sourcecode-0.39.0 → sourcecode-0.42.0}/.ruff.toml +0 -0
  24. {sourcecode-0.39.0 → sourcecode-0.42.0}/CONTRIBUTING.md +0 -0
  25. {sourcecode-0.39.0 → sourcecode-0.42.0}/LICENSE +0 -0
  26. {sourcecode-0.39.0 → sourcecode-0.42.0}/README.md +0 -0
  27. {sourcecode-0.39.0 → sourcecode-0.42.0}/SECURITY.md +0 -0
  28. {sourcecode-0.39.0 → sourcecode-0.42.0}/docs/privacy.md +0 -0
  29. {sourcecode-0.39.0 → sourcecode-0.42.0}/docs/schema.md +0 -0
  30. {sourcecode-0.39.0 → sourcecode-0.42.0}/raw +0 -0
  31. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/adaptive_scanner.py +0 -0
  32. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/architecture_summary.py +0 -0
  33. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/ast_extractor.py +0 -0
  34. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/classifier.py +0 -0
  35. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/code_notes_analyzer.py +0 -0
  36. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/confidence_analyzer.py +0 -0
  37. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/context_summarizer.py +0 -0
  38. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/coverage_parser.py +0 -0
  39. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/dependency_analyzer.py +0 -0
  40. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/__init__.py +0 -0
  41. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/base.py +0 -0
  42. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/csproj_parser.py +0 -0
  43. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/dart.py +0 -0
  44. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/dotnet.py +0 -0
  45. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/elixir.py +0 -0
  46. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/go.py +0 -0
  47. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/heuristic.py +0 -0
  48. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/hybrid.py +0 -0
  49. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/java.py +0 -0
  50. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/jvm_ext.py +0 -0
  51. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/nodejs.py +0 -0
  52. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/parsers.py +0 -0
  53. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/php.py +0 -0
  54. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/project.py +0 -0
  55. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/python.py +0 -0
  56. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/ruby.py +0 -0
  57. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/rust.py +0 -0
  58. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/systems.py +0 -0
  59. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/terraform.py +0 -0
  60. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/detectors/tooling.py +0 -0
  61. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/entrypoint_classifier.py +0 -0
  62. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/file_classifier.py +0 -0
  63. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/graph_analyzer.py +0 -0
  64. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/metrics_analyzer.py +0 -0
  65. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/redactor.py +0 -0
  66. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/repo_classifier.py +0 -0
  67. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/runtime_classifier.py +0 -0
  68. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/scanner.py +0 -0
  69. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/summarizer.py +0 -0
  70. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/__init__.py +0 -0
  71. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/config.py +0 -0
  72. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/consent.py +0 -0
  73. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/events.py +0 -0
  74. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/filters.py +0 -0
  75. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/telemetry/transport.py +0 -0
  76. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/tree_utils.py +0 -0
  77. {sourcecode-0.39.0 → sourcecode-0.42.0}/src/sourcecode/workspace.py +0 -0
  78. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/__init__.py +0 -0
  79. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/conftest.py +0 -0
  80. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/coverage.xml +0 -0
  81. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/fastapi_app/pyproject.toml +0 -0
  82. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/fastapi_app/src/main.py +0 -0
  83. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/go_service/cmd/api/main.go +0 -0
  84. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/go_service/go.mod +0 -0
  85. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/jacoco.xml +0 -0
  86. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/lcov.info +0 -0
  87. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/app/page.tsx +0 -0
  88. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/package.json +0 -0
  89. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/nextjs_app/pnpm-lock.yaml +0 -0
  90. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/apps/web/app/page.tsx +0 -0
  91. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/apps/web/package.json +0 -0
  92. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/packages/api/main.py +0 -0
  93. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/packages/api/pyproject.toml +0 -0
  94. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/fixtures/pnpm_monorepo/pnpm-workspace.yaml +0 -0
  95. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_architecture_analyzer.py +0 -0
  96. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_architecture_summary.py +0 -0
  97. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_ast_extractor.py +0 -0
  98. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_classifier.py +0 -0
  99. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_cli.py +0 -0
  100. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_code_notes_analyzer.py +0 -0
  101. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_contract_pipeline.py +0 -0
  102. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_coverage_parser.py +0 -0
  103. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_cross_consistency.py +0 -0
  104. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_analyzer_node_python.py +0 -0
  105. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_analyzer_polyglot.py +0 -0
  106. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_dependency_schema.py +0 -0
  107. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_dotnet.py +0 -0
  108. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_go_rust_java.py +0 -0
  109. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_nodejs.py +0 -0
  110. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_php_ruby_dart.py +0 -0
  111. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_python.py +0 -0
  112. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_universal_managed.py +0 -0
  113. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detector_universal_systems.py +0 -0
  114. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_detectors_base.py +0 -0
  115. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_doc_analyzer_jsdom.py +0 -0
  116. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_doc_analyzer_python.py +0 -0
  117. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_analyzer_polyglot.py +0 -0
  118. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_analyzer_python_node.py +0 -0
  119. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_graph_schema.py +0 -0
  120. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_hybrid_inference.py +0 -0
  121. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration.py +0 -0
  122. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_dependencies.py +0 -0
  123. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_detection.py +0 -0
  124. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_docs.py +0 -0
  125. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_graph_modules.py +0 -0
  126. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_lqn.py +0 -0
  127. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_metrics.py +0 -0
  128. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_multistack.py +0 -0
  129. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_semantics.py +0 -0
  130. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_integration_universal.py +0 -0
  131. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_metrics_analyzer.py +0 -0
  132. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_packaging.py +0 -0
  133. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_phase1_improvements.py +0 -0
  134. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_pipeline_integrity.py +0 -0
  135. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_real_projects.py +0 -0
  136. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_redactor.py +0 -0
  137. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_scanner.py +0 -0
  138. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_schema.py +0 -0
  139. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_schema_normalization.py +0 -0
  140. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_analyzer_node.py +0 -0
  141. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_analyzer_python.py +0 -0
  142. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_import_resolution.py +0 -0
  143. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_semantic_schema.py +0 -0
  144. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_signal_hierarchy.py +0 -0
  145. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_summarizer.py +0 -0
  146. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_telemetry.py +0 -0
  147. {sourcecode-0.39.0 → sourcecode-0.42.0}/tests/test_workspace_analyzer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sourcecode
3
- Version: 0.39.0
3
+ Version: 0.42.0
4
4
  Summary: Deterministic codebase context for AI coding agents
5
5
  License: Apache License
6
6
  Version 2.0, January 2004
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sourcecode"
7
- version = "0.39.0"
7
+ version = "0.42.0"
8
8
  description = "Deterministic codebase context for AI coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1,3 +1,3 @@
1
1
  """sourcecode — Deterministic codebase context maps for AI coding agents."""
2
2
 
3
- __version__ = "0.39.0"
3
+ __version__ = "0.42.0"
@@ -172,6 +172,7 @@ class ArchitectureAnalyzer:
172
172
  graph: Optional[ModuleGraph] = None,
173
173
  ) -> ArchitectureAnalysis:
174
174
  limitations: list[str] = []
175
+ evidence: list[dict] = []
175
176
 
176
177
  # Step 1: filter paths
177
178
  filtered = self._filter_paths(sm.file_paths)
@@ -180,6 +181,8 @@ class ArchitectureAnalyzer:
180
181
  requested=True,
181
182
  pattern="unknown",
182
183
  limitations=["Arquitectura no inferida: proyecto sin archivos de codigo suficientes"],
184
+ evidence=[{"type": "none", "paths": [], "reason": "insufficient source files", "confidence": "high"}],
185
+ tentative=False,
183
186
  )
184
187
 
185
188
  # Step 2: domain clustering
@@ -193,17 +196,32 @@ class ArchitectureAnalyzer:
193
196
  elif pattern == "unknown":
194
197
  limitations.append("Patron de capas no reconocido: estructura de directorios sin senales claras")
195
198
 
196
- # Step 3b: monorepo override — workspace config is hard evidence
197
- if self._has_workspace_config(sm.file_paths) and pattern not in (
199
+ # Step 3b: monorepo override — workspace config is hard evidence.
200
+ # Overrides all weak inferred patterns; only truly specialised patterns
201
+ # (cqrs, clean, onion, hexagonal) take precedence over workspace config.
202
+ has_workspace = self._has_workspace_config(sm.file_paths)
203
+ if has_workspace and pattern not in (
198
204
  "monorepo", "cqrs", "clean", "onion", "hexagonal"
199
205
  ):
200
206
  mono_layers = self._detect_monorepo_packages(filtered)
201
- if mono_layers or pattern in (None, "unknown", "flat", "modular", "layered"):
207
+ # Override whenever: monorepo packages detected, OR pattern is any weak/generic type.
208
+ # "fullstack", "layered", "mvc", "microservices", "modular", "flat", "unknown", None
209
+ # all yield to workspace config evidence.
210
+ _WEAK_PATTERNS = {None, "unknown", "flat", "modular", "layered",
211
+ "fullstack", "mvc", "microservices"}
212
+ if mono_layers or pattern in _WEAK_PATTERNS:
202
213
  pattern = "monorepo"
203
214
  layers = mono_layers
204
215
  limitations.append(
205
216
  "Workspace config detectado — arquitectura refleja topologia de paquetes"
206
217
  )
218
+ ws_files = [p for p in sm.file_paths if p.split("/")[-1] in _WORKSPACE_CONFIG_FILES]
219
+ evidence.append({
220
+ "type": "workspace_config",
221
+ "paths": ws_files[:4],
222
+ "reason": "Monorepo workspace config file(s) detected — hard evidence for monorepo topology",
223
+ "confidence": "high",
224
+ })
207
225
 
208
226
  # Step 4: bounded context inference
209
227
  bounded_contexts = self._infer_bounded_contexts(domains, graph)
@@ -212,25 +230,91 @@ class ArchitectureAnalyzer:
212
230
  confidence: Literal["high", "medium", "low"]
213
231
  strong_domains = [d for d in domains if d.confidence in ("high", "medium")]
214
232
  all_layers_weak = layers and all(l.confidence == "low" for l in layers)
233
+
234
+ method = "graph+structure" if graph is not None else "filesystem_inference"
235
+ # High-confidence evidence (workspace config) makes pattern non-tentative.
236
+ tentative = not any(e.get("confidence") == "high" for e in evidence)
237
+
238
+ # _hard_evidence: high-confidence evidence was already set (e.g. workspace_config).
239
+ # When True, tentative must stay False and confidence must stay at least "medium".
240
+ _hard_evidence = not tentative # tentative=False iff high-conf evidence present
241
+
215
242
  if pattern not in (None, "unknown", "flat"):
216
- if all_layers_weak:
243
+ if graph is not None:
244
+ # Import graph provided — structural validation available
245
+ confidence = "medium" if len(strong_domains) >= 3 else "low"
246
+ evidence.append({
247
+ "type": "import_graph",
248
+ "paths": [n.id for n in graph.nodes[:6]],
249
+ "reason": f"Module import graph with {len(graph.nodes)} nodes used for pattern validation",
250
+ "confidence": "medium",
251
+ })
252
+ elif all_layers_weak:
217
253
  # Layers came from file-naming heuristic only, not directory structure
218
254
  confidence = "low"
255
+ if not _hard_evidence:
256
+ tentative = True
219
257
  limitations.append(
220
258
  "Low confidence inference: pattern inferred from filenames only, without import graph confirmation"
221
259
  )
260
+ evidence.append({
261
+ "type": "filesystem_naming",
262
+ "paths": [l.files[0] for l in layers if l.files][:6],
263
+ "reason": (
264
+ f"Pattern '{pattern}' inferred from file stem naming conventions only "
265
+ "(e.g. *_controller.py, *_service.py). "
266
+ "No directory structure or import graph confirmation."
267
+ ),
268
+ "confidence": "low",
269
+ })
222
270
  else:
223
- confidence = "medium" if len(strong_domains) >= 3 else "low"
224
- if graph is None:
271
+ # Directory structure match (or monorepo/workspace override with no layers)
272
+ confidence = "medium" if (_hard_evidence or len(strong_domains) >= 3) else "low"
273
+ if confidence == "low" and not _hard_evidence:
274
+ tentative = True
275
+ if not _hard_evidence:
225
276
  limitations.append(
226
277
  "Pattern not confirmed by module import graph; run with --graph-modules for structural validation"
227
278
  )
279
+ if not _hard_evidence:
280
+ matched_dirs = sorted({
281
+ p.replace("\\", "/").split("/")[0]
282
+ for layer in layers for p in layer.files
283
+ })
284
+ evidence.append({
285
+ "type": "filesystem_naming",
286
+ "paths": matched_dirs[:8],
287
+ "reason": (
288
+ f"Pattern '{pattern}' inferred from directory names matching layer keywords. "
289
+ "Import graph not available — structural direction of dependencies unverified."
290
+ ),
291
+ "confidence": "low" if confidence == "low" else "medium",
292
+ })
228
293
  elif len(strong_domains) >= 1:
229
294
  confidence = "medium"
295
+ if not _hard_evidence:
296
+ tentative = True
297
+ evidence.append({
298
+ "type": "filesystem_naming",
299
+ "paths": [d.name for d in strong_domains[:6]],
300
+ "reason": "Domain clustering from directory names; no layer pattern confirmed",
301
+ "confidence": "low",
302
+ })
230
303
  else:
231
304
  confidence = "low"
232
-
233
- method = "graph+structure" if graph is not None else "filesystem_inference"
305
+ if not _hard_evidence:
306
+ tentative = True
307
+ if not evidence:
308
+ limitations.append(
309
+ "insufficient_evidence: no recognizable architectural signals found; "
310
+ "filesystem structure does not match known patterns"
311
+ )
312
+ evidence.append({
313
+ "type": "filesystem_naming",
314
+ "paths": filtered[:6],
315
+ "reason": "Only filesystem paths available; no pattern matched",
316
+ "confidence": "low",
317
+ })
234
318
 
235
319
  return ArchitectureAnalysis(
236
320
  requested=True,
@@ -241,6 +325,8 @@ class ArchitectureAnalyzer:
241
325
  confidence=confidence,
242
326
  method=method,
243
327
  limitations=limitations,
328
+ evidence=evidence,
329
+ tentative=tentative,
244
330
  )
245
331
 
246
332
  # ------------------------------------------------------------------
@@ -566,7 +566,7 @@ def main(
566
566
  entrypoints_only: bool = typer.Option(
567
567
  False,
568
568
  "--entrypoints-only",
569
- help="Contract mode: include only files that are entrypoints or have exported symbols.",
569
+ help="Contract mode: include only files that are runtime entrypoints or have exported symbols (public API surface). Note: 'entrypoints' here includes all files with exports, not strictly detected runtime entry points.",
570
570
  ),
571
571
  changed_only: bool = typer.Option(
572
572
  False,
@@ -1156,11 +1156,15 @@ def main(
1156
1156
 
1157
1157
  _all_call_files = set(_fan_in) | set(_fan_out)
1158
1158
  _hotspots: list[dict] = []
1159
- # Filter test paths from hotspots — they dominate fan-in by calling many modules
1159
+ # Filter test, noise, and auxiliary paths — they dominate fan-in but carry no signal
1160
1160
  _TEST_MARKERS = {"/test", "/tests", "/spec", "/specs", "_test.", ".test.", ".spec."}
1161
+ from sourcecode.ranking_engine import RankingEngine as _RankingEngine
1162
+ _sem_engine = _RankingEngine(sm.monorepo_packages)
1161
1163
  for _p in _all_call_files:
1162
1164
  if any(_m in _p for _m in _TEST_MARKERS) or _p.startswith("test"):
1163
1165
  continue
1166
+ if _sem_engine.is_noise(_p) or _sem_engine.is_auxiliary(_p):
1167
+ continue
1164
1168
  _in = _fan_in[_p]
1165
1169
  _out = _fan_out[_p]
1166
1170
  _score = _in * 2.0 + _out * 1.0
@@ -91,6 +91,7 @@ class FileContract:
91
91
  fan_out: int = 0 # how many files this imports
92
92
  is_entrypoint: bool = False
93
93
  is_changed: bool = False
94
+ ranking_reasons: list[str] = field(default_factory=list)
94
95
 
95
96
  # Extraction quality
96
97
  extraction_method: str = "heuristic" # ast | tree_sitter | heuristic
@@ -17,6 +17,7 @@ from typing import Any, Literal, Optional
17
17
 
18
18
  from sourcecode.ast_extractor import AstExtractor, _LANGUAGE_MAP
19
19
  from sourcecode.contract_model import ContractSummary, FileContract
20
+ from sourcecode.ranking_engine import RankingEngine
20
21
  from sourcecode.relevance_scorer import RelevanceScorer
21
22
  from sourcecode.schema import EntryPoint, MonorepoPackageInfo
22
23
 
@@ -27,22 +28,6 @@ from sourcecode.schema import EntryPoint, MonorepoPackageInfo
27
28
  _MAX_FILES = 500 # hard cap on files extracted per run
28
29
  _SRC_EXTENSIONS: frozenset[str] = frozenset(_LANGUAGE_MAP.keys())
29
30
 
30
- # Role-based score adjustments applied after contract extraction.
31
- # Runtime roles get a boost; config/util are neutral or penalized.
32
- _ROLE_SCORE: dict[str, float] = {
33
- "entrypoint": 0.15,
34
- "service": 0.10,
35
- "route": 0.10,
36
- "api": 0.08,
37
- "middleware": 0.06,
38
- "store": 0.05,
39
- "model": 0.05,
40
- "hook": 0.05,
41
- "component": 0.03,
42
- "util": 0.00,
43
- "config": -0.10,
44
- "unknown": 0.00,
45
- }
46
31
 
47
32
  RankStrategy = Literal["relevance", "centrality", "git-churn"]
48
33
 
@@ -60,9 +45,10 @@ def _get_changed_files(root: Path) -> set[str]:
60
45
  ]:
61
46
  try:
62
47
  result = subprocess.run(
63
- cmd, cwd=root, capture_output=True, text=True, timeout=10
48
+ cmd, cwd=root, capture_output=True, text=True,
49
+ encoding="utf-8", errors="replace", timeout=10,
64
50
  )
65
- for line in result.stdout.splitlines():
51
+ for line in (result.stdout or "").splitlines():
66
52
  line = line.strip()
67
53
  if line:
68
54
  changed.add(line.replace("\\", "/"))
@@ -71,9 +57,10 @@ def _get_changed_files(root: Path) -> set[str]:
71
57
  try:
72
58
  result = subprocess.run(
73
59
  ["git", "status", "--porcelain"],
74
- cwd=root, capture_output=True, text=True, timeout=10
60
+ cwd=root, capture_output=True, text=True,
61
+ encoding="utf-8", errors="replace", timeout=10,
75
62
  )
76
- for line in result.stdout.splitlines():
63
+ for line in (result.stdout or "").splitlines():
77
64
  if len(line) > 3:
78
65
  changed.add(line[3:].strip().replace("\\", "/"))
79
66
  except Exception:
@@ -144,11 +131,12 @@ def _get_git_churn(root: Path, file_paths: list[str]) -> dict[str, int]:
144
131
  try:
145
132
  result = subprocess.run(
146
133
  ["git", "log", "--name-only", "--format=", "--since=90.days.ago"],
147
- cwd=root, capture_output=True, text=True, timeout=15,
134
+ cwd=root, capture_output=True, text=True,
135
+ encoding="utf-8", errors="replace", timeout=15,
148
136
  )
149
137
  path_set = set(file_paths)
150
138
  counter: Counter[str] = Counter()
151
- for line in result.stdout.splitlines():
139
+ for line in (result.stdout or "").splitlines():
152
140
  line = line.strip().replace("\\", "/")
153
141
  if line in path_set:
154
142
  counter[line] += 1
@@ -194,6 +182,7 @@ class ContractPipeline:
194
182
  """
195
183
  entry_paths = {ep.path.replace("\\", "/") for ep in (entry_points or [])}
196
184
  scorer = RelevanceScorer(monorepo_packages)
185
+ engine = RankingEngine(monorepo_packages)
197
186
 
198
187
  # 1. Changed files (for --changed-only and ranking)
199
188
  changed_files: set[str] = set()
@@ -267,9 +256,24 @@ class ContractPipeline:
267
256
  if rank_by == "git-churn":
268
257
  churn = _get_git_churn(root, [c.path for c in contracts])
269
258
 
270
- # 6. Compute relevance scores
259
+ # 6. Compute relevance scores via unified ranking engine
260
+ max_fan_in = max((c.fan_in for c in contracts), default=1) if contracts else 1
261
+ max_churn_val = max(churn.values(), default=1) if churn else 1
271
262
  for c in contracts:
272
- c.relevance_score = self._score(c, scorer, churn)
263
+ fs = engine.score(
264
+ c.path,
265
+ fan_in=c.fan_in,
266
+ fan_out=c.fan_out,
267
+ max_fan_in=max_fan_in,
268
+ git_churn=churn.get(c.path, 0),
269
+ max_churn=max_churn_val,
270
+ is_entrypoint=c.is_entrypoint,
271
+ is_changed=c.is_changed,
272
+ export_count=len(c.exports),
273
+ task="default",
274
+ )
275
+ c.relevance_score = fs.display_score
276
+ c.ranking_reasons = fs.reasons
273
277
 
274
278
  # 7. Rank
275
279
  contracts = self._rank(contracts, rank_by)
@@ -285,7 +289,7 @@ class ContractPipeline:
285
289
  known_paths=set(src_paths),
286
290
  entry_paths=entry_paths,
287
291
  changed_files=changed_files,
288
- scorer=scorer,
292
+ engine=engine,
289
293
  )
290
294
 
291
295
  # 9. Entrypoints-only filter
@@ -312,45 +316,13 @@ class ContractPipeline:
312
316
  )
313
317
  return contracts, summary
314
318
 
315
- def _score(
316
- self,
317
- c: FileContract,
318
- scorer: RelevanceScorer,
319
- churn: dict[str, int],
320
- ) -> float:
321
- base = scorer.score(c.path)
322
-
323
- if c.is_entrypoint:
324
- base += 0.3
325
- if c.is_changed:
326
- base += 0.2
327
-
328
- # Fan-in is the strongest signal: many callers = critical contract
329
- fi_score = min(c.fan_in / 10.0, 0.3)
330
- fo_score = min(c.fan_out / 15.0, 0.15)
331
- base += fi_score + fo_score
332
-
333
- # Exported API value
334
- export_count = len(c.exports)
335
- base += min(export_count / 20.0, 0.1)
336
-
337
- # Churn
338
- churn_score = min(churn.get(c.path, 0) / 20.0, 0.1)
339
- base += churn_score
340
-
341
- # Role-based boost: runtime roles score higher than auxiliary
342
- base += _ROLE_SCORE.get(c.role, 0.0)
343
-
344
- return min(1.0, base)
345
-
346
319
  def _rank(self, contracts: list[FileContract], rank_by: RankStrategy) -> list[FileContract]:
347
320
  if rank_by == "centrality":
348
- # Approximate centrality: fan_in + fan_out
349
- return sorted(contracts, key=lambda c: -(c.fan_in + c.fan_out))
321
+ return sorted(contracts, key=lambda c: (-(c.fan_in + c.fan_out), c.path))
350
322
  if rank_by == "git-churn":
351
- return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score))
352
- # Default: relevance
353
- return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score))
323
+ return sorted(contracts, key=lambda c: (-c.is_changed, -c.relevance_score, c.path))
324
+ # Default: relevance — path breaks ties deterministically
325
+ return sorted(contracts, key=lambda c: (-c.is_entrypoint, -c.relevance_score, c.path))
354
326
 
355
327
  def _symbol_deep_scan(
356
328
  self,
@@ -359,7 +331,7 @@ class ContractPipeline:
359
331
  known_paths: set[str],
360
332
  entry_paths: set[str],
361
333
  changed_files: set[str],
362
- scorer: RelevanceScorer,
334
+ engine: RankingEngine,
363
335
  ) -> list[FileContract]:
364
336
  """Grep-based fallback when the shallow scan missed the defining files.
365
337
 
@@ -367,7 +339,7 @@ class ContractPipeline:
367
339
  extracts contracts for candidates not already processed, then re-applies
368
340
  the symbol filter. Fan-in/fan-out are not computed for these contracts.
369
341
  """
370
- candidates = _find_symbol_files(root, symbol, known_paths, scorer)
342
+ candidates = _find_symbol_files(root, symbol, known_paths, engine)
371
343
  if not candidates:
372
344
  return []
373
345
 
@@ -379,7 +351,9 @@ class ContractPipeline:
379
351
  continue
380
352
  contract.is_entrypoint = rel_path in entry_paths
381
353
  contract.is_changed = rel_path in changed_files
382
- contract.relevance_score = scorer.score(rel_path)
354
+ fs = engine.score(rel_path, is_entrypoint=contract.is_entrypoint, is_changed=contract.is_changed)
355
+ contract.relevance_score = fs.display_score
356
+ contract.ranking_reasons = fs.reasons
383
357
  extra.append(contract)
384
358
 
385
359
  return _filter_by_symbol(extra, symbol)
@@ -531,7 +505,7 @@ def _find_symbol_files(
531
505
  root: Path,
532
506
  symbol: str,
533
507
  known_paths: set[str],
534
- scorer: RelevanceScorer,
508
+ engine: RankingEngine,
535
509
  ) -> list[str]:
536
510
  """Find source files outside *known_paths* that contain *symbol* as text.
537
511
 
@@ -560,7 +534,7 @@ def _find_symbol_files(
560
534
  if line.startswith("./"):
561
535
  line = line[2:]
562
536
  line = line.replace("\\", "/")
563
- if line and line not in known_paths and not scorer.is_noise(line):
537
+ if line and line not in known_paths and not engine.is_noise(line):
564
538
  found.append(line)
565
539
  return found
566
540
  except Exception:
@@ -578,7 +552,7 @@ def _find_symbol_files(
578
552
  rel_str = str(rel).replace("\\", "/")
579
553
  except ValueError:
580
554
  continue
581
- if rel_str in known_paths or scorer.is_noise(rel_str):
555
+ if rel_str in known_paths or engine.is_noise(rel_str):
582
556
  continue
583
557
  try:
584
558
  content = Path(full).read_text(encoding="utf-8", errors="replace")
@@ -132,6 +132,8 @@ class DocAnalyzer:
132
132
  records: list[DocRecord] = []
133
133
  limitations: list[str] = list(limitations_pre)
134
134
  languages: set[str] = set()
135
+ # Track per-language support status for honest reporting
136
+ unsupported_langs: set[str] = set()
135
137
 
136
138
  for relative_path in file_paths:
137
139
  abs_path = root / relative_path
@@ -176,8 +178,18 @@ class DocAnalyzer:
176
178
  # Unsupported language — D-04: no emitir DocRecord, solo registrar limitation
177
179
  limitations.append(f"docs_unavailable:{norm_path}:language={lang}")
178
180
  languages.add(lang)
181
+ unsupported_langs.add(lang)
179
182
  # NO records.append() here
180
183
 
184
+ # Build language_coverage: explicit per-language support status
185
+ _SUPPORTED_LANGS = {"python", "javascript", "typescript"}
186
+ lang_coverage: dict[str, str] = {}
187
+ for lang in languages:
188
+ if lang in _SUPPORTED_LANGS:
189
+ lang_coverage[lang] = "supported"
190
+ else:
191
+ lang_coverage[lang] = "unsupported"
192
+
181
193
  # Build summary
182
194
  symbol_count = sum(1 for r in records if r.kind != "module")
183
195
  total_count = len(records)
@@ -185,6 +197,22 @@ class DocAnalyzer:
185
197
  if any(r.doc_text and r.doc_text.endswith(self._TRUNCATION_SUFFIX) for r in records):
186
198
  truncated = True
187
199
 
200
+ # Explicit absence signal: scanned files but found nothing
201
+ if total_count == 0 and file_paths:
202
+ limitations.append(
203
+ f"no_docs_found: {len(file_paths)} file(s) scanned, "
204
+ "no docstrings or JSDoc comments found"
205
+ )
206
+
207
+ # Warn explicitly when unsupported languages are present — agents must not
208
+ # assume full coverage when Java/Go/Rust files are in scope but not analyzed.
209
+ if unsupported_langs:
210
+ sorted_unsupported = sorted(unsupported_langs)
211
+ limitations.append(
212
+ f"docs_not_extracted: language(s) {sorted_unsupported} present but not supported; "
213
+ "only Python and JS/TS docstrings are extracted"
214
+ )
215
+
188
216
  summary = DocSummary(
189
217
  requested=True,
190
218
  total_count=total_count,
@@ -193,6 +221,7 @@ class DocAnalyzer:
193
221
  depth=depth,
194
222
  truncated=truncated,
195
223
  limitations=limitations,
224
+ language_coverage=lang_coverage,
196
225
  )
197
226
  return records, summary
198
227